[gomp4 4/6] C front end infrastructure for OpenACC clauses parsing.
From: Thomas Schwinge gcc/c/ * c-parser.c (c_parser_oacc_all_clauses): New function. (c_parser_oacc_parallel): Use it. * c-typeck.c (c_finish_omp_clauses): Update comment. Remove duplicated variable initialization. --- gcc/c/c-parser.c | 59 +++- gcc/c/c-typeck.c | 4 ++-- 2 files changed, 56 insertions(+), 7 deletions(-) diff --git gcc/c/c-parser.c gcc/c/c-parser.c index ce46f31..c8b80db 100644 --- gcc/c/c-parser.c +++ gcc/c/c-parser.c @@ -9465,7 +9465,7 @@ c_parser_pragma_pch_preprocess (c_parser *parser) c_common_pch_pragma (parse_in, TREE_STRING_POINTER (name)); } -/* OpenMP 2.5 / 3.0 / 3.1 / 4.0 parsing routines. */ +/* OpenACC and OpenMP parsing routines. */ /* Returns name of the next clause. If the clause is not recognized PRAGMA_OMP_CLAUSE_NONE is returned and @@ -10767,9 +10767,58 @@ c_parser_omp_clause_uniform (c_parser *parser, tree list) return list; } +/* Parse all OpenACC clauses. The set clauses allowed by the directive + is a bitmask in MASK. Return the list of clauses found. */ + +static tree +c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, + const char *where, bool finish_p = true) +{ + tree clauses = NULL; + bool first = true; + + while (c_parser_next_token_is_not (parser, CPP_PRAGMA_EOL)) +{ + location_t here; + pragma_omp_clause c_kind; + const char *c_name; + tree prev = clauses; + + if (!first && c_parser_next_token_is (parser, CPP_COMMA)) + c_parser_consume_token (parser); + + here = c_parser_peek_token (parser)->location; + c_kind = c_parser_omp_clause_name (parser); + + switch (c_kind) + { + default: + c_parser_error (parser, "expected clause"); + goto saw_error; + } + + first = false; + + if (((mask >> c_kind) & 1) == 0 && !parser->error) + { + /* Remove the invalid clause(s) from the list to avoid +confusing the rest of the compiler. 
*/ + clauses = prev; + error_at (here, "%qs is not valid for %qs", c_name, where); + } +} + + saw_error: + c_parser_skip_to_pragma_eol (parser); + + if (finish_p) +return c_finish_omp_clauses (clauses); + + return clauses; +} + /* Parse all OpenMP clauses. The set clauses allowed by the directive - is a bitmask in MASK. Return the list of clauses found; the result - of clause default goes in *pdefault. */ + is a bitmask in MASK. Return the list of clauses found. */ static tree c_parser_omp_all_clauses (c_parser *parser, omp_clause_mask mask, @@ -11019,8 +11068,8 @@ c_parser_oacc_parallel (location_t loc, c_parser *parser) { tree stmt, clauses, block; - clauses = c_parser_omp_all_clauses (parser, OACC_PARALLEL_CLAUSE_MASK, - "#pragma acc parallel"); + clauses = c_parser_oacc_all_clauses (parser, OACC_PARALLEL_CLAUSE_MASK, + "#pragma acc parallel"); gcc_assert (clauses == NULL); block = c_begin_omp_parallel (); diff --git gcc/c/c-typeck.c gcc/c/c-typeck.c index 854e149..81f0c5c 100644 --- gcc/c/c-typeck.c +++ gcc/c/c-typeck.c @@ -11661,7 +11661,7 @@ c_find_omp_placeholder_r (tree *tp, int *, void *data) return NULL_TREE; } -/* For all elements of CLAUSES, validate them vs OpenMP constraints. +/* For all elements of CLAUSES, validate them against their constraints. Remove any elements from the list that are invalid. */ tree @@ -11669,7 +11669,7 @@ c_finish_omp_clauses (tree clauses) { bitmap_head generic_head, firstprivate_head, lastprivate_head; bitmap_head aligned_head; - tree c, t, *pc = &clauses; + tree c, t, *pc; bool branch_seen = false; bool copyprivate_seen = false; tree *nowait_clause = NULL; -- 1.8.1.1
[gomp4 5/6] Initial support in the C front end for OpenACC data clauses.
From: Thomas Schwinge gcc/c-family/ * c-pragma.h (pragma_omp_clause): Add PRAGMA_OMP_CLAUSE_COPY, PRAGMA_OMP_CLAUSE_COPYOUT, PRAGMA_OMP_CLAUSE_CREATE, PRAGMA_OMP_CLAUSE_DELETE, PRAGMA_OMP_CLAUSE_DEVICEPTR, PRAGMA_OMP_CLAUSE_PRESENT, PRAGMA_OMP_CLAUSE_PRESENT_OR_COPY, PRAGMA_OMP_CLAUSE_PRESENT_OR_COPYIN, PRAGMA_OMP_CLAUSE_PRESENT_OR_COPYOUT, and PRAGMA_OMP_CLAUSE_PRESENT_OR_CREATE. gcc/c/ * c-parser.c (c_parser_omp_clause_name): Handle these. (c_parser_oacc_data_clause, c_parser_oacc_data_clause_deviceptr): New functions. (c_parser_oacc_all_clauses): Handle PRAGMA_OMP_CLAUSE_COPY, PRAGMA_OMP_CLAUSE_COPYIN, PRAGMA_OMP_CLAUSE_COPYOUT, PRAGMA_OMP_CLAUSE_CREATE, PRAGMA_OMP_CLAUSE_DELETE, PRAGMA_OMP_CLAUSE_DEVICEPTR, PRAGMA_OMP_CLAUSE_PRESENT, PRAGMA_OMP_CLAUSE_PRESENT_OR_COPY, PRAGMA_OMP_CLAUSE_PRESENT_OR_COPYIN, PRAGMA_OMP_CLAUSE_PRESENT_OR_COPYOUT, and PRAGMA_OMP_CLAUSE_PRESENT_OR_CREATE. gcc/ * tree-core.h (omp_clause_code): Update description for OMP_CLAUSE_MAP. --- gcc/c-family/c-pragma.h | 12 +++- gcc/c/c-parser.c| 171 +++- gcc/tree-core.h | 6 +- 3 files changed, 184 insertions(+), 5 deletions(-) diff --git gcc/c-family/c-pragma.h gcc/c-family/c-pragma.h index 64eed11..2c8af67 100644 --- gcc/c-family/c-pragma.h +++ gcc/c-family/c-pragma.h @@ -63,18 +63,23 @@ typedef enum pragma_kind { } pragma_kind; -/* All clauses defined by OpenMP 2.5, 3.0, 3.1 and 4.0. +/* All clauses defined by OpenACC 2.0, and OpenMP 2.5, 3.0, 3.1, and 4.0. Used internally by both C and C++ parsers. 
*/ typedef enum pragma_omp_clause { PRAGMA_OMP_CLAUSE_NONE = 0, PRAGMA_OMP_CLAUSE_ALIGNED, PRAGMA_OMP_CLAUSE_COLLAPSE, + PRAGMA_OMP_CLAUSE_COPY, PRAGMA_OMP_CLAUSE_COPYIN, + PRAGMA_OMP_CLAUSE_COPYOUT, PRAGMA_OMP_CLAUSE_COPYPRIVATE, + PRAGMA_OMP_CLAUSE_CREATE, PRAGMA_OMP_CLAUSE_DEFAULT, + PRAGMA_OMP_CLAUSE_DELETE, PRAGMA_OMP_CLAUSE_DEPEND, PRAGMA_OMP_CLAUSE_DEVICE, + PRAGMA_OMP_CLAUSE_DEVICEPTR, PRAGMA_OMP_CLAUSE_DIST_SCHEDULE, PRAGMA_OMP_CLAUSE_FINAL, PRAGMA_OMP_CLAUSE_FIRSTPRIVATE, @@ -92,6 +97,11 @@ typedef enum pragma_omp_clause { PRAGMA_OMP_CLAUSE_NUM_THREADS, PRAGMA_OMP_CLAUSE_ORDERED, PRAGMA_OMP_CLAUSE_PARALLEL, + PRAGMA_OMP_CLAUSE_PRESENT, + PRAGMA_OMP_CLAUSE_PRESENT_OR_COPY, + PRAGMA_OMP_CLAUSE_PRESENT_OR_COPYIN, + PRAGMA_OMP_CLAUSE_PRESENT_OR_COPYOUT, + PRAGMA_OMP_CLAUSE_PRESENT_OR_CREATE, PRAGMA_OMP_CLAUSE_PRIVATE, PRAGMA_OMP_CLAUSE_PROC_BIND, PRAGMA_OMP_CLAUSE_REDUCTION, diff --git gcc/c/c-parser.c gcc/c/c-parser.c index c8b80db..48c55e6 100644 --- gcc/c/c-parser.c +++ gcc/c/c-parser.c @@ -9496,16 +9496,26 @@ c_parser_omp_clause_name (c_parser *parser) case 'c': if (!strcmp ("collapse", p)) result = PRAGMA_OMP_CLAUSE_COLLAPSE; + else if (!strcmp ("copy", p)) + result = PRAGMA_OMP_CLAUSE_COPY; else if (!strcmp ("copyin", p)) result = PRAGMA_OMP_CLAUSE_COPYIN; + else if (!strcmp ("copyout", p)) + result = PRAGMA_OMP_CLAUSE_COPYOUT; else if (!strcmp ("copyprivate", p)) result = PRAGMA_OMP_CLAUSE_COPYPRIVATE; + else if (!strcmp ("create", p)) + result = PRAGMA_OMP_CLAUSE_CREATE; break; case 'd': - if (!strcmp ("depend", p)) + if (!strcmp ("delete", p)) + result = PRAGMA_OMP_CLAUSE_DELETE; + else if (!strcmp ("depend", p)) result = PRAGMA_OMP_CLAUSE_DEPEND; else if (!strcmp ("device", p)) result = PRAGMA_OMP_CLAUSE_DEVICE; + else if (!strcmp ("deviceptr", p)) + result = PRAGMA_OMP_CLAUSE_DEVICEPTR; else if (!strcmp ("dist_schedule", p)) result = PRAGMA_OMP_CLAUSE_DIST_SCHEDULE; break; @@ -9550,6 +9560,16 @@ c_parser_omp_clause_name (c_parser *parser) case 'p': 
if (!strcmp ("parallel", p)) result = PRAGMA_OMP_CLAUSE_PARALLEL; + else if (!strcmp ("present", p)) + result = PRAGMA_OMP_CLAUSE_PRESENT; + else if (!strcmp ("present_or_copy", p)) + result = PRAGMA_OMP_CLAUSE_PRESENT_OR_COPY; + else if (!strcmp ("present_or_copyin", p)) + result = PRAGMA_OMP_CLAUSE_PRESENT_OR_COPYIN; + else if (!strcmp ("present_or_copyout", p)) + result = PRAGMA_OMP_CLAUSE_PRESENT_OR_COPYOUT; + else if (!strcmp ("present_or_create", p)) + result = PRAGMA_OMP_CLAUSE_PRESENT_OR_CREATE; else if (!strcmp ("private", p))
[gomp4 2/6] Prepare for extending omp_clause_map_kind.
From: Thomas Schwinge gcc/ * tree-core.h (omp_clause_map_kind): Make the identifiers' bit patterns more obvious. Add comments. * omp-low.c (lower_oacc_parallel, lower_omp_target): Test for omp_clause_map_kind flags set instead of for values. --- gcc/omp-low.c | 22 ++ gcc/tree-core.h | 16 +++- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git gcc/omp-low.c gcc/omp-low.c index eb755c3..899e970 100644 --- gcc/omp-low.c +++ gcc/omp-low.c @@ -8855,13 +8855,16 @@ lower_oacc_parallel (gimple_stmt_iterator *gsi_p, omp_context *ctx) { tree avar = create_tmp_var (TREE_TYPE (var), NULL); mark_addressable (avar); - if (OMP_CLAUSE_MAP_KIND (c) != OMP_CLAUSE_MAP_ALLOC - && OMP_CLAUSE_MAP_KIND (c) != OMP_CLAUSE_MAP_FROM) + enum omp_clause_map_kind map_kind + = OMP_CLAUSE_MAP_KIND (c); + if ((!(map_kind & OMP_CLAUSE_MAP_SPECIAL) +&& (map_kind & OMP_CLAUSE_MAP_TO)) + || map_kind == OMP_CLAUSE_MAP_POINTER) gimplify_assign (avar, var, &ilist); avar = build_fold_addr_expr (avar); gimplify_assign (x, avar, &ilist); - if ((OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_FROM -|| OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_TOFROM) + if ((!(map_kind & OMP_CLAUSE_MAP_SPECIAL) +&& (map_kind & OMP_CLAUSE_MAP_FROM)) && !TYPE_READONLY (TREE_TYPE (var))) { x = build_sender_ref (ovar, ctx); @@ -10331,13 +10334,16 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gcc_assert (kind == GF_OMP_TARGET_KIND_REGION); tree avar = create_tmp_var (TREE_TYPE (var), NULL); mark_addressable (avar); - if (OMP_CLAUSE_MAP_KIND (c) != OMP_CLAUSE_MAP_ALLOC - && OMP_CLAUSE_MAP_KIND (c) != OMP_CLAUSE_MAP_FROM) + enum omp_clause_map_kind map_kind + = OMP_CLAUSE_MAP_KIND (c); + if ((!(map_kind & OMP_CLAUSE_MAP_SPECIAL) +&& (map_kind & OMP_CLAUSE_MAP_TO)) + || map_kind == OMP_CLAUSE_MAP_POINTER) gimplify_assign (avar, var, &ilist); avar = build_fold_addr_expr (avar); gimplify_assign (x, avar, &ilist); - if ((OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_FROM -|| OMP_CLAUSE_MAP_KIND (c) == 
OMP_CLAUSE_MAP_TOFROM) + if ((!(map_kind & OMP_CLAUSE_MAP_SPECIAL) +&& (map_kind & OMP_CLAUSE_MAP_FROM)) && !TYPE_READONLY (TREE_TYPE (var))) { x = build_sender_ref (ovar, ctx); diff --git gcc/tree-core.h gcc/tree-core.h index e2750e0..3602b5f 100644 --- gcc/tree-core.h +++ gcc/tree-core.h @@ -1112,14 +1112,20 @@ enum omp_clause_depend_kind enum omp_clause_map_kind { - OMP_CLAUSE_MAP_ALLOC, - OMP_CLAUSE_MAP_TO, - OMP_CLAUSE_MAP_FROM, - OMP_CLAUSE_MAP_TOFROM, + /* If not already present, allocate. */ + OMP_CLAUSE_MAP_ALLOC = 0, + /* ..., and copy to device. */ + OMP_CLAUSE_MAP_TO = 1 << 0, + /* ..., and copy from device. */ + OMP_CLAUSE_MAP_FROM = 1 << 1, + /* ..., and copy to and from device. */ + OMP_CLAUSE_MAP_TOFROM = OMP_CLAUSE_MAP_TO | OMP_CLAUSE_MAP_FROM, + /* Special map kinds. */ + OMP_CLAUSE_MAP_SPECIAL = 1 << 2, /* The following kind is an internal only map kind, used for pointer based array sections. OMP_CLAUSE_SIZE for these is not the pointer size, which is implicitly POINTER_SIZE / BITS_PER_UNIT, but the bias. */ - OMP_CLAUSE_MAP_POINTER + OMP_CLAUSE_MAP_POINTER = OMP_CLAUSE_MAP_SPECIAL }; enum omp_clause_proc_bind_kind -- 1.8.1.1
[gomp4 6/6] Enable initial support in the C front end for OpenACC data clauses.
From: Thomas Schwinge gcc/c/ * c-parser.c (OACC_PARALLEL_CLAUSE_MASK): Add PRAGMA_OMP_CLAUSE_COPY, PRAGMA_OMP_CLAUSE_COPYIN, PRAGMA_OMP_CLAUSE_COPYOUT, PRAGMA_OMP_CLAUSE_CREATE, PRAGMA_OMP_CLAUSE_DEVICEPTR, PRAGMA_OMP_CLAUSE_PRESENT, PRAGMA_OMP_CLAUSE_PRESENT_OR_COPY, PRAGMA_OMP_CLAUSE_PRESENT_OR_COPYIN, PRAGMA_OMP_CLAUSE_PRESENT_OR_COPYOUT, and PRAGMA_OMP_CLAUSE_PRESENT_OR_CREATE. gcc/testsuite/ * c-c++-common/goacc/data-clause-duplicate-1.c: New file. * c-c++-common/goacc/deviceptr-1.c: New file. libgomp/ * testsuite/libgomp.oacc-c/parallel-1.c: Extend. --- gcc/c/c-parser.c | 14 +- .../c-c++-common/goacc/data-clause-duplicate-1.c | 13 ++ gcc/testsuite/c-c++-common/goacc/deviceptr-1.c | 64 + libgomp/testsuite/libgomp.oacc-c/parallel-1.c | 150 +++-- 4 files changed, 228 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/goacc/data-clause-duplicate-1.c create mode 100644 gcc/testsuite/c-c++-common/goacc/deviceptr-1.c diff --git gcc/c/c-parser.c gcc/c/c-parser.c index 48c55e6..d6a2af0 100644 --- gcc/c/c-parser.c +++ gcc/c/c-parser.c @@ -11225,8 +11225,17 @@ c_parser_omp_structured_block (c_parser *parser) LOC is the location of the #pragma token. 
*/ -#define OACC_PARALLEL_CLAUSE_MASK \ - (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NONE) +#define OACC_PARALLEL_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_COPY) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_COPYOUT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_CREATE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICEPTR)\ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_PRESENT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_PRESENT_OR_COPY) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_PRESENT_OR_COPYIN)\ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_PRESENT_OR_COPYOUT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_PRESENT_OR_CREATE) ) static tree c_parser_oacc_parallel (location_t loc, c_parser *parser) @@ -11235,7 +11244,6 @@ c_parser_oacc_parallel (location_t loc, c_parser *parser) clauses = c_parser_oacc_all_clauses (parser, OACC_PARALLEL_CLAUSE_MASK, "#pragma acc parallel"); - gcc_assert (clauses == NULL); block = c_begin_omp_parallel (); add_stmt (c_parser_omp_structured_block (parser)); diff --git gcc/testsuite/c-c++-common/goacc/data-clause-duplicate-1.c gcc/testsuite/c-c++-common/goacc/data-clause-duplicate-1.c new file mode 100644 index 000..1bcf5be --- /dev/null +++ gcc/testsuite/c-c++-common/goacc/data-clause-duplicate-1.c @@ -0,0 +1,13 @@ +void +fun (void) +{ + float *fp; +#pragma acc parallel copy(fp[0:2],fp[0:2]) /* { dg-error "'fp' appears more than once in map clauses" } */ + ; +#pragma acc parallel present_or_copyin(fp[3]) present_or_copyout(fp[7:4]) /* { dg-error "'fp' appears more than once in map clauses" } */ + ; +#pragma acc parallel create(fp[:10]) deviceptr(fp) + /* { dg-error "'fp' appears more than once in map clauses" "" { target *-*-* } 9 } */ + /* { dg-message "sorry, unimplemented: data clause not yet implemented" "" { target *-*-* } 9 } */ + ; +} diff --git gcc/testsuite/c-c++-common/goacc/deviceptr-1.c gcc/testsuite/c-c++-common/goacc/deviceptr-1.c new file mode 100644 
index 000..0f0cf0c --- /dev/null +++ gcc/testsuite/c-c++-common/goacc/deviceptr-1.c @@ -0,0 +1,64 @@ +void +fun1 (void) +{ +#pragma acc parallel deviceptr(u) /* { dg-error "'u' undeclared" } */ + ; +#pragma acc parallel deviceptr(u[0:4]) /* { dg-error "expected '\\\)' before '\\\[' token" } */ + ; + +#pragma acc parallel deviceptr(fun1) /* { dg-error "'fun1' is not a variable" } */ + ; +#pragma acc parallel deviceptr(fun1[2:5]) + /* { dg-error "'fun1' is not a variable" "not a variable" { target *-*-* } 11 } */ + /* { dg-error "expected '\\\)' before '\\\[' token" "array" { target *-*-* } 11 } */ + ; + + int i; +#pragma acc parallel deviceptr(i) /* { dg-error "'i' is not a pointer variable" } */ + ; +#pragma acc parallel deviceptr(i[0:4]) + /* { dg-error "'i' is not a pointer variable" "not a pointer variable" { target *-*-* } 19 } */ + /* { dg-error "expected '\\\)' before '\\\[' tok
[gomp4 3/6] Initial support for OpenACC memory mapping semantics.
From: Thomas Schwinge gcc/ * tree-core.h (omp_clause_map_kind): Add OMP_CLAUSE_MAP_FORCE, OMP_CLAUSE_MAP_FORCE_ALLOC, OMP_CLAUSE_MAP_FORCE_TO, OMP_CLAUSE_MAP_FORCE_FROM, OMP_CLAUSE_MAP_FORCE_TOFROM, OMP_CLAUSE_MAP_FORCE_PRESENT, OMP_CLAUSE_MAP_FORCE_DEALLOC, and OMP_CLAUSE_MAP_FORCE_DEVICEPTR. * tree-pretty-print.c (dump_omp_clause): Handle these. * gimplify.c (gimplify_omp_var_data): Add GOVD_MAP_FORCE. (omp_region_type): Add ORT_TARGET_MAP_FORCE. (omp_add_variable, omp_notice_threadprivate_variable) (omp_notice_variable, gimplify_scan_omp_clauses) (gimplify_adjust_omp_clauses_1): Extend accordingly. (gimplify_oacc_parallel): Add ORT_TARGET_MAP_FORCE to ORT_TARGET usage. * omp-low.c (install_var_field, scan_sharing_clauses) (lower_oacc_parallel, lower_omp_target): Extend accordingly. --- gcc/gimplify.c | 92 ++--- gcc/omp-low.c | 33 +++--- gcc/tree-core.h | 19 +- gcc/tree-pretty-print.c | 21 +++ 4 files changed, 140 insertions(+), 25 deletions(-) diff --git gcc/gimplify.c gcc/gimplify.c index 90507c2..633784f 100644 --- gcc/gimplify.c +++ gcc/gimplify.c @@ -69,7 +69,13 @@ enum gimplify_omp_var_data GOVD_PRIVATE_OUTER_REF = 1024, GOVD_LINEAR = 2048, GOVD_ALIGNED = 4096, + + /* Flags for GOVD_MAP. */ + /* Don't copy back. */ GOVD_MAP_TO_ONLY = 8192, + /* Force a specific behavior (or else, a run-time error). */ + GOVD_MAP_FORCE = 16384, + GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR | GOVD_LOCAL) @@ -86,7 +92,11 @@ enum omp_region_type ORT_UNTIED_TASK = 5, ORT_TEAMS = 8, ORT_TARGET_DATA = 16, - ORT_TARGET = 32 + ORT_TARGET = 32, + + /* Flags for ORT_TARGET. */ + /* Default to GOVD_MAP_FORCE for implicit mappings in this region. */ + ORT_TARGET_MAP_FORCE = 64 }; /* Gimplify hashtable helper. */ @@ -5430,9 +5440,20 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags) copy into or out of the context. */ if (!(flags & GOVD_LOCAL)) { - nflags = flags & GOVD_MAP - ? 
GOVD_MAP | GOVD_MAP_TO_ONLY | GOVD_EXPLICIT - : flags & GOVD_PRIVATE ? GOVD_PRIVATE : GOVD_FIRSTPRIVATE; + if (flags & GOVD_MAP) + { + nflags = GOVD_MAP | GOVD_MAP_TO_ONLY | GOVD_EXPLICIT; +#if 0 + /* Not sure if this is actually needed; haven't found a case +where this would change anything; TODO. */ + if (flags & GOVD_MAP_FORCE) + nflags |= OMP_CLAUSE_MAP_FORCE; +#endif + } + else if (flags & GOVD_PRIVATE) + nflags = GOVD_PRIVATE; + else + nflags = GOVD_FIRSTPRIVATE; nflags |= flags & GOVD_SEEN; t = DECL_VALUE_EXPR (decl); gcc_assert (TREE_CODE (t) == INDIRECT_REF); @@ -5501,6 +5522,8 @@ omp_notice_threadprivate_variable (struct gimplify_omp_ctx *ctx, tree decl, for (octx = ctx; octx; octx = octx->outer_context) if (octx->region_type & ORT_TARGET) { + gcc_assert (!(octx->region_type & ORT_TARGET_MAP_FORCE)); + n = splay_tree_lookup (octx->variables, (splay_tree_key)decl); if (n == NULL) { @@ -5562,19 +5585,45 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code) n = splay_tree_lookup (ctx->variables, (splay_tree_key)decl); if (ctx->region_type & ORT_TARGET) { + unsigned map_force; + if (ctx->region_type & ORT_TARGET_MAP_FORCE) + map_force = GOVD_MAP_FORCE; + else + map_force = 0; if (n == NULL) { if (!lang_hooks.types.omp_mappable_type (TREE_TYPE (decl))) { error ("%qD referenced in target region does not have " "a mappable type", decl); - omp_add_variable (ctx, decl, GOVD_MAP | GOVD_EXPLICIT | flags); + omp_add_variable (ctx, decl, GOVD_MAP | map_force | GOVD_EXPLICIT | flags); } else - omp_add_variable (ctx, decl, GOVD_MAP | flags); + omp_add_variable (ctx, decl, GOVD_MAP | map_force | flags); } else - n->value |= flags; + { +#if 0 + /* The following fails for: + +int l = 10; +float c[l]; +#pragma acc parallel copy(c[2:4]) + { +#pragma acc parallel +{ + int t = sizeof c; +} + } + +..., which we currently don't have to care about (nesting +disabled), b
[gomp4 1/6] During gimplification, allow additional flags next to ORT_TARGET.
From: Thomas Schwinge gcc/ * gimplify.c (gimplify_call_expr, gimplify_modify_expr) (omp_firstprivatize_variable, omp_notice_threadprivate_variable) (omp_notice_variable, gimplify_adjust_omp_clauses) (gimplify_omp_workshare): Treat ORT_TARGET as a flag, not as a value. --- gcc/gimplify.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git gcc/gimplify.c gcc/gimplify.c index e45bed2..90507c2 100644 --- gcc/gimplify.c +++ gcc/gimplify.c @@ -2363,7 +2363,7 @@ gimplify_call_expr (tree *expr_p, gimple_seq *pre_p, bool want_value) during omplower pass instead. */ struct gimplify_omp_ctx *ctx; for (ctx = gimplify_omp_ctxp; ctx; ctx = ctx->outer_context) - if (ctx->region_type == ORT_TARGET) + if (ctx->region_type & ORT_TARGET) break; if (ctx == NULL) fold_stmt (&gsi); @@ -4534,7 +4534,7 @@ gimplify_modify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, during omplower pass instead. */ struct gimplify_omp_ctx *ctx; for (ctx = gimplify_omp_ctxp; ctx; ctx = ctx->outer_context) -if (ctx->region_type == ORT_TARGET) +if (ctx->region_type & ORT_TARGET) break; if (ctx == NULL) fold_stmt (&gsi); @@ -5317,7 +5317,7 @@ omp_firstprivatize_variable (struct gimplify_omp_ctx *ctx, tree decl) else return; } - else if (ctx->region_type == ORT_TARGET) + else if (ctx->region_type & ORT_TARGET) omp_add_variable (ctx, decl, GOVD_MAP | GOVD_MAP_TO_ONLY); else if (ctx->region_type != ORT_WORKSHARE && ctx->region_type != ORT_SIMD @@ -5499,7 +5499,7 @@ omp_notice_threadprivate_variable (struct gimplify_omp_ctx *ctx, tree decl, struct gimplify_omp_ctx *octx; for (octx = ctx; octx; octx = octx->outer_context) -if (octx->region_type == ORT_TARGET) +if (octx->region_type & ORT_TARGET) { n = splay_tree_lookup (octx->variables, (splay_tree_key)decl); if (n == NULL) @@ -5560,7 +5560,7 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code) } n = splay_tree_lookup (ctx->variables, (splay_tree_key)decl); - if (ctx->region_type == ORT_TARGET) + if 
(ctx->region_type & ORT_TARGET) { if (n == NULL) { @@ -6285,7 +6285,7 @@ gimplify_adjust_omp_clauses (tree *list_p) if (!DECL_P (decl)) break; n = splay_tree_lookup (ctx->variables, (splay_tree_key) decl); - if (ctx->region_type == ORT_TARGET && !(n->value & GOVD_SEEN)) + if ((ctx->region_type & ORT_TARGET) && !(n->value & GOVD_SEEN)) remove = true; else if (DECL_SIZE (decl) && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST @@ -6857,7 +6857,7 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) gcc_unreachable (); } gimplify_scan_omp_clauses (&OMP_CLAUSES (expr), pre_p, ort); - if (ort == ORT_TARGET || ort == ORT_TARGET_DATA) + if ((ort & ORT_TARGET) || ort == ORT_TARGET_DATA) { push_gimplify_context (); gimple g = gimplify_and_return_first (OMP_BODY (expr), &body); -- 1.8.1.1
[gomp4 1/9] Add missing include.
From: Thomas Schwinge libgomp/ * libgomp_g.h: Include <stddef.h> for size_t. --- libgomp/libgomp_g.h | 1 + 1 file changed, 1 insertion(+) diff --git libgomp/libgomp_g.h libgomp/libgomp_g.h index 32c4cf6..577956a 100644 --- libgomp/libgomp_g.h +++ libgomp/libgomp_g.h @@ -29,6 +29,7 @@ #define LIBGOMP_G_H 1 #include <stdbool.h> +#include <stddef.h> /* barrier.c */ -- 1.8.1.1
[gomp4 2/9] libgomp: Prepare for testcases without -fopenmp.
From: Thomas Schwinge libgomp/ * testsuite/lib/libgomp.exp (libgomp_init): Don't add -fopenmp to ALWAYS_CFLAGS. * testsuite/libgomp.c++/c++.exp (ALWAYS_CFLAGS): Add -fopenmp. * testsuite/libgomp.c/c.exp (ALWAYS_CFLAGS): Likewise. * testsuite/libgomp.fortran/fortran.exp (ALWAYS_CFLAGS): Likewise. * testsuite/libgomp.graphite/graphite.exp (ALWAYS_CFLAGS): Likewise. --- libgomp/testsuite/lib/libgomp.exp | 3 --- libgomp/testsuite/libgomp.c++/c++.exp | 3 +++ libgomp/testsuite/libgomp.c/c.exp | 3 +++ libgomp/testsuite/libgomp.fortran/fortran.exp | 3 +++ libgomp/testsuite/libgomp.graphite/graphite.exp | 3 +++ 5 files changed, 12 insertions(+), 3 deletions(-) diff --git libgomp/testsuite/lib/libgomp.exp libgomp/testsuite/lib/libgomp.exp index d1d8bc8..c965147 100644 --- libgomp/testsuite/lib/libgomp.exp +++ libgomp/testsuite/lib/libgomp.exp @@ -169,9 +169,6 @@ proc libgomp_init { args } { # Disable color diagnostics lappend ALWAYS_CFLAGS "additional_flags=-fdiagnostics-color=never" - -# And, gee, turn on OpenMP. -lappend ALWAYS_CFLAGS "additional_flags=-fopenmp" } # diff --git libgomp/testsuite/libgomp.c++/c++.exp libgomp/testsuite/libgomp.c++/c++.exp index b336306..88e017e 100644 --- libgomp/testsuite/libgomp.c++/c++.exp +++ libgomp/testsuite/libgomp.c++/c++.exp @@ -11,6 +11,9 @@ set lang_library_path "../libstdc++-v3/src/.libs" # Initialize dg. dg-init +# Turn on OpenMP. +lappend ALWAYS_CFLAGS "additional_flags=-fopenmp" + set blddir [lookfor_file [get_multilibs] libgomp] diff --git libgomp/testsuite/libgomp.c/c.exp libgomp/testsuite/libgomp.c/c.exp index 7dfdf8b..8e902d4 100644 --- libgomp/testsuite/libgomp.c/c.exp +++ libgomp/testsuite/libgomp.c/c.exp @@ -17,6 +17,9 @@ if ![info exists DEFAULT_CFLAGS] then { # Initialize dg. dg-init +# Turn on OpenMP. +lappend ALWAYS_CFLAGS "additional_flags=-fopenmp" + # Gather a list of all tests. 
set tests [lsort [find $srcdir/$subdir *.c]] diff --git libgomp/testsuite/libgomp.fortran/fortran.exp libgomp/testsuite/libgomp.fortran/fortran.exp index b7fef29..e0bffe3 100644 --- libgomp/testsuite/libgomp.fortran/fortran.exp +++ libgomp/testsuite/libgomp.fortran/fortran.exp @@ -15,6 +15,9 @@ set quadmath_library_path "../libquadmath/.libs" # Initialize dg. dg-init +# Turn on OpenMP. +lappend ALWAYS_CFLAGS "additional_flags=-fopenmp" + if { $blddir != "" } { lappend ALWAYS_CFLAGS "additional_flags=-fintrinsic-modules-path=${blddir}" # Look for a static libgfortran first. diff --git libgomp/testsuite/libgomp.graphite/graphite.exp libgomp/testsuite/libgomp.graphite/graphite.exp index 08aa509..9129964 100644 --- libgomp/testsuite/libgomp.graphite/graphite.exp +++ libgomp/testsuite/libgomp.graphite/graphite.exp @@ -42,6 +42,9 @@ set PARALLEL_CFLAGS "-ansi -pedantic-errors -O2 \ # Initialize `dg'. dg-init +# Turn on OpenMP. +lappend ALWAYS_CFLAGS "additional_flags=-fopenmp" + # Gather a list of all tests. set tests [lsort [find $srcdir/$subdir *.c]] -- 1.8.1.1
[gomp4 5/9] OpenACC: preprocessor definition, Fortran integer parameter.
From: Thomas Schwinge gcc/c-family/ * c-cppbuiltin.c (c_cpp_builtins): Conditionally define _OPENACC. gcc/fortran/ * cpp.c (cpp_define_builtins): Conditionally define _OPENACC. gcc/testsuite/ * c-c++-common/cpp/openacc-define-1.c: Test _OPENACC. * c-c++-common/cpp/openacc-define-2.c: Likewise. * c-c++-common/cpp/openacc-define-3.c: Likewise. * gfortran.dg/openacc-define-1.f90: Likewise. * gfortran.dg/openacc-define-2.f90: Likewise. * gfortran.dg/openacc-define-3.f90: Likewise. libgomp/ * openacc.f90 (openacc_version): New integer parameter. * openacc_lib.h (openacc_version): Likewise. * testsuite/libgomp.oacc-fortran/openacc_version-1.f: New file. * testsuite/libgomp.oacc-fortran/openacc_version-2.f90: Likewise. --- gcc/c-family/c-cppbuiltin.c | 3 +++ gcc/fortran/cpp.c| 3 +++ gcc/testsuite/c-c++-common/cpp/openacc-define-1.c| 4 gcc/testsuite/c-c++-common/cpp/openacc-define-2.c| 4 gcc/testsuite/c-c++-common/cpp/openacc-define-3.c| 8 gcc/testsuite/gfortran.dg/openacc-define-1.f90 | 4 gcc/testsuite/gfortran.dg/openacc-define-2.f90 | 4 gcc/testsuite/gfortran.dg/openacc-define-3.f90 | 8 libgomp/openacc.f90 | 2 ++ libgomp/openacc_lib.h| 3 +++ libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f | 9 + libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90 | 9 + 12 files changed, 61 insertions(+) create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90 diff --git gcc/c-family/c-cppbuiltin.c gcc/c-family/c-cppbuiltin.c index ed4c82c..d48d96f 100644 --- gcc/c-family/c-cppbuiltin.c +++ gcc/c-family/c-cppbuiltin.c @@ -895,6 +895,9 @@ c_cpp_builtins (cpp_reader *pfile) else if (flag_stack_protect == 1) cpp_define (pfile, "__SSP__=1"); + if (flag_openacc) +cpp_define (pfile, "_OPENACC=201306"); + if (flag_openmp) cpp_define (pfile, "_OPENMP=201307"); diff --git gcc/fortran/cpp.c gcc/fortran/cpp.c index ea53681..58f6cc9 100644 --- gcc/fortran/cpp.c +++ 
gcc/fortran/cpp.c @@ -169,6 +169,9 @@ cpp_define_builtins (cpp_reader *pfile) cpp_define (pfile, "__GFORTRAN__=1"); cpp_define (pfile, "_LANGUAGE_FORTRAN=1"); + if (gfc_option.gfc_flag_openacc) +cpp_define (pfile, "_OPENACC=201306"); + if (gfc_option.gfc_flag_openmp) cpp_define (pfile, "_OPENMP=201107"); diff --git gcc/testsuite/c-c++-common/cpp/openacc-define-1.c gcc/testsuite/c-c++-common/cpp/openacc-define-1.c index feaf778..cd37548 100644 --- gcc/testsuite/c-c++-common/cpp/openacc-define-1.c +++ gcc/testsuite/c-c++-common/cpp/openacc-define-1.c @@ -1,2 +1,6 @@ /* { dg-do preprocess } */ /* { dg-require-effective-target fopenacc } */ + +#ifdef _OPENACC +# error _OPENACC defined +#endif diff --git gcc/testsuite/c-c++-common/cpp/openacc-define-2.c gcc/testsuite/c-c++-common/cpp/openacc-define-2.c index a2f3e28..b007e32 100644 --- gcc/testsuite/c-c++-common/cpp/openacc-define-2.c +++ gcc/testsuite/c-c++-common/cpp/openacc-define-2.c @@ -1,3 +1,7 @@ /* { dg-options "-fno-openacc" } */ /* { dg-do preprocess } */ /* { dg-require-effective-target fopenacc } */ + +#ifdef _OPENACC +# error _OPENACC defined +#endif diff --git gcc/testsuite/c-c++-common/cpp/openacc-define-3.c gcc/testsuite/c-c++-common/cpp/openacc-define-3.c index ce270c3..ccedcd9 100644 --- gcc/testsuite/c-c++-common/cpp/openacc-define-3.c +++ gcc/testsuite/c-c++-common/cpp/openacc-define-3.c @@ -1,3 +1,11 @@ /* { dg-options "-fopenacc" } */ /* { dg-do preprocess } */ /* { dg-require-effective-target fopenacc } */ + +#ifndef _OPENACC +# error _OPENACC not defined +#endif + +#if _OPENACC != 201306 +# error _OPENACC defined to wrong value +#endif diff --git gcc/testsuite/gfortran.dg/openacc-define-1.f90 gcc/testsuite/gfortran.dg/openacc-define-1.f90 index b961468..42f4073 100644 --- gcc/testsuite/gfortran.dg/openacc-define-1.f90 +++ gcc/testsuite/gfortran.dg/openacc-define-1.f90 @@ -1,3 +1,7 @@ ! { dg-options "-cpp" } ! { dg-do preprocess } ! 
{ dg-require-effective-target fopenacc } + +#ifdef _OPENACC +# error _OPENACC defined +#endif diff --git gcc/testsuite/gfortran.dg/openacc-define-2.f90 gcc/testsuite/gfortran.dg/openacc-define-2.f90 index 49b714d..8ad1bd5 100644 --- gcc/testsuite/gfortran.dg/openacc-define-2.f90 +++ gcc/testsuite/gfortran.dg/openacc-define-2.f90 @@ -1,3 +1,7 @@ ! { dg-options "-cpp -fno-openacc" } ! { dg-do preprocess } ! { dg-require-effective-target fopenacc } + +#ifdef _OPEN
[gomp4 7/9] OpenACC: Use OpenMP's lowering and expansion passes.
From: Thomas Schwinge gcc/ * gimplify.c (gimplify_body): Consider flag_openacc additionally to flag_openmp. * omp-low.c (execute_expand_omp, execute_lower_omp) (gate_diagnose_omp_blocks): Likewise. gcc/testsuite/ * gcc.dg/goacc-gomp/goacc-gomp.exp: New file. * gcc.dg/goacc/goacc.exp: Likewise. --- gcc/gimplify.c | 4 +-- gcc/omp-low.c | 10 --- gcc/testsuite/gcc.dg/goacc-gomp/goacc-gomp.exp | 38 ++ gcc/testsuite/gcc.dg/goacc/goacc.exp | 37 + 4 files changed, 83 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/goacc-gomp/goacc-gomp.exp create mode 100644 gcc/testsuite/gcc.dg/goacc/goacc.exp diff --git gcc/gimplify.c gcc/gimplify.c index 1f18466..30c2b45 100644 --- gcc/gimplify.c +++ gcc/gimplify.c @@ -8803,7 +8803,7 @@ gimplify_body (tree fndecl, bool do_parms) gcc_assert (gimplify_ctxp == NULL); push_gimplify_context (&gctx); - if (flag_openmp) + if (flag_openacc || flag_openmp) { gcc_assert (gimplify_omp_ctxp == NULL); if (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl))) @@ -8872,7 +8872,7 @@ gimplify_body (tree fndecl, bool do_parms) nonlocal_vlas = NULL; } - if (flag_openmp && gimplify_omp_ctxp) + if ((flag_openacc || flag_openmp) && gimplify_omp_ctxp) { delete_omp_context (gimplify_omp_ctxp); gimplify_omp_ctxp = NULL; diff --git gcc/omp-low.c gcc/omp-low.c index 94058af..99811d0 100644 --- gcc/omp-low.c +++ gcc/omp-low.c @@ -8234,7 +8234,8 @@ execute_expand_omp (void) static bool gate_expand_omp (void) { - return (flag_openmp != 0 && !seen_error ()); + return ((flag_openacc || flag_openmp) + && !seen_error ()); } namespace { @@ -10054,8 +10055,9 @@ execute_lower_omp (void) gimple_seq body; /* This pass always runs, to provide PROP_gimple_lomp. - But there is nothing to do unless -fopenmp is given. */ - if (flag_openmp == 0) + But there is nothing to do unless at least one of -fopenacc or -fopenmp is + given. 
*/ + if (!(flag_openacc || flag_openmp)) return 0; all_contexts = splay_tree_new (splay_tree_compare_pointers, 0, @@ -10484,7 +10486,7 @@ diagnose_omp_structured_block_errors (void) static bool gate_diagnose_omp_blocks (void) { - return flag_openmp != 0; + return flag_openacc || flag_openmp; } namespace { diff --git gcc/testsuite/gcc.dg/goacc-gomp/goacc-gomp.exp gcc/testsuite/gcc.dg/goacc-gomp/goacc-gomp.exp new file mode 100644 index 000..29e9a93 --- /dev/null +++ gcc/testsuite/gcc.dg/goacc-gomp/goacc-gomp.exp @@ -0,0 +1,38 @@ +# Copyright (C) 2006-2013 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# GCC testsuite that uses the `dg.exp' driver. + +# Load support procs. +load_lib gcc-dg.exp + +if { ![check_effective_target_fopenacc] \ + || ![check_effective_target_fopenmp] } { + return +} + +# Initialize `dg'. +dg-init + +# Main loop. +dg-runtest [lsort [concat \ + [find $srcdir/$subdir *.c] \ + [find $srcdir/c-c++-common/goacc-gomp *.c]]] "" "-fopenacc -fopenmp" + +# All done. +dg-finish diff --git gcc/testsuite/gcc.dg/goacc/goacc.exp gcc/testsuite/gcc.dg/goacc/goacc.exp new file mode 100644 index 000..1137c99 --- /dev/null +++ gcc/testsuite/gcc.dg/goacc/goacc.exp @@ -0,0 +1,37 @@ +# Copyright (C) 2006-2013 Free Software Foundation, Inc. +# +# This file is part of GCC. 
+# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# GCC testsuite that uses the `dg.exp' driver. + +# Load support procs. +load_lib gcc-
[gomp4 3/9] OpenACC: Recognize -fopenacc.
From: Thomas Schwinge gcc/c-family/ * c.opt (fopenacc): New option. gcc/fortran/ * lang.opt (fopenacc): New option. * invoke.texi (-fopenacc): Document it. * gfortran.h (gfc_option_t): New member. * options.c (gfc_init_options, gfc_handle_option): Handle it. gcc/testsuite/ * lib/target-supports.exp (check_effective_target_fopenacc): New procedure. gcc/ * doc/invoke.texi (-fopenacc): Document it. * doc/sourcebuild.texi (fopenacc): Document it. gcc/testsuite/ * c-c++-common/cpp/openacc-define-1.c: New file. * c-c++-common/cpp/openacc-define-2.c: Likewise. * c-c++-common/cpp/openacc-define-3.c: Likewise. * gfortran.dg/openacc-define-1.f90: Likewise. * gfortran.dg/openacc-define-2.f90: Likewise. * gfortran.dg/openacc-define-3.f90: Likewise. --- gcc/c-family/c.opt| 4 gcc/doc/invoke.texi | 11 ++- gcc/doc/sourcebuild.texi | 3 +++ gcc/fortran/gfortran.h| 1 + gcc/fortran/invoke.texi | 7 ++- gcc/fortran/lang.opt | 4 gcc/fortran/options.c | 5 + gcc/testsuite/c-c++-common/cpp/openacc-define-1.c | 2 ++ gcc/testsuite/c-c++-common/cpp/openacc-define-2.c | 3 +++ gcc/testsuite/c-c++-common/cpp/openacc-define-3.c | 3 +++ gcc/testsuite/gfortran.dg/openacc-define-1.f90| 3 +++ gcc/testsuite/gfortran.dg/openacc-define-2.f90| 3 +++ gcc/testsuite/gfortran.dg/openacc-define-3.f90| 3 +++ gcc/testsuite/lib/target-supports.exp | 9 + 14 files changed, 59 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/cpp/openacc-define-1.c create mode 100644 gcc/testsuite/c-c++-common/cpp/openacc-define-2.c create mode 100644 gcc/testsuite/c-c++-common/cpp/openacc-define-3.c create mode 100644 gcc/testsuite/gfortran.dg/openacc-define-1.f90 create mode 100644 gcc/testsuite/gfortran.dg/openacc-define-2.f90 create mode 100644 gcc/testsuite/gfortran.dg/openacc-define-3.f90 diff --git gcc/c-family/c.opt gcc/c-family/c.opt index b862eb9..d86d79b 100644 --- gcc/c-family/c.opt +++ gcc/c-family/c.opt @@ -1065,6 +1065,10 @@ fobjc-std=objc1 ObjC ObjC++ Var(flag_objc1_only) Conform to the 
Objective-C 1.0 language as implemented in GCC 4.0 +fopenacc +C ObjC C++ ObjC++ Var(flag_openacc) +Enable OpenACC + fopenmp C ObjC C++ ObjC++ Var(flag_openmp) Enable OpenMP (implies -frecursive in Fortran) diff --git gcc/doc/invoke.texi gcc/doc/invoke.texi index e84bca3..e393139 100644 --- gcc/doc/invoke.texi +++ gcc/doc/invoke.texi @@ -168,7 +168,8 @@ in the following sections. @gccoptlist{-ansi -std=@var{standard} -fgnu89-inline @gol -aux-info @var{filename} -fallow-parameterless-variadic-functions @gol -fno-asm -fno-builtin -fno-builtin-@var{function} @gol --fhosted -ffreestanding -fopenmp -fms-extensions -fplan9-extensions @gol +-fhosted -ffreestanding -fopenacc -fopenmp -fms-extensions @gol +-fplan9-extensions @gol -trigraphs -traditional -traditional-cpp @gol -fallow-single-precision -fcond-mismatch -flax-vector-conversions @gol -fsigned-bitfields -fsigned-char @gol @@ -1831,6 +1832,14 @@ This is equivalent to @option{-fno-hosted}. @xref{Standards,,Language Standards Supported by GCC}, for details of freestanding and hosted environments. +@item -fopenacc +@opindex fopenacc +@cindex OpenACC accelerator programming +Enable handling of OpenACC. +When @option{-fopenacc} is specified, the +compiler generates accelerated code according to the OpenACC Application +Programming Interface v2.0 @w{@uref{http://www.openacc.org/}}. + @item -fopenmp @opindex fopenmp @cindex OpenMP parallel diff --git gcc/doc/sourcebuild.texi gcc/doc/sourcebuild.texi index 1a70916..8b0031c 100644 --- gcc/doc/sourcebuild.texi +++ gcc/doc/sourcebuild.texi @@ -1787,6 +1787,9 @@ Target supports Graphite optimizations. @item fixed_point Target supports fixed-point extension to C. +@item fopenacc +Target supports OpenACC via @option{-fopenacc}. + @item fopenmp Target supports OpenMP via @option{-fopenmp}. 
diff --git gcc/fortran/gfortran.h gcc/fortran/gfortran.h index b28edd8..5089691 100644 --- gcc/fortran/gfortran.h +++ gcc/fortran/gfortran.h @@ -2285,6 +2285,7 @@ typedef struct int blas_matmul_limit; int flag_cray_pointer; int flag_d_lines; + int gfc_flag_openacc; int gfc_flag_openmp; int flag_sign_zero; int flag_stack_arrays; diff --git gcc/fortran/invoke.texi gcc/fortran/invoke.texi index eb678d1..46fca59 100644 --- gcc/fortran/invoke.texi +++ gcc/fortran/invoke.texi @@ -120,7 +120,7 @@ by type. Explanations are in the following sections. -ffixed-line-length-none -ffree-form -ffree-line-length-@var{n} @gol -ffree-line-length-none -fimplicit-none -finteger-4-integer-8 @gol
[gomp4 8/9] OpenACC: Basic support for #pragma acc in the C front end.
From: Thomas Schwinge gcc/c-family/ * c-pragma.c (oacc_pragmas): New array. (c_pp_lookup_pragma, init_pragma): Handle it. gcc/ * doc/invoke.texi (-fopenacc): Update. gcc/c/ * c-parser.c (c_parser_omp_all_clauses): Make a parser error message suitable for OpenACC, too. gcc/cp/ * parser.c (cp_parser_omp_all_clauses): Make a parser error message suitable for OpenACC, too. --- gcc/c-family/c-pragma.c | 22 ++ gcc/c/c-parser.c| 2 +- gcc/cp/parser.c | 2 +- gcc/doc/invoke.texi | 2 +- 4 files changed, 25 insertions(+), 3 deletions(-) diff --git gcc/c-family/c-pragma.c gcc/c-family/c-pragma.c index 3ce77a2..98f98d0 100644 --- gcc/c-family/c-pragma.c +++ gcc/c-family/c-pragma.c @@ -1164,6 +1164,8 @@ typedef struct static vec registered_pp_pragmas; struct omp_pragma_def { const char *name; unsigned int id; }; +static const struct omp_pragma_def oacc_pragmas[] = { +}; static const struct omp_pragma_def omp_pragmas[] = { { "atomic", PRAGMA_OMP_ATOMIC }, { "barrier", PRAGMA_OMP_BARRIER }, @@ -1194,9 +1196,18 @@ static const struct omp_pragma_def omp_pragmas[] = { void c_pp_lookup_pragma (unsigned int id, const char **space, const char **name) { + const int n_oacc_pragmas = sizeof (oacc_pragmas) / sizeof (*oacc_pragmas); const int n_omp_pragmas = sizeof (omp_pragmas) / sizeof (*omp_pragmas); int i; + for (i = 0; i < n_oacc_pragmas; ++i) +if (oacc_pragmas[i].id == id) + { + *space = "acc"; + *name = oacc_pragmas[i].name; + return; + } + for (i = 0; i < n_omp_pragmas; ++i) if (omp_pragmas[i].id == id) { @@ -1348,6 +1359,17 @@ c_invoke_pragma_handler (unsigned int id) void init_pragma (void) { + if (flag_openacc) +{ + const int n_oacc_pragmas + = sizeof (oacc_pragmas) / sizeof (*oacc_pragmas); + int i; + + for (i = 0; i < n_oacc_pragmas; ++i) + cpp_register_deferred_pragma (parse_in, "acc", oacc_pragmas[i].name, + oacc_pragmas[i].id, true, true); +} + if (flag_openmp) { const int n_omp_pragmas = sizeof (omp_pragmas) / sizeof (*omp_pragmas); diff --git gcc/c/c-parser.c 
gcc/c/c-parser.c index a8f4774..8a1e988 100644 --- gcc/c/c-parser.c +++ gcc/c/c-parser.c @@ -10730,7 +10730,7 @@ c_parser_omp_all_clauses (c_parser *parser, omp_clause_mask mask, c_name = "simdlen"; break; default: - c_parser_error (parser, "expected %<#pragma omp%> clause"); + c_parser_error (parser, "expected clause"); goto saw_error; } diff --git gcc/cp/parser.c gcc/cp/parser.c index bbc8e75..c3345ee 100644 --- gcc/cp/parser.c +++ gcc/cp/parser.c @@ -27911,7 +27911,7 @@ cp_parser_omp_all_clauses (cp_parser *parser, omp_clause_mask mask, c_name = "simdlen"; break; default: - cp_parser_error (parser, "expected %<#pragma omp%> clause"); + cp_parser_error (parser, "expected clause"); goto saw_error; } diff --git gcc/doc/invoke.texi gcc/doc/invoke.texi index af8973a..cc4a6da 100644 --- gcc/doc/invoke.texi +++ gcc/doc/invoke.texi @@ -1835,7 +1835,7 @@ freestanding and hosted environments. @item -fopenacc @opindex fopenacc @cindex OpenACC accelerator programming -Enable handling of OpenACC. +Enable handling of OpenACC directives @code{#pragma acc} in C. When @option{-fopenacc} is specified, the compiler generates accelerated code according to the OpenACC Application Programming Interface v2.0 @w{@uref{http://www.openacc.org/}}. This option -- 1.8.1.1
[gomp4 6/9] OpenACC: Infrastructure for builtins.
From: Thomas Schwinge gcc/ * oacc-builtins.def: New file. * Makefile.in (BUILTINS_DEF): Add oacc-builtins.def. * builtins.def (DEF_GOACC_BUILTIN): New macro. Include "oacc-builtins.def". gcc/fortran/ * f95-lang.c (DEF_GOACC_BUILTIN): New macro. Include "../oacc-builtins.def". libgomp/ * libgomp.map (GOACC_2.0): New symbol version. --- gcc/Makefile.in| 3 ++- gcc/builtins.def | 10 ++ gcc/fortran/f95-lang.c | 10 ++ gcc/oacc-builtins.def | 28 libgomp/libgomp.map| 3 +++ 5 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 gcc/oacc-builtins.def diff --git gcc/Makefile.in gcc/Makefile.in index cc88fb8..0511097 100644 --- gcc/Makefile.in +++ gcc/Makefile.in @@ -871,7 +871,8 @@ FIXED_VALUE_H = fixed-value.h $(MACHMODE_H) double-int.h RTL_H = $(RTL_BASE_H) $(FLAGS_H) genrtl.h READ_MD_H = $(OBSTACK_H) $(HASHTAB_H) read-md.h PARAMS_H = params.h params.def -BUILTINS_DEF = builtins.def sync-builtins.def omp-builtins.def \ +BUILTINS_DEF = builtins.def sync-builtins.def \ + oacc-builtins.def omp-builtins.def \ gtm-builtins.def sanitizer.def cilkplus.def cilk-builtins.def INTERNAL_FN_DEF = internal-fn.def INTERNAL_FN_H = internal-fn.h $(INTERNAL_FN_DEF) diff --git gcc/builtins.def gcc/builtins.def index e2d8849..9a9a20a 100644 --- gcc/builtins.def +++ gcc/builtins.def @@ -139,6 +139,13 @@ along with GCC; see the file COPYING3. If not see DEF_BUILTIN (ENUM, NAME, BUILT_IN_NORMAL, BT_LAST, BT_LAST, false, false, \ false, ATTR_LAST, false, false) +/* Builtin used by the implementation of GNU OpenACC. None of these are + actually implemented in the compiler; they're all in libgomp. */ +#undef DEF_GOACC_BUILTIN +#define DEF_GOACC_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ + DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE,\ + false, true, true, ATTRS, false, flag_openacc) + /* Builtin used by the implementation of GNU OpenMP. None of these are actually implemented in the compiler; they're all in libgomp. 
*/ #undef DEF_GOMP_BUILTIN @@ -856,6 +863,9 @@ DEF_GCC_BUILTIN (BUILT_IN_LINE, "LINE", BT_FN_INT, ATTR_NOTHROW_LEAF_LIST) /* Synchronization Primitives. */ #include "sync-builtins.def" +/* OpenACC builtins. */ +#include "oacc-builtins.def" + /* OpenMP builtins. */ #include "omp-builtins.def" diff --git gcc/fortran/f95-lang.c gcc/fortran/f95-lang.c index 873c137..69012b6 100644 --- gcc/fortran/f95-lang.c +++ gcc/fortran/f95-lang.c @@ -1035,6 +1035,16 @@ gfc_init_builtin_functions (void) #include "../sync-builtins.def" #undef DEF_SYNC_BUILTIN + if (gfc_option.gfc_flag_openacc) +{ +#undef DEF_GOACC_BUILTIN +#define DEF_GOACC_BUILTIN(code, name, type, attr) \ + gfc_define_builtin ("__builtin_" name, builtin_types[type], \ + code, name, attr); +#include "../oacc-builtins.def" +#undef DEF_GOACC_BUILTIN +} + if (gfc_option.gfc_flag_openmp || flag_tree_parallelize_loops) { #undef DEF_GOMP_BUILTIN diff --git gcc/oacc-builtins.def gcc/oacc-builtins.def new file mode 100644 index 000..fd630e0 --- /dev/null +++ gcc/oacc-builtins.def @@ -0,0 +1,28 @@ +/* This file contains the definitions and documentation for the + OpenACC builtins used in the GNU compiler. + + Copyright (C) 2013 Free Software Foundation, Inc. + + Contributed by Thomas Schwinge . + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +/* Before including this file, you should define a macro: + + DEF_GOACC_BUILTIN (ENUM, NAME, TYPE, ATTRS) + + See builtins.def for details. */ diff --git libgomp/libgomp.map libgomp/libgomp.map index 4f87d00..f094ed2 100644 --- libgomp/libgomp.map +++ libgomp/libgomp.map @@ -230,3 +230,6 @@ GOMP_4.0 { OACC_2.0 { }; + +GOACC_2.0 { +}; -- 1.8.1.1
[gomp4 4/9] OpenACC: The runtime library will be implemented in libgomp, too.
From: Thomas Schwinge gcc/ * gcc.c (LINK_COMMAND_SPEC, GOMP_SELF_SPECS): For -fopenacc, link to libgomp and its dependencies. * config/arc/arc.h (LINK_COMMAND_SPEC): Likewise. * config/darwin.h (LINK_COMMAND_SPEC_A): Likewise. * config/i386/mingw32.h (GOMP_SELF_SPECS): Likewise. * config/ia64/hpux.h (LIB_SPEC): Likewise. * config/pa/pa-hpux11.h (LIB_SPEC): Likewise. * config/pa/pa64-hpux.h (LIB_SPEC): Likewise. * doc/invoke.texi (-fopenacc): Update. libgomp/ * libgomp.map (OACC_2.0): New symbol version. * libgomp.spec.in: Update comment. * configure.ac: Likewise. * configure: Regenerate. * Makefile.am (nodist_libsubinclude_HEADERS): Add openacc.h. (nodist_finclude_HEADERS): Add openacc_lib.h, openacc.f90, openacc.mod, and openacc_kinds.mod. (openacc_kinds.mod): New target. (%.mod): New target, generalized from omp_lib.mod. * Makefile.in: Regenerate. * openacc.f90: New file. * openacc.h: Likewise. * openacc_lib.h: Likewise. * testsuite/libgomp.oacc-c++/c++.exp: Likewise. * testsuite/libgomp.oacc-c/c.exp: Likewise. * testsuite/libgomp.oacc-c/lib-1.c: Likewise. * testsuite/libgomp.oacc-fortran/fortran.exp: Likewise. * testsuite/libgomp.oacc-fortran/lib-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/lib-2.f: Likewise. * testsuite/libgomp.oacc-fortran/lib-3.f: Likewise. 
--- gcc/config/arc/arc.h | 2 +- gcc/config/darwin.h| 2 +- gcc/config/i386/mingw32.h | 2 +- gcc/config/ia64/hpux.h | 2 +- gcc/config/pa/pa-hpux11.h | 2 +- gcc/config/pa/pa64-hpux.h | 12 ++-- gcc/doc/invoke.texi| 4 +- gcc/gcc.c | 7 ++- libgomp/Makefile.am| 9 ++- libgomp/Makefile.in| 10 +++- libgomp/configure | 2 +- libgomp/configure.ac | 2 +- libgomp/libgomp.map| 3 + libgomp/libgomp.spec.in| 2 +- libgomp/openacc.f90| 37 libgomp/openacc.h | 45 ++ libgomp/openacc_lib.h | 26 libgomp/testsuite/libgomp.oacc-c++/c++.exp | 66 + libgomp/testsuite/libgomp.oacc-c/c.exp | 36 +++ libgomp/testsuite/libgomp.oacc-c/lib-1.c | 7 +++ libgomp/testsuite/libgomp.oacc-fortran/fortran.exp | 69 ++ libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 | 3 + libgomp/testsuite/libgomp.oacc-fortran/lib-2.f | 3 + libgomp/testsuite/libgomp.oacc-fortran/lib-3.f | 3 + 24 files changed, 332 insertions(+), 24 deletions(-) create mode 100644 libgomp/openacc.f90 create mode 100644 libgomp/openacc.h create mode 100644 libgomp/openacc_lib.h create mode 100644 libgomp/testsuite/libgomp.oacc-c++/c++.exp create mode 100644 libgomp/testsuite/libgomp.oacc-c/c.exp create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/fortran.exp create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-2.f create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-3.f diff --git gcc/config/arc/arc.h gcc/config/arc/arc.h index 637f7b6..14fc717 100644 --- gcc/config/arc/arc.h +++ gcc/config/arc/arc.h @@ -174,7 +174,7 @@ along with GCC; see the file COPYING3. 
If not see %(linker) %l " LINK_PIE_SPEC "%X %{o*} %{A} %{d} %{e*} %{m} %{N} %{n} %{r}\ %{s} %{t} %{u*} %{x} %{z} %{Z} %{!A:%{!nostdlib:%{!nostartfiles:%S}}}\ %{static:} %{L*} %(mfwrap) %(link_libgcc) %o\ -%{fopenmp:%:include(libgomp.spec)%(link_gomp)} %(mflib)\ +%{fopenacc|fopenmp:%:include(libgomp.spec)%(link_gomp)} %(mflib)\ %{fprofile-arcs|fprofile-generate|coverage:-lgcov}\ %{!nostdlib:%{!nodefaultlibs:%(link_ssp) %(link_gcc_c_sequence)}}\ %{!A:%{!nostdlib:%{!nostartfiles:%E}}} %{T*} }}" diff --git gcc/config/darwin.h gcc/config/darwin.h index 596c9ef..735b6b9 100644 --- gcc/config/darwin.h +++ gcc/config/darwin.h @@ -176,7 +176,7 @@ extern GTY(()) int darwin_ms_struct; %{o*}%{!o:-o a.out} \ %{!nostdlib:%{!nostartfiles:%S}} \ %{L*} %(link_libgcc) %o %{fprofile-arcs|fprofile-generate*|coverage:-lgcov} \ -%{fopenmp|ftree-parallelize-loops=*: \ +%{fopenacc|fopenmp|ftree-parallelize-loops=*: \ %{static|static-libgcc|static-libstdc++|static-libgfortran: libgomp.a%s; : -lgomp } } \ %{fgnu-tm: \
Re: [Patch] Fortran: Support OpenMP's 'allocate' directive for stack vars
Hi Tobias! On 2023-10-13T15:29:52+0200, Tobias Burnus wrote: > => Updated patch attached When cherry-picking this commit 2d3dbf0eff668bed5f5f168b3cafd8590c54 "Fortran: Support OpenMP's 'allocate' directive for stack vars" on top of slightly older GCC sources (mentioning that just in case that's relevant), in a configuration with offloading enabled (only), I see: +FAIL: gfortran.dg/gomp/allocate-13.f90 -O (internal compiler error: tree code 'statement_list' is not supported in LTO streams) +FAIL: gfortran.dg/gomp/allocate-13.f90 -O (test for excess errors) during IPA pass: modref [...]/gcc/testsuite/gfortran.dg/gomp/allocate-13.f90:10:3: internal compiler error: tree code 'statement_list' is not supported in LTO streams 0x13374fd lto_write_tree [...]/gcc/lto-streamer-out.cc:561 0x13374fd lto_output_tree_1 [...]/gcc/lto-streamer-out.cc:599 0x133f55b DFS::DFS(output_block*, tree_node*, bool, bool, bool) [...]/gcc/lto-streamer-out.cc:899 0x1340287 lto_output_tree(output_block*, tree_node*, bool, bool) [...]/gcc/lto-streamer-out.cc:1865 0x134197a output_function [...]/gcc/lto-streamer-out.cc:2436 0x134197a lto_output() [...]/gcc/lto-streamer-out.cc:2807 0x13d0551 write_lto [...]/gcc/passes.cc:2774 0x13d0551 ipa_write_summaries_1 [...]/gcc/passes.cc:2838 0x13d0551 ipa_write_summaries() [...]/gcc/passes.cc:2894 0x1002f2c ipa_passes [...]/gcc/cgraphunit.cc:2251 0x1002f2c symbol_table::compile() [...]/gcc/cgraphunit.cc:2331 0x10056b7 symbol_table::compile() [...]/gcc/cgraphunit.cc:2311 0x10056b7 symbol_table::finalize_compilation_unit() [...]/gcc/cgraphunit.cc:2583 Similarly: +FAIL: libgomp.fortran/allocate-6.f90 -O (internal compiler error: tree code 'statement_list' is not supported in LTO streams) +FAIL: libgomp.fortran/allocate-7.f90 -O (internal compiler error: tree code 'statement_list' is not supported in LTO streams) Grüße Thomas > Fortran: Support OpenMP's 'allocate' directive for stack vars > > gcc/fortran/ChangeLog: > > * gfortran.h (ext_attr_t): Add omp_allocate 
flag. > * match.cc (gfc_free_omp_namelist): Avoid deleting same > u2.allocator multiple times now that a sequence can use > the same one. > * openmp.cc (gfc_match_omp_clauses, gfc_match_omp_allocate): Use > same allocator expr multiple times. > (is_predefined_allocator): Make static. > (gfc_resolve_omp_allocate): Update/extend restriction checks; > remove sorry message. > (resolve_omp_clauses): Reject coarrays in allocate/allocators > directive. > * parse.cc (check_omp_allocate_stmt): Permit procedure pointers > here (rejected later) for less misleading diagnostic. > * trans-array.cc (gfc_trans_auto_array_allocation): Propagate > size for GOMP_alloc and location to which it should be added. > * trans-decl.cc (gfc_trans_deferred_vars): Handle 'omp allocate' > for stack variables; sorry for static variables/common blocks. > * trans-openmp.cc (gfc_trans_omp_clauses): Evaluate 'allocate' > clause's allocator only once; fix adding expressions to the > block. > (gfc_trans_omp_single): Pass a block to gfc_trans_omp_clauses. > > gcc/ChangeLog: > > * gimplify.cc (gimplify_bind_expr): Handle Fortran's > 'omp allocate' for stack variables. > > libgomp/ChangeLog: > > * libgomp.texi (OpenMP Impl. Status): Mention that Fortran now > supports the allocate directive for stack variables. > * testsuite/libgomp.fortran/allocate-5.f90: New test. > * testsuite/libgomp.fortran/allocate-6.f90: New test. > * testsuite/libgomp.fortran/allocate-7.f90: New test. > * testsuite/libgomp.fortran/allocate-8.f90: New test. > > gcc/testsuite/ChangeLog: > > * c-c++-common/gomp/allocate-14.c: Fix directive name. > * c-c++-common/gomp/allocate-15.c: Likewise. > * c-c++-common/gomp/allocate-9.c: Fix comment typo. > * gfortran.dg/gomp/allocate-4.f90: Remove sorry dg-error. > * gfortran.dg/gomp/allocate-7.f90: Likewise. > * gfortran.dg/gomp/allocate-10.f90: New test. > * gfortran.dg/gomp/allocate-11.f90: New test. > * gfortran.dg/gomp/allocate-12.f90: New test. 
> * gfortran.dg/gomp/allocate-13.f90: New test. > * gfortran.dg/gomp/allocate-14.f90: New test. > * gfortran.dg/gomp/allocate-15.f90: New test. > * gfortran.dg/gomp/allocate-8.f90: New test. > * gfortran.dg/gomp/alloca
Re: [PING] [PATCH] Harmonize headers between both dg-extract-results scripts
Print the "Test run" line. >>>>>> * dg-extract-results.sh: Print the "Host" line. >>>>>> diff --git a/contrib/dg-extract-results.py >>>>>> b/contrib/dg-extract-results.py >>>>>> index 30aa68771d4..34da1808c5f 100644 >>>>>> --- a/contrib/dg-extract-results.py >>>>>> +++ b/contrib/dg-extract-results.py >>>>>> @@ -113,7 +113,7 @@ class Prog: >>>>>> # Whether to create .sum rather than .log output. >>>>>> self.do_sum = True >>>>>> # Regexps used while parsing. >>>>>> -self.test_run_re = re.compile (r'^Test Run By (\S+) on (.*)$') >>>>>> +self.test_run_re = re.compile (r'^Test run by (\S+) on (.*)$') >>>>>> self.tool_re = re.compile (r'^\t\t=== (.*) tests ===$') >>>>>> self.result_re = re.compile >>>>>> (r'^(PASS|XPASS|FAIL|XFAIL|UNRESOLVED' >>>>>> >>>>>> r'|WARNING|ERROR|UNSUPPORTED|UNTESTED' >>>>>> diff --git a/contrib/dg-extract-results.sh >>>>>> b/contrib/dg-extract-results.sh >>>>>> index ff6c50d029c..57f6fe0e997 100755 >>>>>> --- a/contrib/dg-extract-results.sh >>>>>> +++ b/contrib/dg-extract-results.sh >>>>>> @@ -271,7 +271,7 @@ cat $SUM_FILES \ >>>>>> >>>>>> # Write the begining of the combined summary file. >>>>>> >>>>>> -head -n 2 $FIRST_SUM >>>>>> +head -n 3 $FIRST_SUM >>>>>> echo >>>>>> echo " === $TOOL tests ===" >>>>>> echo - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Re: [Patch] OpenMP: Add ME support for 'omp allocate' stack variables
Hi Tobias! No need to change anything now, but in case that's useful later: On 2023-09-18T14:22:50+0200, Tobias Burnus wrote: > --- /dev/null > +++ b/libgomp/testsuite/libgomp.c/allocate-4.c > @@ -0,0 +1,84 @@ > +/* TODO: move to ../libgomp.c-c++-common once C++ is implemented. */ > +/* NOTE: { target c } is unsupported with with the C compiler. */ > +[...] ..., just noting that '{ target c }', '{ target c++ }' are trivial to implement; see libgomp OpenACC testing: libgomp/testsuite/libgomp.oacc-c/c.exp:proc check_effective_target_c { } { libgomp/testsuite/libgomp.oacc-c/c.exp-return 1 libgomp/testsuite/libgomp.oacc-c/c.exp-} libgomp/testsuite/libgomp.oacc-c/c.exp:proc check_effective_target_c++ { } { libgomp/testsuite/libgomp.oacc-c/c.exp-return 0 libgomp/testsuite/libgomp.oacc-c/c.exp-} libgomp/testsuite/libgomp.oacc-c++/c++.exp:proc check_effective_target_c { } { libgomp/testsuite/libgomp.oacc-c++/c++.exp-return 0 libgomp/testsuite/libgomp.oacc-c++/c++.exp-} libgomp/testsuite/libgomp.oacc-c++/c++.exp:proc check_effective_target_c++ { } { libgomp/testsuite/libgomp.oacc-c++/c++.exp-return 1 libgomp/testsuite/libgomp.oacc-c++/c++.exp-} Grüße Thomas > --- /dev/null > +++ b/libgomp/testsuite/libgomp.c/allocate-5.c > @@ -0,0 +1,126 @@ > +/* TODO: move to ../libgomp.c-c++-common once C++ is implemented. */ > +/* NOTE: { target c } is unsupported with with the C compiler. */ > +[...] > --- /dev/null > +++ b/libgomp/testsuite/libgomp.c/allocate-6.c > @@ -0,0 +1,319 @@ > +/* TODO: move to ../libgomp.c-c++-common once C++ is implemented. */ > +/* NOTE: { target c } is unsupported with with the C compiler. */ > +[...] - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Re: [Patch] OpenMP: Add ME support for 'omp allocate' stack variables
Hi Tobias! On 2023-10-18T11:53:30+0200, Tobias Burnus wrote: > On 18.10.23 11:44, Thomas Schwinge wrote: >> No need to change anything now, but in case that's useful later: >> [...] >> ..., just noting that '{ target c }', '{ target c++ }' are trivial to >> implement; see libgomp OpenACC testing: >> >> libgomp/testsuite/libgomp.oacc-c/c.exp:proc check_effective_target_c { } { > > Shouldn't we rather do something like the following (untested)? > > --- a/libgomp/testsuite/lib/libgomp.exp > +++ b/libgomp/testsuite/lib/libgomp.exp > @@ -26,2 +26,3 @@ load_gcc_lib timeout.exp > load_gcc_lib file-format.exp > +load_gcc_lib target-supports.exp > load_gcc_lib target-supports-dg.exp 'gcc/testsuite/lib/target-supports.exp' defines: # Return 1 if the language for the compiler under test is C. proc check_effective_target_c { } { global tool if [string match $tool "gcc"] { return 1 } return 0 } # Return 1 if the language for the compiler under test is C++. proc check_effective_target_c++ { } { global tool if { [string match $tool "g++"] || [string match $tool "libstdc++"] } { return 1 } return 0 } However, (per my understanding; not verified) 'tool == libgomp' for libgomp testing, so that doesn't work. Grüße Thomas ----- Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Re: [Patch] nvptx: Use fatal_error when -march= is missing not an assert [PR111093]
Hi Tobias! On 2023-10-16T11:18:45+0200, Tobias Burnus wrote: > While mkoffload ensures that there is always a -march=, nvptx's > cc1 can also be run directly. > > In my case, I wanted to know which target-specific #defines are > available; hence, I did run: >accel/nvptx-none/cc1 -E -dM < /dev/null > which gave an ICE. After some debugging, the reason was > clear (missing -march=) but somehow a (fatal) error would have been > nicer than an ICE + debugging. > > OK for mainline? Yes, thanks. I think I prefer this over hard-coding some default 'ptx_isa_option' -- but may be convinced otherwise (incremental change), if that's maybe more convenient for others? (Roger?) Grüße Thomas > nvptx: Use fatal_error when -march= is missing not an assert [PR111093] > > gcc/ChangeLog: > > PR target/111093 > * config/nvptx/nvptx.cc (nvptx_option_override): Issue fatal error > instead of an assert ICE when no -march= has been specified. > > diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc > index edef39fb5e1..634c31673be 100644 > --- a/gcc/config/nvptx/nvptx.cc > +++ b/gcc/config/nvptx/nvptx.cc > @@ -335,8 +335,9 @@ nvptx_option_override (void) >init_machine_status = nvptx_init_machine_status; > >/* Via nvptx 'OPTION_DEFAULT_SPECS', '-misa' always appears on the command > - line. */ > - gcc_checking_assert (OPTION_SET_P (ptx_isa_option)); > + line; but handle the case that the compiler is not run via the driver. > */ > + if (!OPTION_SET_P (ptx_isa_option)) > +fatal_error (UNKNOWN_LOCATION, "%<-march=%> must be specified"); > >handle_ptx_version_option (); > - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Enable top-level recursive 'autoreconf' (was: Hints on reconfiguring GCC)
Hi! On 2023-10-18T15:42:18+0100, R jd <3246251196r...@gmail.com> wrote: > I guess I can ask, why there is not a recursive approach for configuring > GCC. e.g. AC_SUBDIRS in the top level? ('AC_CONFIG_SUBDIRS' you mean.) You know, often it just takes someone to ask the right questions... ;-) What do people think about the attached "Enable top-level recursive 'autoreconf'"? Only lightly tested, so far. Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 43127e5643337ca407071ad93bccbc716024352e Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Thu, 19 Oct 2023 10:28:30 +0200 Subject: [PATCH] Enable top-level recursive 'autoreconf' * configure.ac: At end of file, instantiate 'AC_CONFIG_SUBDIRS' for all relevant directories. * configure: Regenerate. --- configure| 102 ++- configure.ac | 36 ++ 2 files changed, 136 insertions(+), 2 deletions(-) diff --git a/configure b/configure index 8fc163d36bd..fcb4d591334 100755 --- a/configure +++ b/configure @@ -584,7 +584,8 @@ PACKAGE_URL= ac_unique_file="move-if-change" enable_option_checking=no -ac_subst_vars='LTLIBOBJS +ac_subst_vars='subdirs +LTLIBOBJS LIBOBJS compare_exclusions stage2_werror_flag @@ -909,7 +910,37 @@ READELF_FOR_TARGET STRIP_FOR_TARGET WINDRES_FOR_TARGET WINDMC_FOR_TARGET' - +ac_subdirs_all='c++tools +fixincludes +gcc +gcc/m2 +gnattools +gotools +intl +libada +libatomic +libbacktrace +libcc1 +libcody +libcpp +libdecnumber +libffi +libgcc +libgfortran +libgm2 +libgo +libgomp +libiberty +libitm +libobjc +libphobos +libquadmath +libsanitizer +libssp +libstdc++-v3 +libvtv +lto-plugin +zlib' # Initialize some variables set by options. 
ac_init_help= @@ -20081,3 +20112,70 @@ if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} fi + +# Enable top-level recursive 'autoreconf' by enumerating all relevant +# directories here. This is intentionally done at end of 'configure.ac', +# *after* 'AC_OUTPUT', so that we don't attempt to prematurely 'configure' +# these directories when the top-level 'configure' is invoked. +subdirs="$subdirs c++tools" + +subdirs="$subdirs fixincludes" + +subdirs="$subdirs gcc" + +subdirs="$subdirs gcc/m2" + +subdirs="$subdirs gnattools" + +subdirs="$subdirs gotools" + +subdirs="$subdirs intl" + +subdirs="$subdirs libada" + +subdirs="$subdirs libatomic" + +subdirs="$subdirs libbacktrace" + +subdirs="$subdirs libcc1" + +subdirs="$subdirs libcody" + +subdirs="$subdirs libcpp" + +subdirs="$subdirs libdecnumber" + +subdirs="$subdirs libffi" + +subdirs="$subdirs libgcc" + +subdirs="$subdirs libgfortran" + +subdirs="$subdirs libgm2" + +subdirs="$subdirs libgo" + +subdirs="$subdirs libgomp" + +subdirs="$subdirs libiberty" + +subdirs="$subdirs libitm" + +subdirs="$subdirs libobjc" + +subdirs="$subdirs libphobos" + +subdirs="$subdirs libquadmath" + +subdirs="$subdirs libsanitizer" + +subdirs="$subdirs libssp" + +subdirs="$subdirs libstdc++-v3" + +subdirs="$subdirs libvtv" + +subdirs="$subdirs lto-plugin" + +subdirs="$subdirs zlib" + diff --git a/configure.ac b/configure.ac index 1d16530140a..0d37d30196e 100644 --- a/configure.ac +++ b/configure.ac @@ -3944,3 +3944,39 @@ AC_CONFIG_FILES([Makefile], extrasub_host="$extrasub_host" extrasub_target="$extrasub_target"]) AC_OUTPUT + +# Enable top-level recursive 'autoreconf' by enumerating all relevant +# directories here. This is intentionally done at end of 'configure.ac', +# *after* 'AC_OUTPUT', so that we don't attempt to prematurely 'configure' +# these directories when the top-level 'configure' is invoked. 
+AC_CONFIG_SUBDIRS([c++tools]) +AC_CONFIG_SUBDIRS([fixincludes]) +AC_CONFIG_SUBDIRS([gcc]) +AC_CONFIG_SUBDIRS([gcc/m2]) +AC_CONFIG_SUBDIRS([gnattools]) +AC_CONFIG_SUBDIRS([gotools]) +AC_CONFIG_SUBDIRS([intl]) +AC_CONFIG_SUBDIRS([libada]) +AC_CONFIG_SUBDIRS([libatomic]) +AC_CONFIG_SUBDIRS([libbacktrace]) +AC_CONFIG_SUBDIRS([libcc1]) +AC_CONFIG_SUBDIRS([libcody]) +AC_CONFIG_SUBDIRS([libcpp]) +AC_CONFIG_SUBDIRS([libdecnumber]) +AC_CONFIG_SUBDIRS([libffi]) +AC_CONFIG_SUBDIRS([libgcc]) +AC_CONFIG_SUBDIRS([libgfortran]) +AC_CONFIG_SUBDIRS([libgm2]) +AC_CONFIG_SUBDIRS([libgo]) +AC_CONFIG_SUBDIRS([libgomp]) +AC_CONFIG_SUBDIRS([libiberty]) +AC_CONFIG_SUBDIRS([libitm]) +AC_CONFIG_SUBDIRS([libobjc]) +AC_CONFIG_SUBDIRS([libphobos]) +AC_CONFIG_SUBDIRS([libquadmath]) +AC_CONFIG_SUBDIRS([libsanitizer]) +AC_CONFIG_SUBDIRS([libssp]) +AC_CONFIG_SUBDIRS([libstdc++-v3]) +AC_CONFIG_SUBDIRS([libvtv]) +AC_CONFIG_SUBDIRS([lto-plugin]) +AC_CONFIG_SUBDIRS([zlib]) -- 2.34.1
Re: Enable top-level recursive 'autoreconf'
Hi! On 2023-10-19T11:57:33+0200, Andreas Schwab wrote: > On Okt 19 2023, Thomas Schwinge wrote: >> On 2023-10-18T15:42:18+0100, R jd <3246251196r...@gmail.com> wrote: >>> I guess I can ask, why there is not a recursive approach for configuring >>> GCC. e.g. AC_SUBDIRS in the top level? >> >> ('AC_CONFIG_SUBDIRS' you mean.) You know, often it just takes someone to >> ask the right questions... ;-) >> >> What do people think about the attached >> "Enable top-level recursive 'autoreconf'"? Only lightly tested, so far. > > The top-level files are shared with binutils-gdb, which has a different > set of subdirs. Good point, thanks! Fortunately, the failure mode for non-existing directories is non-fatal (skipped with 'subdirectory [...] not present' diagnostic); with my original "Enable top-level recursive 'autoreconf'" (also re-attached) applied to Binutils/GDB Git master branch, we get: $ PATH=[...] autoreconf -v autoreconf: Entering directory `.' autoreconf: configure.ac: not using Gettext autoreconf: running: aclocal autoreconf: configure.ac: tracing autoreconf: configure.ac: subdirectory c++tools not present autoreconf: configure.ac: subdirectory fixincludes not present autoreconf: configure.ac: subdirectory gcc not present autoreconf: configure.ac: subdirectory gcc/m2 not present autoreconf: configure.ac: subdirectory gnattools not present autoreconf: configure.ac: subdirectory gotools not present autoreconf: configure.ac: adding subdirectory intl to autoreconf autoreconf: Entering directory `intl' [...] autoreconf: Leaving directory `intl' autoreconf: configure.ac: subdirectory libada not present autoreconf: configure.ac: subdirectory libatomic not present autoreconf: configure.ac: adding subdirectory libbacktrace to autoreconf autoreconf: Entering directory `libbacktrace' [...] So we could (a) simply list *all* directories in the shared top-level 'configure.ac', or (b) configure GCC vs. 
other projects via a non-shared file ('m4_include([config/AC_CONFIG_SUBDIRS.m4])' or similar -- is there an established procedure for non-shared top-level files)? (I don't have a strong preference either way.) Is it just GCC and Binutils/GDB, or are the top-level files also shared with additional projects? Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 43127e5643337ca407071ad93bccbc716024352e Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Thu, 19 Oct 2023 10:28:30 +0200 Subject: [PATCH] Enable top-level recursive 'autoreconf' * configure.ac: At end of file, instantiate 'AC_CONFIG_SUBDIRS' for all relevant directories. * configure: Regenerate. --- configure| 102 ++- configure.ac | 36 ++ 2 files changed, 136 insertions(+), 2 deletions(-) diff --git a/configure b/configure index 8fc163d36bd..fcb4d591334 100755 --- a/configure +++ b/configure @@ -584,7 +584,8 @@ PACKAGE_URL= ac_unique_file="move-if-change" enable_option_checking=no -ac_subst_vars='LTLIBOBJS +ac_subst_vars='subdirs +LTLIBOBJS LIBOBJS compare_exclusions stage2_werror_flag @@ -909,7 +910,37 @@ READELF_FOR_TARGET STRIP_FOR_TARGET WINDRES_FOR_TARGET WINDMC_FOR_TARGET' - +ac_subdirs_all='c++tools +fixincludes +gcc +gcc/m2 +gnattools +gotools +intl +libada +libatomic +libbacktrace +libcc1 +libcody +libcpp +libdecnumber +libffi +libgcc +libgfortran +libgm2 +libgo +libgomp +libiberty +libitm +libobjc +libphobos +libquadmath +libsanitizer +libssp +libstdc++-v3 +libvtv +lto-plugin +zlib' # Initialize some variables set by options.
ac_init_help= @@ -20081,3 +20112,70 @@ if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} fi + +# Enable top-level recursive 'autoreconf' by enumerating all relevant +# directories here. This is intentionally done at end of 'configure.ac', +# *after* 'AC_OUTPUT', so that we don't attempt to prematurely 'configure' +# these directories when the top-level 'configure' is invoked. +subdirs="$subdirs c++tools" + +subdirs="$subdirs fixincludes" + +subdirs="$subdirs gcc" + +subdirs="$subdirs gcc/m2" + +subdirs="$subdirs gnattools" + +subdirs="$subdirs gotools" + +subdirs="$subdirs intl" + +subdirs="$subdir
[patch, fortran] Asynchronous I/O, take 3
Hello world, the attached patch is the third take on Nicolas' and my patch for implementing asynchronous I/O. Some parts have been reworked, and several bugs which caused either incorrect I/O or hangs have been fixed in the process. I have to say that getting out these bugs has been much harder than Nicolas and I originally thought, and that this has cost more working hours than any other patch I have been involved in. This has been regression-tested on x86_64-pc-linux-gnu. The new test cases have also been tested in a tight loop with n=1; while ./a.out; do echo -n $n " " ; n=$((n+1)); done or (for async_io_3.f90, which is supposed to fail) while true ; do ./a.out > /dev/null 2>&1 ; echo -n $n " " ; n=$((n+1)); done and the test cases also come up clean with valgrind --tool=drd (which is a _very_ strict tool which, after this experience, I wholeheartedly recommend for doing pthreads debugging). The interface remains as before - link in pthread to get asynchronous I/O, which matches what ifort does. So, OK for trunk? Regards Thomas 2018-07-02 Nicolas Koenig Thomas Koenig PR fortran/25829 * gfortran.texi: Add description of asynchronous I/O. * trans-decl.c (gfc_finish_var_decl): Treat asynchronous variables as volatile. * trans-io.c (gfc_build_io_library_fndecls): Rename st_wait to st_wait_async and change argument spec from ".X" to ".w". (gfc_trans_wait): Pass ID argument via reference. 2018-07-02 Nicolas Koenig Thomas Koenig PR fortran/25829 * gfortran.dg/f2003_inquire_1.f03: Add write statement. * gfortran.dg/f2003_io_1.f03: Add wait statement. 2018-01-02 Nicolas Koenig Thomas Koenig PR fortran/25829 * Makefile.am: Add async.c to gfor_io_src. Add async.h to gfor_io_headers. * Makefile.in: Regenerated. * gfortran.map: Add _gfortran_st_wait_async. * io/async.c: New file. * io/async.h: New file. * io/close.c: Include async.h. (st_close): Call async_wait for an asynchronous unit. * io/file_pos.c (st_backspace): Likewise. (st_endfile): Likewise. (st_rewind): Likewise. 
(st_flush): Likewise. * io/inquire.c: Add handling for asynchronous PENDING and ID arguments. * io/io.h (st_parameter_dt): Add async bit. (st_parameter_wait): Correct. (gfc_unit): Add au pointer. (st_wait_async): Add prototype. (transfer_array_inner): Likewise. (st_write_done_worker): Likewise. * io/open.c: Include async.h. (new_unit): Initialize asynchronous unit. * io/transfer.c (async_opt): New struct. (wrap_scalar_transfer): New function. (transfer_integer): Call wrap_scalar_transfer to do the work. (transfer_real): Likewise. (transfer_real_write): Likewise. (transfer_character): Likewise. (transfer_character_wide): Likewise. (transfer_complex): Likewise. (transfer_array_inner): New function. (transfer_array): Call transfer_array_inner. (transfer_derived): Call wrap_scalar_transfer. (data_transfer_init): Check for asynchronous I/O. Perform a wait operation on any pending asynchronous I/O if the data transfer is synchronous. Copy PDT and enqueue thread for data transfer. (st_read_done_worker): New function. (st_read_done): Enqueue transfer or call st_read_done_worker. (st_write_done_worker): New function. (st_write_done): Enqueue transfer or call st_read_done_worker. (st_wait): Document as no-op for compatibility reasons. (st_wait_async): New function. * io/unit.c (insert_unit): Use macros LOCK, UNLOCK and TRYLOCK; add NOTE where necessary. (get_gfc_unit): Likewise. (init_units): Likewise. (close_unit_1): Likewise. Call async_close if asynchronous. (close_unit): Use macros LOCK and UNLOCK. (finish_last_advance_record): Likewise. (newunit_alloc): Likewise. * io/unix.c (find_file): Likewise. (flush_all_units_1): Likewise. (flush_all_units): Likewise. * libgfortran.h (generate_error_common): Add prototype. * runtime/error.c: Include io.h and async.h. (generate_error_common): New function. 2018-07-02 Nicolas Koenig Thomas Koenig PR fortran/25829 * testsuite/libgfomp.fortran/async_io_1.f90: New test. * testsuite/libgfomp.fortran/async_io_2.f90: New test. 
* testsuite/libgfomp.fortran/async_io_3.f90: New test. Index: gcc/fortran/gfortran.texi === --- gcc/fortran/gfortran.texi (Revision 259739) +++ gcc/fortran/gfortran.texi (Arbeitskopie) @@ -882,8 +882,7 @@ than @code{(/.../)}.
Re: [patch, fortran] Asynchronous I/O, take 3
Hi Rainer, However, many (all?) gfortran tests now SEGV. One example is Program received signal SIGSEGV: Segmentation fault - invalid memory reference. Backtrace for this error: Segmentation Fault Thread 2 received signal SIGSEGV, Segmentation fault. [Switching to Thread 1 (LWP 1)] 0xfe1b1f03 in pthread_mutex_unlock () from /lib/libc.so.1 (gdb) where #0 0xfe1b1f03 in pthread_mutex_unlock () from /lib/libc.so.1 #1 0xfe5d1b7c in __gthread_mutex_unlock (__mutex=0x18) at ../libgcc/gthr-default.h:778 #2 _gfortran_st_rewind (fpp=0xfeffda9c) at /vol/gcc/src/hg/trunk/solaris/libgfortran/io/file_pos.c:486 #3 0x0805110f in MAIN__ () at /vol/gcc/src/hg/trunk/solaris/gcc/testsuite/gfortran.dg/backslash_2.f90:6 Ah, I see what was wrong. The attached patch should fix this. I have also attached a new test case which detects this error even on Linux systems, plus a ChangeLog which fixes the typo :-) Again regression-tested. So, OK for trunk? Regards Thomas 2018-07-02 Nicolas Koenig Thomas Koenig PR fortran/25829 * gfortran.texi: Add description of asynchronous I/O. * trans-decl.c (gfc_finish_var_decl): Treat asynchronous variables as volatile. * trans-io.c (gfc_build_io_library_fndecls): Rename st_wait to st_wait_async and change argument spec from ".X" to ".w". (gfc_trans_wait): Pass ID argument via reference. 2018-07-02 Nicolas Koenig Thomas Koenig PR fortran/25829 * gfortran.dg/f2003_inquire_1.f03: Add write statement. * gfortran.dg/f2003_io_1.f03: Add wait statement. 2018-01-02 Nicolas Koenig Thomas Koenig PR fortran/25829 * Makefile.am: Add async.c to gfor_io_src. Add async.h to gfor_io_headers. * Makefile.in: Regenerated. * gfortran.map: Add _gfortran_st_wait_async. * io/async.c: New file. * io/async.h: New file. * io/close.c: Include async.h. (st_close): Call async_wait for an asynchronous unit. * io/file_pos.c (st_backspace): Likewise. (st_endfile): Likewise. (st_rewind): Likewise. (st_flush): Likewise. * io/inquire.c: Add handling for asynchronous PENDING and ID arguments.
* io/io.h (st_parameter_dt): Add async bit. (st_parameter_wait): Correct. (gfc_unit): Add au pointer. (st_wait_async): Add prototype. (transfer_array_inner): Likewise. (st_write_done_worker): Likewise. * io/open.c: Include async.h. (new_unit): Initialize asynchronous unit. * io/transfer.c (async_opt): New struct. (wrap_scalar_transfer): New function. (transfer_integer): Call wrap_scalar_transfer to do the work. (transfer_real): Likewise. (transfer_real_write): Likewise. (transfer_character): Likewise. (transfer_character_wide): Likewise. (transfer_complex): Likewise. (transfer_array_inner): New function. (transfer_array): Call transfer_array_inner. (transfer_derived): Call wrap_scalar_transfer. (data_transfer_init): Check for asynchronous I/O. Perform a wait operation on any pending asynchronous I/O if the data transfer is synchronous. Copy PDT and enqueue thread for data transfer. (st_read_done_worker): New function. (st_read_done): Enqueue transfer or call st_read_done_worker. (st_write_done_worker): New function. (st_write_done): Enqueue transfer or call st_read_done_worker. (st_wait): Document as no-op for compatibility reasons. (st_wait_async): New function. * io/unit.c (insert_unit): Use macros LOCK, UNLOCK and TRYLOCK; add NOTE where necessary. (get_gfc_unit): Likewise. (init_units): Likewise. (close_unit_1): Likewise. Call async_close if asynchronous. (close_unit): Use macros LOCK and UNLOCK. (finish_last_advance_record): Likewise. (newunit_alloc): Likewise. * io/unix.c (find_file): Likewise. (flush_all_units_1): Likewise. (flush_all_units): Likewise. * libgfortran.h (generate_error_common): Add prototype. * runtime/error.c: Include io.h and async.h. (generate_error_common): New function. 2018-07-02 Nicolas Koenig Thomas Koenig PR fortran/25829 * testsuite/libgomp.fortran/async_io_1.f90: New test. * testsuite/libgomp.fortran/async_io_2.f90: New test. * testsuite/libgomp.fortran/async_io_3.f90: New test. * testsuite/libgomp.fortran/async_io_4.f90: New test. 
Obviously __mutex above hasn't been properly initialized. 2018-07-02 Nicolas Koenig Thomas Koenig PR fortran/25829 * testsuite/libgfomp.fortran/async_io_1.f90: New test. * testsuite/libgfomp.fortran/async_io_2.f90: New test. * t
[PATCH, ARM] PR85434: Prevent spilling of stack protector guard's address on ARM
In case of high register pressure in PIC mode, the address of the stack protector's guard can be spilled on ARM targets as shown in PR85434, thus allowing an attacker to control what the canary would be compared against. ARM does lack stack_protect_set and stack_protect_test insn patterns, but defining them does not help, as the address is expanded regularly and the patterns only deal with the copy and test of the guard with the canary. This problem does not occur for x86 targets because the PIC access and the test can be done in the same instruction. Aarch64 is exempt too because its PIC access insn patterns are movs of an UNSPEC, which prevents the second access in the epilogue from being CSEd in the cse_local pass with the first access in the prologue. The approach followed here is to create new "combined" set and test standard pattern names that take the unexpanded guard and do the set or test. This allows the target to use an opaque pattern (e.g. using UNSPEC) to hide the individual instructions being generated from the compiler and split the pattern into generic load, compare and branch instructions after register allocation, therefore avoiding any spilling. This is here implemented for the ARM targets. For targets not implementing these new standard pattern names, the existing stack_protect_set and stack_protect_test pattern names are used. To be able to split PIC access after register allocation, the functions had to be augmented to force a new PIC register load and to control which register it loads into. This is because sharing the PIC register between prologue and epilogue could lead to spilling due to CSE again which an attacker could use to control what the canary gets compared against. ChangeLog entries are as follows: *** gcc/ChangeLog *** 2018-07-05 Thomas Preud'homme PR target/85434 * target-insns.def (stack_protect_combined_set): Define new standard pattern name. (stack_protect_combined_test): Likewise.
* cfgexpand.c (stack_protect_prologue): Try new stack_protect_combined_set pattern first. * function.c (stack_protect_epilogue): Try new stack_protect_combined_test pattern first. * config/arm/arm.c (require_pic_register): Add pic_reg and compute_now parameters to control which register to use as PIC register and force reloading PIC register respectively. (legitimize_pic_address): Expose above new parameters in prototype and adapt recursive calls accordingly. (arm_legitimize_address): Adapt to new legitimize_pic_address prototype. (thumb_legitimize_address): Likewise. (arm_emit_call_insn): Adapt to new require_pic_register prototype. * config/arm/arm-protos.h (legitimize_pic_address): Adapt to prototype change. * config/arm/arm.md (movsi expander): Adapt to legitimize_pic_address prototype change. (stack_protect_combined_set): New insn_and_split pattern. (stack_protect_set): New insn pattern. (stack_protect_combined_test): New insn_and_split pattern. (stack_protect_test): New insn pattern. * config/arm/unspecs.md (UNSPEC_SP_SET): New unspec. (UNSPEC_SP_TEST): Likewise. * doc/md.texi (stack_protect_combined_set): Document new standard pattern name. (stack_protect_set): Clarify that the operand for guard's address is legal. (stack_protect_combined_test): Document new standard pattern name. (stack_protect_test): Clarify that the operand for guard's address is legal. *** gcc/testsuite/ChangeLog *** 2018-07-05 Thomas Preud'homme PR target/85434 * gcc.target/arm/pr85434.c: New test. Testing: Bootstrapped on ARM in both Arm and Thumb-2 mode as well as on Aarch64. Testsuite shows no regression on these 3 variants either both with default flags and with -fstack-protector-all. Is this ok for trunk? If yes, would this be acceptable as a backport to GCC 6, 7 and 8 provided that no regression is found? 
Best regards, Thomas From d917d48c2005e46154383589f203d06f3c6167e0 Mon Sep 17 00:00:00 2001 From: Thomas Preud'homme Date: Tue, 8 May 2018 15:47:05 +0100 Subject: [PATCH] PR85434: Prevent spilling of stack protector guard's address on ARM In case of high register pressure in PIC mode, address of the stack protector's guard can be spilled on ARM targets as shown in PR85434, thus allowing an attacker to control what the canary would be compared against. ARM does lack stack_protect_set and stack_protect_test insn patterns, defining them does not help as the address is expanded regularly and the patterns only deal with the copy and test of the guard with the canary. This problem does not occur for x86 targets because the PIC access and the test can be done in the same instruction. Aarch64 is exempt too because PIC access insn pattern are mov of UNSPEC which prevents it from the second access in the epilogue being CSEd in cse_local pass with the first access in the prologue. The approach followed here is to create
Re: [PATCH, ARM] PR85434: Prevent spilling of stack protector guard's address on ARM
Adding Jeff and Eric since the patch adds an RTL target hook. Best regards, Thomas On Thu, 5 Jul 2018 at 15:48, Thomas Preudhomme wrote: > > In case of high register pressure in PIC mode, address of the stack > protector's guard can be spilled on ARM targets as shown in PR85434, > thus allowing an attacker to control what the canary would be compared > against. ARM does lack stack_protect_set and stack_protect_test insn > patterns, defining them does not help as the address is expanded > regularly and the patterns only deal with the copy and test of the > guard with the canary. > > This problem does not occur for x86 targets because the PIC access and > the test can be done in the same instruction. Aarch64 is exempt too > because PIC access insn pattern are mov of UNSPEC which prevents it from > the second access in the epilogue being CSEd in cse_local pass with the > first access in the prologue. > > The approach followed here is to create new "combined" set and test > standard pattern names that take the unexpanded guard and do the set or > test. This allows the target to use an opaque pattern (eg. using UNSPEC) > to hide the individual instructions being generated to the compiler and > split the pattern into generic load, compare and branch instruction > after register allocator, therefore avoiding any spilling. This is here > implemented for the ARM targets. For targets not implementing these new > standard pattern names, the existing stack_protect_set and > stack_protect_test pattern names are used. > > To be able to split PIC access after register allocation, the functions > had to be augmented to force a new PIC register load and to control > which register it loads into. This is because sharing the PIC register > between prologue and epilogue could lead to spilling due to CSE again > which an attacker could use to control what the canary gets compared > against. 
> > ChangeLog entries are as follows: > > *** gcc/ChangeLog *** > > 2018-07-05 Thomas Preud'homme > > PR target/85434 > * target-insns.def (stack_protect_combined_set): Define new standard > pattern name. > (stack_protect_combined_test): Likewise. > * cfgexpand.c (stack_protect_prologue): Try new > stack_protect_combined_set pattern first. > * function.c (stack_protect_epilogue): Try new > stack_protect_combined_test pattern first. > * config/arm/arm.c (require_pic_register): Add pic_reg and compute_now > parameters to control which register to use as PIC register and force > reloading PIC register respectively. > (legitimize_pic_address): Expose above new parameters in prototype and > adapt recursive calls accordingly. > (arm_legitimize_address): Adapt to new legitimize_pic_address > prototype. > (thumb_legitimize_address): Likewise. > (arm_emit_call_insn): Adapt to new require_pic_register prototype. > * config/arm/arm-protos.h (legitimize_pic_address): Adapt to prototype > change. > * config/arm/arm.md (movsi expander): Adapt to legitimize_pic_address > prototype change. > (stack_protect_combined_set): New insn_and_split pattern. > (stack_protect_set): New insn pattern. > (stack_protect_combined_test): New insn_and_split pattern. > (stack_protect_test): New insn pattern. > * config/arm/unspecs.md (UNSPEC_SP_SET): New unspec. > (UNSPEC_SP_TEST): Likewise. > * doc/md.texi (stack_protect_combined_set): Document new standard > pattern name. > (stack_protect_set): Clarify that the operand for guard's address is > legal. > (stack_protect_combined_test): Document new standard pattern name. > (stack_protect_test): Clarify that the operand for guard's address is > legal. > > *** gcc/testsuite/ChangeLog *** > > 2018-07-05 Thomas Preud'homme > > PR target/85434 > * gcc.target/arm/pr85434.c: New test. > > Testing: Bootstrapped on ARM in both Arm and Thumb-2 mode as well as on > Aarch64. 
Testsuite shows no regression on these 3 variants either both > with default flags and with -fstack-protector-all. > > Is this ok for trunk? If yes, would this be acceptable as a backport to > GCC 6, 7 and 8 provided that no regression is found? > > Best regards, > > Thomas From d917d48c2005e46154383589f203d06f3c6167e0 Mon Sep 17 00:00:00 2001 From: Thomas Preud'homme Date: Tue, 8 May 2018 15:47:05 +0100 Subject: [PATCH] PR85434: Prevent spilling of stack protector guard's address on ARM In case of high register pressure in PIC mode, address of the stack protector's guard can be spilled on ARM targets as shown in PR85434, thus allowing an attacker to control what the canary would be compared aga
--enable-maintainer-mode currently broken, needs --disable-werror to complete bootstrap
Hi, this is a heads-up that configuring with --enable-maintainer-mode currently breaks bootstrap; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86450 for details. Running configure with --enable-maintainer-mode --disable-werror allows bootstrap to proceed until the underlying issue is fixed. Regards Thomas
Re: [Patch, Fortran] PR 85599: warn about short-circuiting of logical expressions for non-pure functions
Hi Janus, The cleaner approach would certainly be to avoid short-circuiting of impure functions altogether. If we can all agree that this is a good idea, This is a fine example of logical short-circuiting - the condition you mention is false, therefore the rest need not be evaluated :-)
Re: [patch, fortran] Asynchronous I/O, take 3
Hi everybody, I am currently testing the patch at https://gcc.gnu.org/ml/fortran/2018-07/msg8.html so far, so good! IMO the tests should go to gfortran.dg (they pass my tests). I put the async_io_*.f90 tests into libgomp.fortran because, under Linux, gfortran.dg does not link in pthreads, so the tests would not be executed in parallel, and some of them would fail. So, here is the final version. I would really like to get this into trunk, and out of the way, so Nicolas and I can focus on other things. So, OK? Regards Thomas 2018-07-15 Nicolas Koenig Thomas Koenig PR fortran/25829 * gfortran.texi: Add description of asynchronous I/O. * trans-decl.c (gfc_finish_var_decl): Treat asynchronous variables as volatile. * trans-io.c (gfc_build_io_library_fndecls): Rename st_wait to st_wait_async and change argument spec from ".X" to ".w". (gfc_trans_wait): Pass ID argument via reference. 2018-07-15 Nicolas Koenig Thomas Koenig PR fortran/25829 * gfortran.dg/f2003_inquire_1.f03: Add write statement. * gfortran.dg/f2003_io_1.f03: Add wait statement. 2018-01-15 Nicolas Koenig Thomas Koenig PR fortran/25829 * Makefile.am: Add async.c to gfor_io_src. Add async.h to gfor_io_headers. * Makefile.in: Regenerated. * gfortran.map: Add _gfortran_st_wait_async. * io/async.c: New file. * io/async.h: New file. * io/close.c: Include async.h. (st_close): Call async_wait for an asynchronous unit. * io/file_pos.c (st_backspace): Likewise. (st_endfile): Likewise. (st_rewind): Likewise. (st_flush): Likewise. * io/inquire.c: Add handling for asynchronous PENDING and ID arguments. * io/io.h (st_parameter_dt): Add async bit. (st_parameter_wait): Correct. (gfc_unit): Add au pointer. (st_wait_async): Add prototype. (transfer_array_inner): Likewise. (st_write_done_worker): Likewise. * io/open.c: Include async.h. (new_unit): Initialize asynchronous unit. * io/transfer.c (async_opt): New struct. (wrap_scalar_transfer): New function. (transfer_integer): Call wrap_scalar_transfer to do the work.
(transfer_real): Likewise. (transfer_real_write): Likewise. (transfer_character): Likewise. (transfer_character_wide): Likewise. (transfer_complex): Likewise. (transfer_array_inner): New function. (transfer_array): Call transfer_array_inner. (transfer_derived): Call wrap_scalar_transfer. (data_transfer_init): Check for asynchronous I/O. Perform a wait operation on any pending asynchronous I/O if the data transfer is synchronous. Copy PDT and enqueue thread for data transfer. (st_read_done_worker): New function. (st_read_done): Enqueue transfer or call st_read_done_worker. (st_write_done_worker): New function. (st_write_done): Enqueue transfer or call st_read_done_worker. (st_wait): Document as no-op for compatibility reasons. (st_wait_async): New function. * io/unit.c (insert_unit): Use macros LOCK, UNLOCK and TRYLOCK; add NOTE where necessary. (get_gfc_unit): Likewise. (init_units): Likewise. (close_unit_1): Likewise. Call async_close if asynchronous. (close_unit): Use macros LOCK and UNLOCK. (finish_last_advance_record): Likewise. (newunit_alloc): Likewise. * io/unix.c (find_file): Likewise. (flush_all_units_1): Likewise. (flush_all_units): Likewise. * libgfortran.h (generate_error_common): Add prototype. * runtime/error.c: Include io.h and async.h. (generate_error_common): New function. 2018-07-15 Nicolas Koenig Thomas Koenig PR fortran/25829 * testsuite/libgomp.fortran/async_io_1.f90: New test. * testsuite/libgomp.fortran/async_io_2.f90: New test. * testsuite/libgomp.fortran/async_io_3.f90: New test. * testsuite/libgomp.fortran/async_io_4.f90: New test. * testsuite/libgomp.fortran/async_io_5.f90: New test. * testsuite/libgomp.fortran/async_io_6.f90: New test. * testsuite/libgomp.fortran/async_io_7.f90: New test. Index: gcc/fortran/gfortran.texi === --- gcc/fortran/gfortran.texi (Revision 259739) +++ gcc/fortran/gfortran.texi (Arbeitskopie) @@ -882,8 +882,7 @@ than @code{(/.../)}. 
Type-specification for array @item Extensions to the specification and initialization expressions, including the support for intrinsics with real and complex arguments. -@item Support for the asynchronous input/output syntax; however, the -data transfer is currently always synchronously performed. +@
Re: [patch, fortran] Asynchronous I/O, take 3
Hi Rainer, I've now regtested the patch on i386-pc-solaris2.11 and sparc-sun-solaris2.11: no regressions and the new tests all PASS. Thanks, that is good news! However, I still don't understand why you insist on the hack with putting the async_io_*.f90 tests into the libgomp testsuite. Why not just make the pthread requirement explicit with { dg-require-effective-target pthread } { dg-additional-options "-pthread" } and put them in gfortran.dg where they belong? Because this does not appear to work with Linux. I, like most gfortran developers, work on Linux, and I would like to catch any failure during regression-testing on my own system, if possible. We have had this discussion with Jakub, and he advised us to put all the stuff requiring pthreads into libgomp. It is debatable if this is a good thing, or if we should at least make one round of tests with -pthread enabled. However, this is something for the future, and requires knowledge of dejagnu that I don't currently have :-) Regards Thomas
Re: [patch, fortran] Asynchronous I/O, take 3
Am 15.07.2018 um 19:47 schrieb Rainer Orth: Because this does not appear to work with Linux. I, like most gfortran developers, work on Linux, and I would like to catch any failure during regression-testing on my own system, if possible. huh, what doesn't work? I've just finished an x86_64-pc-linux-gnu bootstrap with your patch included, added the above to the async_io_?.f90 tests, linked them to gfortran.dg and ran the tests there (both 32 and 64-bit multilibs), all PASSed and I verified that they were linked with -lpthread. We have had this discussion with Jakub, and he advised us to put all the stuff requiring pthreads into libgomp. Do you have a pointer to that previous discussion? https://gcc.gnu.org/ml/fortran/2018-04/msg00048.html is what I based my recollection on. Regards Thomas
Re: [Patch, Fortran] PR 85599: warn about short-circuiting of logical expressions for non-pure functions
Hi Janus, I tested it on a fairly large code base and found no further false positives. Also it still regtests cleanly. Ok for trunk? While I still disagree with this on principle, I will not stand in the way. However, one point: I think that the warning should be under a separate warning option, which should then be enabled by -Wextra. -Waggressive-function-elimination could be reused for this, or something else. Regards Thomas
Re: [Patch, Fortran] PR 85599: warn about short-circuiting of logical expressions for non-pure functions
Am 16.07.2018 um 10:06 schrieb Janus Weil: However, one point: I think that the warning should be under a separate warning, which should then be enabled by -Wextra. -Waggressive-function-elimination, could be reused for this, or something else I don't actually see such a flag in the manual. Ah, sorry, I misremembered the option, it is actually -Wfunction-elimination. What I would suggest is to enable -Wfunction-elimination with -Wextra and also use that for your new warning. (I would also suggest to enable -faggressive-function-elimination at least for -Ofast, but that is another matter). Regards Thomas
Re: [PATCH, ARM] PR85434: Prevent spilling of stack protector guard's address on ARM
Fixed in attached patch. ChangeLog entries are unchanged: *** gcc/ChangeLog *** 2018-07-05 Thomas Preud'homme PR target/85434 * target-insns.def (stack_protect_combined_set): Define new standard pattern name. (stack_protect_combined_test): Likewise. * cfgexpand.c (stack_protect_prologue): Try new stack_protect_combined_set pattern first. * function.c (stack_protect_epilogue): Try new stack_protect_combined_test pattern first. * config/arm/arm.c (require_pic_register): Add pic_reg and compute_now parameters to control which register to use as PIC register and force reloading PIC register respectively. (legitimize_pic_address): Expose above new parameters in prototype and adapt recursive calls accordingly. (arm_legitimize_address): Adapt to new legitimize_pic_address prototype. (thumb_legitimize_address): Likewise. (arm_emit_call_insn): Adapt to new require_pic_register prototype. * config/arm/arm-protos.h (legitimize_pic_address): Adapt to prototype change. * config/arm/arm.md (movsi expander): Adapt to legitimize_pic_address prototype change. (stack_protect_combined_set): New insn_and_split pattern. (stack_protect_set): New insn pattern. (stack_protect_combined_test): New insn_and_split pattern. (stack_protect_test): New insn pattern. * config/arm/unspecs.md (UNSPEC_SP_SET): New unspec. (UNSPEC_SP_TEST): Likewise. * doc/md.texi (stack_protect_combined_set): Document new standard pattern name. (stack_protect_set): Clarify that the operand for guard's address is legal. (stack_protect_combined_test): Document new standard pattern name. (stack_protect_test): Clarify that the operand for guard's address is legal. *** gcc/testsuite/ChangeLog *** 2018-07-05 Thomas Preud'homme PR target/85434 * gcc.target/arm/pr85434.c: New test. 
Best regards, Thomas On Mon, 16 Jul 2018 at 22:46, Jeff Law wrote: > > On 07/05/2018 08:48 AM, Thomas Preudhomme wrote: > > In case of high register pressure in PIC mode, address of the stack > > protector's guard can be spilled on ARM targets as shown in PR85434, > > thus allowing an attacker to control what the canary would be compared > > against. ARM does lack stack_protect_set and stack_protect_test insn > > patterns, defining them does not help as the address is expanded > > regularly and the patterns only deal with the copy and test of the > > guard with the canary. > > > > This problem does not occur for x86 targets because the PIC access and > > the test can be done in the same instruction. Aarch64 is exempt too > > because PIC access insn pattern are mov of UNSPEC which prevents it from > > the second access in the epilogue being CSEd in cse_local pass with the > > first access in the prologue. > > > > The approach followed here is to create new "combined" set and test > > standard pattern names that take the unexpanded guard and do the set or > > test. This allows the target to use an opaque pattern (eg. using UNSPEC) > > to hide the individual instructions being generated to the compiler and > > split the pattern into generic load, compare and branch instruction > > after register allocator, therefore avoiding any spilling. This is here > > implemented for the ARM targets. For targets not implementing these new > > standard pattern names, the existing stack_protect_set and > > stack_protect_test pattern names are used. > > > > To be able to split PIC access after register allocation, the functions > > had to be augmented to force a new PIC register load and to control > > which register it loads into. This is because sharing the PIC register > > between prologue and epilogue could lead to spilling due to CSE again > > which an attacker could use to control what the canary gets compared > > against. 
> > > > ChangeLog entries are as follows: > > > > *** gcc/ChangeLog *** > > > > 2018-07-05 Thomas Preud'homme > > > > PR target/85434 > > * target-insns.def (stack_protect_combined_set): Define new standard > > pattern name. > > (stack_protect_combined_test): Likewise. > > * cfgexpand.c (stack_protect_prologue): Try new > > stack_protect_combined_set pattern first. > > * function.c (stack_protect_epilogue): Try new > > stack_protect_combined_test pattern first. > > * config/arm/arm.c (require_pic_register): Add pic_reg and compute_now > > parameters to control which register to use as PIC register and force > > reloading PIC register respectively. > > (legitimize_pic_address): Expose above new parameters in prototype and > &
Re: [PATCH][Fortran] Use MIN/MAX_EXPR for intrinsics or __builtin_fmin/max when appropriate
Hi Kyrill, The current implementation expands to: mvar = a1; if (a2 .op. mvar || isnan (mvar)) mvar = a2; if (a3 .op. mvar || isnan (mvar)) mvar = a3; ... return mvar; That is, if one of the operands is a NaN it will return the other argument. If both (all) are NaNs, it will return NaN. This is the same as the semantics of fmin/max as far as I can tell. I've looked at the F2008 standard, and, interestingly enough, the requirements on MIN and MAX do not mention NaNs at all. 13.7.106 has, for MAX, Result Value. The value of the result is that of the largest argument. plus some stuff about character variables (not relevant here). Similar for MIN. Also, the section on IEEE_ARITHMETIC (14.9) does not mention comparisons; also, "Complete conformance with IEC 60559:1989 is not required", what is required is the correct support for +,-, and *, plus support for / if IEEE_SUPPORT_DIVIDE is covered. So, the Fortran standard does not impose many requirements. I do think that a patch such as yours should not change the current behavior unless we know what it does and do think it is a good idea. Hmm... Having said that, I think we pretty much cover all the corner cases in nan_1.f90, so if that test passes without regression, then that aspect should be fine. Question: You have found an advantage on AArch64. Do you have access to other architectures to see if there is also a speed advantage, or maybe a disadvantage? Regards Thomas
Re: [Patch, Fortran] PR 85599: warn about short-circuiting of logical expressions for non-pure functions
Am 17.07.2018 um 19:19 schrieb Janus Weil: 2018-07-17 17:18 GMT+02:00 Fritz Reese : 2018-07-17 9:52 GMT+02:00 Janus Weil : In other words: Does it make sense to tone down -Wfunction-elimination, by only warning about impure functions? Here is an update of the patch which does that. Regtesting now ... Would not this break the testcase function_optimize_5.f90 : My regtest says so as well ;) The docs for -Wfunction-elimination would read: Warn if any calls to functions are eliminated by the optimizations enabled by the @option{-ffrontend-optimize} option. This option is implied by @option{-Wextra}. However, with your patch, it should probably read something like "warn if any calls to impure functions are eliminated..." Possibly with an explicit remark indicating that pure functions are not warned. Yes. However, the test case above seems to indicate that the function-elimination optimization is not applied to impure functions anyway (which is good IMHO). If you specify -faggressive-function-elimination, it is also done for impure (and non implicitly-pure) functions. Problem is that, in all probability, nobody uses this option at the moment. If that is true, then my modifications practically disable the old -Wfunction-elimination warnings completely :/ I do not think it would be a problem not to warn for removing calls to pure or implicitly pure functions. The test cases can easily be modified not to emit this warning, as you did. As the author of the original test cases, I may be able to say so with a certain amount of credibility. The actual elimination is checked for by counting the function names in the *.original dump file, which is done.
Re: [PATCH][Fortran] Use MIN/MAX_EXPR for intrinsics or __builtin_fmin/max when appropriate
Hi Kyrill, Because the expansion now emits straightline code rather than conditionals and branches it should be easier to optimise in general, so I'd expect this to be an improvement overall. That said, I have benchmarked it on SPEC2017 on aarch64. If you have any benchmarks of interest to you that you (or somebody else) can run on a target that you care about I would be very grateful for any results. Well, most people currently use x86_64 for scientific computing, so I would be concerned most about this architecture. As for the test case, min / max performance clearly has an effect on 521.wrf, so this would be ideal. If you could run 521.wrf on x86_64, and find that it does not regress measurably (or even shows an improvement), the patch is OK. I'd be interested in the timings you get. Regards Thomas
Re: [PATCH][Fortran][v2] Use MIN/MAX_EXPR for min/max intrinsics
Hi Kyrill, > Am 18.07.2018 um 13:17 schrieb Kyrill Tkachov : > > Thomas, Janne, would this relaxation of NaN handling be acceptable given the > benefits > mentioned above? If so, what would be the recommended adjustment to the > nan_1.f90 test? I would be a bit careful about changing behavior in such a major way. What would the results with NaN and infinity then be, with or without optimization? Would the results be consistent with min(nan,num) vs min(num,nan)? Would they be consistent with the new IEEE standard? In general, I think that min(nan,num) should be nan and that our current behavior is not the best. Does anybody have data points on how this is handled by other compilers? Oh, and if anything is changed, then compile-time and runtime behavior should always be the same. Regards, Thomas
Re: [PATCH] Show valid options for -march and -mtune in --help=target for arm32 (PR driver/83193).
Hi Martin, Why is this needed when -mfpu does not seem to need it for instance? Regarding the patch: > -print "Name(processor_type) Type(enum processor_type)" > -print "Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n" > +print "Name(processor_type) Type(enum processor_type) ForceHelp" > +print "Known ARM CPUs (for use with the -mtune= options):\n" Why changing the text beyond adding ForceHelp? > +@item ForceHelp > +This property is optional. If present, enum values is printed > +in @option{--help} output. > + are printed Thanks, Thomas On Wed, 18 Jul 2018 at 16:50, Martin Liška wrote: > > Hi. > > This introduces new ForceHelp option flag that helps to > print valid option enum values that are not directly > used as a type of an option. > > May I please ask ARM folks to test the patch? > Thanks, > Martin > > gcc/ChangeLog: > > 2018-07-18 Martin Liska > > PR driver/83193 > * config/arm/arm-tables.opt: Add ForceHelp flag for > processor_type and arch_name enum types. > * config/arm/parsecpu.awk: Likewise. > * doc/options.texi: Document new flag ForceHelp. > * opt-read.awk: Parse ForceHelp and set it in construction. > * optc-gen.awk: Likewise. > * opts.c (print_filtered_help): Handle force_help option. > * opts.h (struct cl_enum): New field force_help. > --- > gcc/config/arm/arm-tables.opt | 6 +++--- > gcc/config/arm/parsecpu.awk | 6 +++--- > gcc/doc/options.texi | 4 > gcc/opt-read.awk | 3 +++ > gcc/optc-gen.awk | 3 ++- > gcc/opts.c| 3 ++- > gcc/opts.h| 3 +++ > 7 files changed, 20 insertions(+), 8 deletions(-) > >
Re: [PATCH, ARM] PR85434: Prevent spilling of stack protector guard's address on ARM
[Dropping Jeff Law from the list since he already commented on the middle end parts] Hi Kyrill, On Thu, 19 Jul 2018 at 12:02, Kyrill Tkachov wrote: > > Hi Thomas, > > On 17/07/18 12:02, Thomas Preudhomme wrote: > > Fixed in attached patch. ChangeLog entries are unchanged: > > > > *** gcc/ChangeLog *** > > > > 2018-07-05 Thomas Preud'homme > > > > PR target/85434 > > * target-insns.def (stack_protect_combined_set): Define new standard > > pattern name. > > (stack_protect_combined_test): Likewise. > > * cfgexpand.c (stack_protect_prologue): Try new > > stack_protect_combined_set pattern first. > > * function.c (stack_protect_epilogue): Try new > > stack_protect_combined_test pattern first. > > * config/arm/arm.c (require_pic_register): Add pic_reg and compute_now > > parameters to control which register to use as PIC register and force > > reloading PIC register respectively. > > (legitimize_pic_address): Expose above new parameters in prototype and > > adapt recursive calls accordingly. > > (arm_legitimize_address): Adapt to new legitimize_pic_address > > prototype. > > (thumb_legitimize_address): Likewise. > > (arm_emit_call_insn): Adapt to new require_pic_register prototype. > > * config/arm/arm-protos.h (legitimize_pic_address): Adapt to prototype > > change. > > * config/arm/arm.md (movsi expander): Adapt to legitimize_pic_address > > prototype change. > > (stack_protect_combined_set): New insn_and_split pattern. > > (stack_protect_set): New insn pattern. > > (stack_protect_combined_test): New insn_and_split pattern. > > (stack_protect_test): New insn pattern. > > * config/arm/unspecs.md (UNSPEC_SP_SET): New unspec. > > (UNSPEC_SP_TEST): Likewise. > > * doc/md.texi (stack_protect_combined_set): Document new standard > > pattern name. > > (stack_protect_set): Clarify that the operand for guard's address is > > legal. > > (stack_protect_combined_test): Document new standard pattern name. 
> > (stack_protect_test): Clarify that the operand for guard's address is > > legal. > > > > *** gcc/testsuite/ChangeLog *** > > > > 2018-07-05 Thomas Preud'homme > > > > PR target/85434 > > * gcc.target/arm/pr85434.c: New test. > > > > Sorry for the delay. Some comments inline. > > Kyrill > > diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c > index d6e3c382085..d1a893ac56e 100644 > --- a/gcc/cfgexpand.c > +++ b/gcc/cfgexpand.c > @@ -6105,8 +6105,18 @@ stack_protect_prologue (void) > { > tree guard_decl = targetm.stack_protect_guard (); > rtx x, y; > + struct expand_operand ops[2]; > > x = expand_normal (crtl->stack_protect_guard); > + create_fixed_operand (&ops[0], x); > + create_fixed_operand (&ops[1], DECL_RTL (guard_decl)); > + /* Allow the target to compute address of Y and copy it to X without > + leaking Y into a register. This combined address + copy pattern allows > + the target to prevent spilling of any intermediate results by splitting > + it after register allocator. */ > + if (maybe_expand_insn (targetm.code_for_stack_protect_combined_set, 2, > ops)) > +return; > + > if (guard_decl) > y = expand_normal (guard_decl); > else > diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h > index 8537262ce64..100844e659c 100644 > --- a/gcc/config/arm/arm-protos.h > +++ b/gcc/config/arm/arm-protos.h > @@ -67,7 +67,7 @@ extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum > rtx_code); > extern int arm_split_constant (RTX_CODE, machine_mode, rtx, >HOST_WIDE_INT, rtx, rtx, int); > extern int legitimate_pic_operand_p (rtx); > -extern rtx legitimize_pic_address (rtx, machine_mode, rtx); > +extern rtx legitimize_pic_address (rtx, machine_mode, rtx, rtx, bool); > extern rtx legitimize_tls_address (rtx, rtx); > extern bool arm_legitimate_address_p (machine_mode, rtx, bool); > extern int arm_legitimate_address_outer_p (machine_mode, rtx, RTX_CODE, > int); > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index ec3abbcba9f..f4a970580c2 100644 > 
--- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -7369,20 +7369,26 @@ legitimate_pic_operand_p (rtx x) > } > > /* Record that the current function needs a PIC register. Initialize > - cfun->machine->pic_reg if we have not already done so. */ > + cfun->machine->pic_reg if we have not already done
Re: [PATCH][GCC][Arm] Fix subreg crash in different way by enabling the FP16 pattern unconditionally.
Hi Tamar, On Mon, 23 Jul 2018 at 17:56, Tamar Christina wrote: > > Hi All, > > My previous patch changed arm_can_change_mode_class to allow subregs of > 64bit registers on arm big-endian. However it seems that we can't do this > because a the data in 64 bit VFP registers are stored in little-endian order, > even on big-endian. > > Allowing this change had a knock on effect that caused GCC's no-op detection > to think that loading from the first lane on arm big-endian is a no-op. this > because we can't describe the weird ordering we have on D registers on > big-endian. > > The original issue comes from the fact that the code does > > ... foo (... bar) > { > return bar; > } > > The expansion of the return statement causes GCC to try to return the value in > a register. GCC will try to emit the move then, from MEM to REG (due to the > SSA > temporary.). It checks for a mov optab for this which isn't available and > then tries to do the move in bits using emit_move_multi_word. > > emit_move_multi_word will split the move into sub parts, but then needs to get > the sub parts and does this using subregs, but it's told it can't do subregs! > > The compiler is now stuck in an infinite loop. > > The way this is worked around in the back-end is that we have move patterns in > neon.md that usually just force the register instead of checking with the > back-end. This prevents emit_move_multi_word from being needed. However the > pattern for V4HF and V8HF were guarded by TARGET_NEON && TARGET_FP16. > > I don't believe the TARGET_FP16 guard to be needed, because the pattern > doesn't > actually generate code and requires another pattern for that, and a reg to > reg move > should always be possible anyway. So allowing the force to register here is > safe > and it allows the compiler to generate a correct error instead of ICEing in an > infinite loop. How about subreg to subreg move? Doesn't that expand to more insns (subreg to reg and reg to subreg)? 
Couldn't you improve the logic to check that there is actually a mode change so that if there isn't (like moving from one subreg to another) just expand to a single move? Best regards, Thomas > > This patch ensures gcc.target/arm/big-endian-subreg.c is fixed without > introducing > any regressions while fixing > > gcc.dg/vect/vect-nop-move.c execution test > g++.dg/torture/vshuf-v2si.C -O3 -g execution test > g++.dg/torture/vshuf-v4si.C -O3 -g execution test > g++.dg/torture/vshuf-v8hi.C -O3 -g execution test > > Regtested on armeb-none-eabi and no regressions. > Bootstrapped on arm-none-linux-gnueabihf and no issues. > > > Ok for trunk? > > Thanks, > Tamar > > gcc/ > 2018-07-23 Tamar Christina > > PR target/84711 > * config/arm/arm.c (arm_can_change_mode_class): Disallow subreg. > * config/arm/neon.md (movv4hf, movv8hf): Refactored to.. > (mov): ..this and enable unconditionally. > > --
Re: [PATCH, ARM] PR85434: Prevent spilling of stack protector guard's address on ARM
Hi Kyrill, Using memory_operand worked, the issues I encountered when using it in earlier versions of the patch must have been due to the missing test on address_operand in the preparation statements which I added later. Please find an updated patch in attachment. ChangeLog entry is as follows: *** gcc/ChangeLog *** 2018-07-05 Thomas Preud'homme * target-insns.def (stack_protect_combined_set): Define new standard pattern name. (stack_protect_combined_test): Likewise. * cfgexpand.c (stack_protect_prologue): Try new stack_protect_combined_set pattern first. * function.c (stack_protect_epilogue): Try new stack_protect_combined_test pattern first. * config/arm/arm.c (require_pic_register): Add pic_reg and compute_now parameters to control which register to use as PIC register and force reloading PIC register respectively. Insert in the stream of insns if possible. (legitimize_pic_address): Expose above new parameters in prototype and adapt recursive calls accordingly. (arm_legitimize_address): Adapt to new legitimize_pic_address prototype. (thumb_legitimize_address): Likewise. (arm_emit_call_insn): Adapt to new require_pic_register prototype. * config/arm/arm-protos.h (legitimize_pic_address): Adapt to prototype change. * config/arm/arm.md (movsi expander): Adapt to legitimize_pic_address prototype change. (stack_protect_combined_set): New insn_and_split pattern. (stack_protect_set): New insn pattern. (stack_protect_combined_test): New insn_and_split pattern. (stack_protect_test): New insn pattern. * config/arm/unspecs.md (UNSPEC_SP_SET): New unspec. (UNSPEC_SP_TEST): Likewise. * doc/md.texi (stack_protect_combined_set): Document new standard pattern name. (stack_protect_set): Clarify that the operand for guard's address is legal. (stack_protect_combined_test): Document new standard pattern name. (stack_protect_test): Clarify that the operand for guard's address is legal. 
*** gcc/testsuite/ChangeLog *** 2018-07-05 Thomas Preud'homme * gcc.target/arm/pr85434.c: New test. Bootstrapped again for Arm and Thumb-2 and regtested with and without -fstack-protector-all without any regression. Best regards, Thomas On Thu, 19 Jul 2018 at 17:34, Thomas Preudhomme wrote: > > [Dropping Jeff Law from the list since he already commented on the > middle end parts] > > Hi Kyrill, > > On Thu, 19 Jul 2018 at 12:02, Kyrill Tkachov > wrote: > > > > Hi Thomas, > > > > On 17/07/18 12:02, Thomas Preudhomme wrote: > > > Fixed in attached patch. ChangeLog entries are unchanged: > > > > > > *** gcc/ChangeLog *** > > > > > > 2018-07-05 Thomas Preud'homme > > > > > > PR target/85434 > > > * target-insns.def (stack_protect_combined_set): Define new standard > > > pattern name. > > > (stack_protect_combined_test): Likewise. > > > * cfgexpand.c (stack_protect_prologue): Try new > > > stack_protect_combined_set pattern first. > > > * function.c (stack_protect_epilogue): Try new > > > stack_protect_combined_test pattern first. > > > * config/arm/arm.c (require_pic_register): Add pic_reg and compute_now > > > parameters to control which register to use as PIC register and force > > > reloading PIC register respectively. > > > (legitimize_pic_address): Expose above new parameters in prototype and > > > adapt recursive calls accordingly. > > > (arm_legitimize_address): Adapt to new legitimize_pic_address > > > prototype. > > > (thumb_legitimize_address): Likewise. > > > (arm_emit_call_insn): Adapt to new require_pic_register prototype. > > > * config/arm/arm-protos.h (legitimize_pic_address): Adapt to prototype > > > change. > > > * config/arm/arm.md (movsi expander): Adapt to legitimize_pic_address > > > prototype change. > > > (stack_protect_combined_set): New insn_and_split pattern. > > > (stack_protect_set): New insn pattern. > > > (stack_protect_combined_test): New insn_and_split pattern. > > > (stack_protect_test): New insn pattern. 
> > > * config/arm/unspecs.md (UNSPEC_SP_SET): New unspec. > > > (UNSPEC_SP_TEST): Likewise. > > > * doc/md.texi (stack_protect_combined_set): Document new standard > > > pattern name. > > > (stack_protect_set): Clarify that the operand for guard's address is > > > legal. > > > (stack_protect_combined_test): Document new standard pattern name. > > > (stack_protect_test): Clarify that the operand
Re: [PATCH][GCC][Arm] Fix subreg crash in different way by enabling the FP16 pattern unconditionally.
Hi Tamar, On Wed, 25 Jul 2018 at 16:28, Tamar Christina wrote: > > Hi Thomas, > > Thanks for the review! > > > > > > > I don't believe the TARGET_FP16 guard to be needed, because the > > > pattern doesn't actually generate code and requires another pattern > > > for that, and a reg to reg move should always be possible anyway. So > > > allowing the force to register here is safe and it allows the compiler > > > to generate a correct error instead of ICEing in an infinite loop. > > > > How about subreg to subreg move? Doesn't that expand to more insns > > (subreg to reg and reg to subreg)? Couldn't you improve the logic to check > > that there is actually a mode change so that if there isn't (like moving > > from > > one subreg to another) just expand to a single move? > > > > Yes, but that is not a new issue. My patch is simply removing the TARGET_FP16 > restrictions and > merging two patterns that should be one using an iterator and nothing more. > > The redundant mov is already there and a different issue than the ICE I'm > trying to fix. It's there for movv4hf and movv6hf but your patch extends this problem to movv2sf and movv4sf as well. > > None of the code inside the expander is needed at all, the code really only > has an effect on subreg > to subreg moves, as `force_reg` doesn't do anything when it's argument is > already a reg. > > The comment in the expander (which was already there) is wrong. The *reason* > the ICE is fixed isn't > because of the `force_reg`. It's because of the mere presence of the expander > itself. The expander matches the > standard mov$a optab and so this prevents emit_move_insn_1 from doing the > move by subwords as it finds a pattern > that's able to do the move. Could you then fix the comment in your patch as well? I hadn't understood the force_reg was not key here. 
You might want to update the following sentence from your patch description if you are going to include it in your commit message: The way this is worked around in the back-end is that we have move patterns in neon.md that usually just force the register instead of checking with the back-end. "The way this is worked around (..) that just force the register" is what led me to believe the force_reg was important. > > The expander however always falls through and doesn’t stop RTL generation. > You could remove all the code in there and have > it properly match the *neon_mov instructions which will do the right thing > later at code generation time and avoid the redundant > moves. My guess is the original `force_reg` was copied from the other > patterns like `movti` and the existing `mov`. There It makes > sense because the operands can be MEM or anything general_operand. > > However the redundant moves are a different problem than what I'm trying to > solve here. So I think that's another patch which requires further > testing. I was just thinking of restricting when does the force_reg happens but if it can be removed completely I agree it should probably be done in a separate patch. Oh by the way, is there something that prevent those expander to ever be used with a memory operand? Because the GCC internals contains the following piece for mov standard pattern (bold marks added by me): "Second, these patterns are not used solely in the RTL generation pass. Even the reload pass can generate move insns to copy values from stack slots into temporary registers. When it does so, one of the operands is a hard register and the other is an operand that can need to be reloaded into a register. Therefore, when given such a pair of operands, the pattern must generate RTL which needs no reloading and needs no temporary registers—no registers other than the operands. 
For example, if you support the pattern with a define_ expand, then in such a case the define_expand *mustn’t call force_reg* or any other such function which might generate new pseudo registers." Best regards, Thomas > > Regards, > Tamar > > > Best regards, > > > > Thomas > > > > > > > > This patch ensures gcc.target/arm/big-endian-subreg.c is fixed without > > > introducing any regressions while fixing > > > > > > gcc.dg/vect/vect-nop-move.c execution test > > > g++.dg/torture/vshuf-v2si.C -O3 -g execution test > > > g++.dg/torture/vshuf-v4si.C -O3 -g execution test > > > g++.dg/torture/vshuf-v8hi.C -O3 -g execution test > > > > > > Regtested on armeb-none-eabi and no regressions. > > > Bootstrapped on arm-none-linux-gnueabihf and no issues. > > > > > > > > > Ok for trunk? > > > > > > Thanks, > > > Tamar > > > > > > gcc/ > > > 2018-07-23 Tamar Christina > > > > > > PR target/84711 > > > * config/arm/arm.c (arm_can_change_mode_class): Disallow subreg. > > > * config/arm/neon.md (movv4hf, movv8hf): Refactored to.. > > > (mov): ..this and enable unconditionally. > > > > > > --
Re: [PATCH][GCC][Arm] Fix subreg crash in different way by enabling the FP16 pattern unconditionally.
On Thu, 26 Jul 2018 at 12:01, Tamar Christina wrote: > > Hi Thomas, > > > -Original Message- > > From: Thomas Preudhomme > > Sent: Thursday, July 26, 2018 09:29 > > To: Tamar Christina > > Cc: gcc-patches@gcc.gnu.org; nd ; Ramana Radhakrishnan > > ; Richard Earnshaw > > ; ni...@redhat.com; Kyrylo Tkachov > > > > Subject: Re: [PATCH][GCC][Arm] Fix subreg crash in different way by > > enabling the FP16 pattern unconditionally. > > > > Hi Tamar, > > > > On Wed, 25 Jul 2018 at 16:28, Tamar Christina > > wrote: > > > > > > Hi Thomas, > > > > > > Thanks for the review! > > > > > > > > > > > > > I don't believe the TARGET_FP16 guard to be needed, because the > > > > > pattern doesn't actually generate code and requires another > > > > > pattern for that, and a reg to reg move should always be possible > > > > > anyway. So allowing the force to register here is safe and it > > > > > allows the compiler to generate a correct error instead of ICEing in > > > > > an > > infinite loop. > > > > > > > > How about subreg to subreg move? Doesn't that expand to more insns > > > > (subreg to reg and reg to subreg)? Couldn't you improve the logic to > > > > check that there is actually a mode change so that if there isn't > > > > (like moving from one subreg to another) just expand to a single move? > > > > > > > > > > Yes, but that is not a new issue. My patch is simply removing the > > > TARGET_FP16 restrictions and merging two patterns that should be one > > using an iterator and nothing more. > > > > > > The redundant mov is already there and a different issue than the ICE I'm > > trying to fix. > > > > It's there for movv4hf and movv6hf but your patch extends this problem to > > movv2sf and movv4sf as well. > > I don't understand how it can. My patch just replaces one pattern for V4HF and > one for V8HF with one pattern operating on VH. > > ;; Vector modes for 16-bit floating-point support. 
> (define_mode_iterator VH [V8HF V4HF]) > > My pattern has absolutely no effect on V2SF and V4SF or any of the other > modes. My bad, I was looking at VF. > > > > > > > > > None of the code inside the expander is needed at all, the code really > > > only has an effect on subreg to subreg moves, as `force_reg` doesn't do > > anything when it's argument is already a reg. > > > > > > The comment in the expander (which was already there) is wrong. The > > > *reason* the ICE is fixed isn't because of the `force_reg`. It's > > > because of the mere presence of the expander itself. The expander > > > matches the standard mov$a optab and so this prevents > > emit_move_insn_1 from doing the move by subwords as it finds a pattern > > that's able to do the move. > > > > Could you then fix the comment in your patch as well? I hadn't understood > > the force_reg was not key here. You might want to update the following > > sentence from your patch description if you are going to include it in your > > commit message: > > I'll update the comment in the patch. The cover letter won't be included in > the commit, > But it does accurately reflect the current state of affairs. The patch will > do the force_reg, > It's just not the reason it works. Understood. > > > > > The way this is worked around in the back-end is that we have move > > patterns in neon.md that usually just force the register instead of checking > > with the back-end. > > > > "The way this is worked around (..) that just force the register" is what > > led > > me to believe the force_reg was important. > > > > > > > > The expander however always falls through and doesn’t stop RTL > > > generation. You could remove all the code in there and have it > > > properly match the *neon_mov instructions which will do the right > > > thing later at code generation time and avoid the redundant moves. My > > guess is the original `force_reg` was copied from the other patterns like > > `movti` and the existing `mov`. 
There It makes sense because the > > operands can be MEM or anything general_operand. > > > > > > However the redundant moves are a different problem than what I'm > > > trying to solve here. So I think that's another patch which requires > &
Re: Build fail on gthr-simple.h targets (Re: AsyncI/O patch committed)
Hi Ulrich, The problem is that io/async.h unconditionally uses a couple of features that are not provided by gthr-simple.h, in particular __gthread_cond_t and __gthread_equal / __gthread_self. According to the documentation in gthr.h, the former is only available if __GTHREAD_HAS_COND is defined, and the latter are only available if __GTHREADS_CXX0X is defined. Neither is true for gthr-simple.h. Thanks for the analysis, and the pointer to the macros. Because the functionality depends on these features, it is best to disable it when they are not present. So, here is a patch which does just that. This was reg-tested on Linux, which showed that the functionality is still there. I tried bootstrapping on AIX on gcc119, but this failed due to an unrelated issue (problem with compiling the inline libraries). Would it be possible to check if this restores bootstrap in the next 10 hours or so? If so, I would like to commit this. Otherwise, Nicolas and I will not be able to fix this for a week or so, and it would be best to revert the async I/O patch :-( Regards Thomas 2018-07-25 Thomas Koenig * io/async.h: Test for feature macros for __gthread_cond_t and __gthread_equal. Define ASYNC_IO if both are present. (SIGNAL): Define as no-op if ASYNC_IO is not defined. (WAIT_SIGNAL_MUTEX): Likewise. (REVOKE_SIGNAL): Likewise. (transfer_args): Define as useless struct if ASYNC_IO is not defined. (adv_cond): Likewise. (async_unit): Likewise. * io/async.c (init_async_unit): If ASYNC_IO is not defined, define alternate function which does nothing. (enqueue_transfer): Likewise. (enqueue_done_id): Likewise. (enqueue_done): Likewise. (enqueue_close): Likewise. (enqueue_data_transfer_init): Likewise. (collect_async_errors): Likewise. (async_wait_id): Likewise. (async_wait): Likewise. (async_close): Likewise.
Index: io/async.h === --- io/async.h (revision 262978) +++ io/async.h (working copy) @@ -25,6 +25,16 @@ #ifndef ASYNC_H #define ASYNC_H +/* Async I/O will not work on targets which do not support + __gthread_cond_t and __gthread_equal / __gthread_self. Check + this. */ + +#if defined(__GTHREAD_HAS_COND) && defined(__GTHREADS_CXX0X) +#define ASYNC_IO 1 +#else +#undef ASYNC_IO +#endif + /* Defining DEBUG_ASYNC will enable somewhat verbose debugging output for async I/O. */ @@ -217,6 +227,8 @@ #define INTERN_UNLOCK(mutex) T_ERROR (__gthread_mutex_unlock, mutex); +#if ASYNC_IO + #define SIGNAL(advcond) do{ \ INTERN_LOCK (&(advcond)->lock); \ (advcond)->pending = 1; \ @@ -257,6 +269,15 @@ INTERN_UNLOCK (&(advcond)->lock); \ } while (0) +#else + +#define SIGNAL(advcond) do{} while(0) +#define WAIT_SIGNAL_MUTEX(advcond, condition, mutex) do{} while(0) +#define REVOKE_SIGNAL(advcond) do{} while(0) + +#endif + +#if ASYNC_IO DEBUG_LINE (extern __thread const char *aio_prefix); DEBUG_LINE (typedef struct aio_lock_debug{ @@ -274,6 +295,7 @@ DEBUG_LINE (extern __gthread_mutex_t debug_queue_l error reporting. */ extern __thread gfc_unit *thread_unit; +#endif enum aio_do { AIO_INVALID = 0, @@ -285,6 +307,8 @@ enum aio_do { AIO_CLOSE }; +#if ASYNC_IO + typedef union transfer_args { struct @@ -342,6 +366,23 @@ typedef struct async_unit } async_unit; +#else +typedef union transfer_args +{ + int x; +}; + +struct adv_cond +{ + int x; +}; + +typedef struct async_unit +{ + int x; +}; +#endif + void init_async_unit (gfc_unit *); internal_proto (init_async_unit); Index: io/async.c === --- io/async.c (revision 262978) +++ io/async.c (working copy) @@ -36,6 +36,7 @@ #include #include "async.h" +#if ASYNC_IO DEBUG_LINE (__thread const char *aio_prefix = MPREFIX); @@ -481,3 +482,88 @@ async_close (async_unit *au) T_ERROR (__gthread_join, au->thread, NULL); free_async_unit (au); } + +#else + +/* Do-nothing function, which will not be called. 
*/ + +void +init_async_unit (gfc_unit *u) +{ + u->au = NULL; + return; +} + +/* Do-nothing function, which will not be called. */ + +void +enqueue_transfer (async_unit *au, transfer_args *arg, enum aio_do type) +{ + return; +} + +/* Do-nothing function, which will not be called. */ + +int +enqueue_done_id (async_unit *au, enum aio_do type) +{ + return 0; +} + +/* Do-nothing function, which will not be called. */ + +void +enqueue_done (async_unit *au, enum aio_do type) +{ + return; +} + +/* Do-nothing function, which will not be called. */ + +void +enqueue_close (async_unit *au) +{ + return; +} + +/* Do-nothing function, which will not be called. */ + +void +enqueue_data_transfer_init (async_unit *au, st_parameter_dt *dt,
Re: Build fail on gthr-simple.h targets (Re: AsyncI/O patch committed)
Am 26.07.2018 um 22:54 schrieb Thomas Koenig: Hi Ulrich, The problem is that io/async.h unconditionally uses a couple of features that are not provided by gthr-simple.h, in particular __gthread_cond_t and __gthread_equal / __gthread_self. According to the documentation in gthr.h, the former is only available if __GTHREAD_HAS_COND is defined, and the latter are only available if __GTHREADS_CXX0X is defined. Neither is true for gthr-simple.h. Thanks for the analysis, and the pointer to the macros. Because the functionality depends on these features, it is best to disable it if they are not present. So, here is a patch which does just that. This was reg-tested on Linux, which showed that the functionality is still there. I tried bootstrapping on AIX on gcc119, but this failed due to an unrelated issue (problem with compiling the inline libraries). OK, this does not work. We have found a method of checking on Linux, and this does not work. We have also found a way of working on this in the next couple of days, so expect an update in one or two days. If that is too much time, feel free to revert the async patch in the meantime. Regards Thomas
Re: [PATCH 1/3] testsuite: Unbork multilib testing on RISC-V (and any target really)
Hi! First, Vineet, great that you've now tracked this down! :-) Indeed "early exit" vs. 'torture-finish' was exactly the issue that I suspected. It may not be what you originally intended, but I hope at least you've learned some things about DejaGnu/TCL... ;-P Yesterday, I actually had begun looking into this. To avoid the big download and having to wait for a lot of packages to be built with your 'riscv-gnu-toolchain' recipe: <https://inbox.sourceware.org/44506218-70bd-0b7b-a560-744bb2437...@rivosinc.com>, I intended to do just a quick GCC build on compile farm gcc92, which (a) didn't turn out to be quick, and (b) eventually failed due to <https://gcc.gnu.org/PR106271> "Bootstrap on RISC-V on Ubuntu 22.04 LTS: bits/libc-header-start.h: No such file or directory"... (I'm now running 'riscv-gnu-toolchain' to verify this, and another thing.) Before we push your patch, let me please verify that it indeed doesn't change any 'gcc.misc-tests/i386-prefetch.exp' semantics, and: On 2023-05-31T19:13:01+0100, Iain Sandoe via Gcc-patches wrote: >> On 31 May 2023, at 18:57, Jeff Law via Gcc-patches >> wrote: >> On 5/31/23 10:25, Vineet Gupta wrote: >>> Multilib testing on trunk is currently busted (and surprisingly this >>> affects any/all targets but it seems nobody cares). We currently get the >>> following splat: >> I wouldn't say that nobody cares, it just hasn't bubbled up on anyone's >> priority list yet (most developers aren't working on targets that make heavy >> use of multilibs). So I regularly do build x86_64 GNU/Linux with default '-m64' plus '-m32' multilib -- but of course, there's no "early exit" for those, as there's no 'string match "* -march=*" " [board_info target multilib_flags] "'... >> But probably more importantly, this problem seems to not be triggering on >> all multilib targets. For example, I just examined my tester's build logs >> and couldn't see this on the H8/300 or V850 ports. Which begs the question, >> why? 
..., which may be the case for those, too? In other words: the problem only shows up if '-march=[...]' appears in the flags, which indeed may not be a common thing? I'll cross-verify this with x86_64 and '-march=[...]' flags. And, I still intend to figure out why this issue apparently disappears with my recent 'LTO_TORTURE_OPTIONS' patches reverted: <https://inbox.sourceware.org/ad8a98da-0231-7a95-017b-ea5d8ae65...@rivosinc.com>. Otherwise: > I do have a multilib problem [with libgomp] on Darwin (which has been noticed > : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109951) but it is not obvious > how the fix proposed would solve this - unless it’s some subtle change in > global content for the multilib options. > > (testing anyway) No, this is really a separate issue. I understand what's happening, and have an idea about how to address this that I'll post later. Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Re: [PATCH 1/3] testsuite: Unbork multilib testing on RISC-V (and any target really)
re.org/ad8a98da-0231-7a95-017b-ea5d8ae65...@rivosinc.com>. In the "old world", 'torture-init', *not* followed by 'set-torture-options', *not* followed by 'torture-finish', then another 'torture-init' was not a problem -- but in the "new world" it now is. This also explains my confusion; the original report was: ERROR: torture-init: torture_without_loops is not empty as expected ..., note: not 'LTO_TORTURE_OPTIONS' but 'torture_without_loops', and those I'd not directly touched in my recent changes, which had made me confused. The 'torture_without_loops' error condition now does arise if there's a 'torture-init', *not* followed by 'set-torture-options', *not* followed by 'torture-finish' (that is, 'i386-prefetch.exp'), then 'gcc-dg-runtest' ('riscv.exp', for example), which internally skips 'torture-init' (due to 'torture-init-done', due to 'LTO_TORTURE_OPTIONS'), but it does 'set-torture-options', then skips 'torture-finish', and then any next 'torture-init' detects the mismatch, thus ERRORs. I can be convinced otherwise, but I still maintain my position that requiring/checking proper bracketing of 'torture-init', 'torture-finish' is advisable, and therefore propose to not re-do my 'LTO_TORTURE_OPTIONS' changes, but indeed suggest to apply Vineet's patch, with a minor change, see below. (I cannot formally approve it, however; testsuite maintainers CCed.) As for your proposed patch: <https://inbox.sourceware.org/20230531162534.119952-2-vine...@rivosinc.com>, I suggest to move the "early exit" in front of all the setup code, that is, right after license header: [...] # <http://www.gnu.org/licenses/>. [HERE] # Test that the correct data prefetch instructions (SSE or 3DNow! variant, [...] 
Grüße Thomas > Otherwise: > >> I do have a multilib problem [with libgomp] on Darwin (which has been >> noticed : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109951) but it is not >> obvious how the fix proposed would solve this - unless it’s some subtle >> change in global content for the multilib options. >> >> (testing anyway) > > No, this is really a separate issue. I understand what's happening, and > have an idea about how to address this that I'll post later. > > > Grüße > Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Re: [PATCH 2/3] RISC-V: Add missing torture-init and torture-finish for rvv.exp
Hi! On 2023-05-31T09:25:33-0700, Vineet Gupta wrote: > From: Kito Cheng > > This is in line with recent test harness expectations and is a > preventive change as it doesn't actually fix any errors. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/rvv.exp: Add torture-init and > torture-finish. > > Signed-off-by: Vineet Gupta > --- > gcc/testsuite/gcc.target/riscv/rvv/rvv.exp | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp > b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp > index 5e69235a268c..7ab7456d1d15 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp > +++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp > @@ -39,6 +39,7 @@ if [istarget riscv32-*-*] then { > > # Initialize `dg'. > dg-init > +torture-init > > # Main loop. > set CFLAGS "$DEFAULT_CFLAGS -march=$gcc_march -mabi=$gcc_mabi -O3" > @@ -90,5 +91,7 @@ foreach op $AUTOVEC_TEST_OPTS { > dg-runtest [lsort [glob -nocomplain > $srcdir/$subdir/autovec/vls-vlmax/*.\[cS\]]] \ > "-std=c99 -O3 -ftree-vectorize --param > riscv-autovec-preference=fixed-vlmax" $CFLAGS > > +torture-finish > + > # All done. > dg-finish I suggest to drop this patch: 'gcc.target/riscv/rvv/rvv.exp' isn't doing anything with torture testing flags etc., but (in addition to 'dg-runtest') just calls 'gcc-dg-runtest', which internally does 'torture-init', 'torture-finish' -- like in a number of other '*.exp' files. As you say, this patch "doesn't actually fix any errors". Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Consider '--with-build-sysroot=[...]' for target libraries' build-tree testing (instead of build-time 'CC' etc.) [PR109951]
lappend ALWAYS_CFLAGS > "additional_flags=--sysroot=$SYSROOT_CFLAGS_FOR_TARGET" > + } Need 'global SYSROOT_CFLAGS_FOR_TARGET'. Need to change: - lappend ALWAYS_CFLAGS "additional_flags=--sysroot=$SYSROOT_CFLAGS_FOR_TARGET" + lappend ALWAYS_CFLAGS "additional_flags=$SYSROOT_CFLAGS_FOR_TARGET" ..., as 'SYSROOT_CFLAGS_FOR_TARGET' already includes the '--sysroot=' prefix. > --- libgomp/testsuite/libgomp-test-support.exp.in (revision 279954) > +++ libgomp/testsuite/libgomp-test-support.exp.in (working copy) > @@ -1,5 +1,3 @@ > -set GCC_UNDER_TEST {@CC@} Also need to capture 'SYSROOT_CFLAGS_FOR_TARGET' here, so that it's then available at test time. > --- Makefile.tpl (revision 279954) > +++ Makefile.tpl (working copy) > @@ -322,6 +322,7 @@ RAW_CXX_TARGET_EXPORTS = \ > > NORMAL_TARGET_EXPORTS = \ > $(BASE_TARGET_EXPORTS) \ > + SYSROOT_CFLAGS_FOR_TARGET="$(SYSROOT_CFLAGS_FOR_TARGET)"; export > SYSROOT_CFLAGS_FOR_TARGET; \ > CXX="$(CXX_FOR_TARGET) $(XGCC_FLAGS_FOR_TARGET) $$TFLAGS"; export CXX; With that one moved into the generic 'BASE_TARGET_EXPORTS', adapting things to the current state of affairs plus some polishing, the attached "Consider '--with-build-sysroot=[...]' for target libraries' build-tree testing (instead of build-time 'CC' etc.) [PR109951]" appears to be doing the right thing per my (limited, so far) testing. Will you, Maciej, please test that this doesn't break your setting? Will you, Iain, please test that this does address <https://gcc.gnu.org/PR109951> "libgomp, testsuite: non-native multilib c++ tests fail on Darwin"? Anybody got any other comments? Once that's all clear, we may either push this one already, and then I work on applying the same changes to other target libraries incrementally, or do all that at once. I suppose: "push this one already", to restore Iain's testing as well as giving these changes some wider exposure, and in parallel "I work on applying the same changes to other target libraries incrementally"? 
Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From ab5d1d3ef91b2ad7e1ed262c9487f727b79ceb3f Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Thu, 1 Jun 2023 23:07:37 +0200 Subject: [PATCH] Consider '--with-build-sysroot=[...]' for target libraries' build-tree testing (instead of build-time 'CC' etc.) [PR109951] PR testsuite/109951 * Makefile.tpl (BASE_TARGET_EXPORTS): Add 'SYSROOT_CFLAGS_FOR_TARGET'. * Makefile.in: Regenerate. libgomp/ * configure.ac: Revert earlier changes, instead 'AC_SUBST(SYSROOT_CFLAGS_FOR_TARGET)'. * Makefile.in: Regenerate. * configure: Likewise. * testsuite/Makefile.in: Likewise. * testsuite/lib/libgomp.exp (libgomp_init): Remove "Fix up '-funconfigured-libstdc++-v3' in 'GXX_UNDER_TEST'" code. If '--with-build-sysroot=[...]' was specified, use it for build-tree testing. * testsuite/libgomp-site-extra.exp.in (GCC_UNDER_TEST) (GXX_UNDER_TEST, GFORTRAN_UNDER_TEST): Don't set. (SYSROOT_CFLAGS_FOR_TARGET): Set. * testsuite/libgomp.c++/c++.exp (lang_source_re) (lang_include_flags): Set for build-tree testing. * testsuite/libgomp.oacc-c++/c++.exp (lang_source_re) (lang_include_flags): Likewise. 
Co-authored-by: Chung-Lin Tang --- Makefile.in | 1 + Makefile.tpl| 1 + libgomp/Makefile.in | 2 +- libgomp/configure | 17 - libgomp/configure.ac| 15 +++ libgomp/testsuite/Makefile.in | 2 +- libgomp/testsuite/lib/libgomp.exp | 18 +- libgomp/testsuite/libgomp-site-extra.exp.in | 4 +--- libgomp/testsuite/libgomp.c++/c++.exp | 6 ++ libgomp/testsuite/libgomp.oacc-c++/c++.exp | 6 ++ 10 files changed, 29 insertions(+), 43 deletions(-) diff --git a/Makefile.in b/Makefile.in index b559454cc90..e75bd98bde5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -320,6 +320,7 @@ BASE_TARGET_EXPORTS = \ RANLIB="$(RANLIB_FOR_TARGET)"; export RANLIB; \ READELF="$(READELF_FOR_TARGET)"; export READELF; \ STRIP="$(STRIP_FOR_TARGET)"; export STRIP; \ + SYSROOT_CFLAGS_FOR_TARGET="$(SYSROOT_CFLAGS_FOR_TARGET)"; export SYSROOT_CFLAGS_FOR_TARGET; \ WINDRES="$(WINDRES_FOR_TARGET)"; export WINDRES; \ WINDMC="$(WINDMC_FOR_TARGET)"; export WI
Add 'libgomp.{,oacc-}fortran/fortran-torture_execute_math.f90'
Hi! OK to push the attached "Add 'libgomp.{,oacc-}fortran/fortran-torture_execute_math.f90'"? Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 0d5095d8cd2d68113890a39a7fdb649198e576c1 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Fri, 2 Jun 2023 23:11:00 +0200 Subject: [PATCH] Add 'libgomp.{,oacc-}fortran/fortran-torture_execute_math.f90' gcc/testsuite/ * gfortran.fortran-torture/execute/math.f90: Enhance for optional OpenACC, OpenMP 'target' usage. libgomp/ * testsuite/libgomp.fortran/fortran-torture_execute_math.f90: New. * testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90: Likewise. --- .../gfortran.fortran-torture/execute/math.f90 | 23 +-- .../fortran-torture_execute_math.f90 | 4 .../fortran-torture_execute_math.f90 | 5 3 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90 diff --git a/gcc/testsuite/gfortran.fortran-torture/execute/math.f90 b/gcc/testsuite/gfortran.fortran-torture/execute/math.f90 index 17cc78f7a10..e71f669304f 100644 --- a/gcc/testsuite/gfortran.fortran-torture/execute/math.f90 +++ b/gcc/testsuite/gfortran.fortran-torture/execute/math.f90 @@ -1,9 +1,14 @@ ! Program to test mathematical intrinsics + +! See also 'libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90'; thus the '!$omp' directives. +! See also 'libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90'; thus the '!$acc' directives. + subroutine dotest (n, val4, val8, known) implicit none real(kind=4) val4, known real(kind=8) val8 integer n + !$acc routine seq if (abs (val4 - known) .gt. 0.001) STOP 1 if (abs (real (val8, kind=4) - known) .gt. 
0.001) STOP 2 @@ -14,17 +19,20 @@ subroutine dotestc (n, val4, val8, known) complex(kind=4) val4, known complex(kind=8) val8 integer n + !$acc routine seq + if (abs (val4 - known) .gt. 0.001) STOP 3 if (abs (cmplx (val8, kind=4) - known) .gt. 0.001) STOP 4 end subroutine -program testmath +subroutine testmath implicit none real(kind=4) r, two4, half4 real(kind=8) q, two8, half8 complex(kind=4) cr complex(kind=8) cq external dotest, dotestc + !$acc routine seq two4 = 2.0 two8 = 2.0_8 @@ -96,5 +104,16 @@ program testmath cq = log ((-1.0_8, -1.0_8)) call dotestc (21, cr, cq, (0.3466, -2.3562)) -end program +end subroutine +program main + implicit none + external testmath + + !$acc serial + !$omp target + call testmath + !$acc end serial + !$omp end target + +end program diff --git a/libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90 b/libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90 new file mode 100644 index 000..3348a0bb3ad --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90 @@ -0,0 +1,4 @@ +! { dg-do run } +! { dg-additional-options -foffload-options=-lm } + +include '../../../gcc/testsuite/gfortran.fortran-torture/execute/math.f90' diff --git a/libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90 b/libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90 new file mode 100644 index 000..1b2ac440762 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90 @@ -0,0 +1,5 @@ +! { dg-do run } +!TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} } +! { dg-additional-options -foffload-options=-lm } + +include '../../../gcc/testsuite/gfortran.fortran-torture/execute/math.f90' -- 2.34.1
driver: Forward '-lgfortran', '-lm' to offloading compilation
Hi! OK to push the attached "driver: Forward '-lgfortran', '-lm' to offloading compilation"? (We didn't have a PR open for that, or did we?) Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 5d3cb866cad3bbcf47c5e66825e5710e86cc017e Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Mon, 5 Jun 2023 11:26:37 +0200 Subject: [PATCH] driver: Forward '-lgfortran', '-lm' to offloading compilation ..., so that users don't manually need to specify '-foffload-options=-lgfortran', '-foffload-options=-lm' in addition to '-lgfortran', '-lm' (specified manually, or implicitly by the driver). gcc/ * gcc.cc (driver_handle_option): Forward host '-lgfortran', '-lm' to offloading compilation. * config/gcn/mkoffload.cc (main): Adjust. * config/nvptx/mkoffload.cc (main): Likewise. * doc/invoke.texi (foffload-options): Update example. libgomp/ * testsuite/libgomp.fortran/fortran.exp (lang_link_flags): Don't set. * testsuite/libgomp.oacc-fortran/fortran.exp (lang_link_flags): Likewise. * testsuite/libgomp.c/simd-math-1.c: Remove '-foffload-options=-lm'. * testsuite/libgomp.fortran/fortran-torture_execute_math.f90: Likewise. * testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90: Likewise. 
--- gcc/config/gcn/mkoffload.cc | 12 gcc/config/nvptx/mkoffload.cc | 12 gcc/doc/invoke.texi | 5 +- gcc/gcc.cc| 56 +++ libgomp/testsuite/libgomp.c/simd-math-1.c | 1 - .../fortran-torture_execute_math.f90 | 1 - libgomp/testsuite/libgomp.fortran/fortran.exp | 2 - .../fortran-torture_execute_math.f90 | 1 - .../libgomp.oacc-fortran/fortran.exp | 2 - 9 files changed, 82 insertions(+), 10 deletions(-) diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc index 988c12318fd..8b608bf024e 100644 --- a/gcc/config/gcn/mkoffload.cc +++ b/gcc/config/gcn/mkoffload.cc @@ -946,6 +946,18 @@ main (int argc, char **argv) else if (startswith (argv[i], STR)) gcn_stack_size = atoi (argv[i] + strlen (STR)); #undef STR + /* Translate host into offloading libraries. */ + else if (strcmp (argv[i], "-l_GCC_gfortran") == 0 + || strcmp (argv[i], "-l_GCC_m") == 0) + { + /* Elide '_GCC_'. */ + size_t i_dst = strlen ("-l"); + size_t i_src = strlen ("-l_GCC_"); + char c; + do + c = argv[i][i_dst++] = argv[i][i_src++]; + while (c != '\0'); + } } if (!(fopenacc ^ fopenmp)) diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc index 6cdea45cffe..aaea9fb320d 100644 --- a/gcc/config/nvptx/mkoffload.cc +++ b/gcc/config/nvptx/mkoffload.cc @@ -649,6 +649,18 @@ main (int argc, char **argv) else if (strcmp (argv[i], "-dumpbase") == 0 && i + 1 < argc) dumppfx = argv[++i]; + /* Translate host into offloading libraries. */ + else if (strcmp (argv[i], "-l_GCC_gfortran") == 0 + || strcmp (argv[i], "-l_GCC_m") == 0) + { + /* Elide '_GCC_'. 
*/ + size_t i_dst = strlen ("-l"); + size_t i_src = strlen ("-l_GCC_"); + char c; + do + c = argv[i][i_dst++] = argv[i][i_src++]; + while (c != '\0'); + } } if (!(fopenacc ^ fopenmp)) fatal_error (input_location, "either %<-fopenacc%> or %<-fopenmp%> " diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index d2d639c92d4..7b3a2a74459 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -2716,9 +2716,8 @@ the @option{-foffload-options=@var{target-list}=@var{options}} form. The Typical command lines are @smallexample --foffload-options=-lgfortran -foffload-options=-lm --foffload-options="-lgfortran -lm" -foffload-options=nvptx-none=-latomic --foffload-options=amdgcn-amdhsa=-march=gfx906 -foffload-options=-lm +-foffload-options='-fno-math-errno -ffinite-math-only' -foffload-options=nvptx-none=-latomic +-foffload-options=amdgcn-amdhsa=-march=gfx906 -foffload-options=-O3 @end smallexample @opindex fopenacc diff --git a/gcc/gcc.cc b/gcc/gcc.cc index 2ccca00d603..15995206856 100644 --- a/gcc/gcc.cc +++ b/gcc/gcc.cc @@ -47,6 +47,9 @@ compilation is specified by a string called a "spec". */ #include "opts-jobserver.h" #include "common/common-target.h" +#ifndef MATH_LIBRARY +#define MATH_LIBRARY "m" +#endif /* Manage the manipulation of env vars. @@ -4117,6 +4120,48 @@ next_item: } }
Support 'UNSUPPORTED: [...]: exception handling disabled' for libstdc++ testing (was: Support in the GCC(/C++) test suites for '-fno-exceptions')
Hi! On 2023-06-06T20:31:21+0100, Jonathan Wakely wrote: > On Tue, 6 Jun 2023 at 20:14, Thomas Schwinge > wrote: >> This issue comes up in context of me working on C++ support for GCN and >> nvptx target. Those targets shall default to '-fno-exceptions' -- or, >> "in other words", '-fexceptions' is not supported. (Details omitted >> here.) >> >> It did seem clear to me that with such a configuration it'll be hard to >> get clean test results. Then I found code in >> 'gcc/testsuite/lib/gcc-dg.exp:gcc-dg-prune': >> >> # If exceptions are disabled, mark tests expecting exceptions to be >> enabled >> # as unsupported. >> if { ![check_effective_target_exceptions_enabled] } { >> if [regexp "(^|\n)\[^\n\]*: error: exception handling disabled" >> $text] { >> return "::unsupported::exception handling disabled" >> } >> >> ..., which, in a way, sounds as if the test suite generally is meant to >> produce useful results for '-fno-exceptions', nice surprise! >> >> Running x86_64-pc-linux-gnu (not yet GCN, nvptx) 'make check' with: >> >> RUNTESTFLAGS='--target_board=unix/-fno-exceptions\{,-m32\}' >> >> ..., I find that indeed this does work for a lot of test cases, where we >> then get (random example): >> >> PASS: g++.dg/coroutines/pr99710.C (test for errors, line 23) >> -PASS: g++.dg/coroutines/pr99710.C (test for excess errors) >> +UNSUPPORTED: g++.dg/coroutines/pr99710.C: exception handling disabled >> >> ..., due to: >> >> [...]/g++.dg/coroutines/pr99710.C: In function 'task my_coro()': >> +[...]/g++.dg/coroutines/pr99710.C:18:10: error: exception handling >> disabled, use '-fexceptions' to enable >> [...]/g++.dg/coroutines/pr99710.C:23:7: error: await expressions are >> not permitted in handlers >> compiler exited with status 1 >> >> But, we're nowhere near clean test results: PASS -> FAIL as well as >> XFAIL -> XPASS regressions, due to 'error: exception handling disabled' >> precluding other diagnostics seems to be one major issue. 
>> >> Is there interest in me producing the obvious (?) changes to those test >> cases, such that compiler g++ as well as target library libstdc++ test >> results are reasonably clean? (If you think that's all "wasted effort", >> then I suppose I'll just locally ignore any FAILs/XPASSes/UNRESOLVEDs >> that appear in combination with >> 'UNSUPPORTED: [...]: exception handling disabled'.) > > I would welcome that for libstdc++. Assuming no issues found in testing, OK to push the attached "Support 'UNSUPPORTED: [...]: exception handling disabled' for libstdc++ testing"? (Thanks, Jozef!) > I do sometimes run the libstdc++ tests > with "unusual" options, like -fno-exceptions and -fno-rtti (e.g. today I've > been fixing FAILs that only happen with -fexcess-precision=standard). I > just manually ignore the tests that fail for -fno-exceptions, but it would > be great if they were automatically skipped as UNSUPPORTED. > > We already have a handful of tests that use #if __cpp_exceptions to make > those parts conditional on exception support. We also have exactly one test > that is currently UNSUPPORTED when -fno-exceptions is used: > testsuite/18_support/nested_exception/rethrow_if_nested-term.cc:// { > dg-skip-if "" { *-*-* } { "-fno-exceptions" } } ACK -- that'll only work for explicit '-fno-exceptions', but not for implicit (say, via 'CC1PLUS_SPEC'), right? So, indeed: > That could be changed to use an effective target keyword instead. I'll look into that later. > To add an effective-target to the libstdc++ testsuite would be as simple as: > > --- a/libstdc++-v3/testsuite/lib/libstdc++.exp > +++ b/libstdc++-v3/testsuite/lib/libstdc++.exp > @@ -1421,6 +1421,14 @@ proc check_effective_target_tzdb { } { > }] > } > > +# Return 1 if exception handling is enabled. 
> +proc check_effective_target_exceptions_enabled { } { > +return [check_v3_target_prop_cached et_eh { > + set cond "defined __cpp_exceptions" > + return [v3_check_preprocessor_condition eh $cond] > +}] > +} > + Well, we don't even need to do that, because: > However, you probably want to add it to the main testsuite instead, which > would be a little more involved (the v3_check_preprocessor_condition proc > is specific to libstdc++). ..., this
Re: Support 'UNSUPPORTED: [...]: exception handling disabled' for libstdc++ testing (was: Support in the GCC(/C++) test suites for '-fno-exceptions')
Hi! On 2023-06-07T09:12:31+0100, Jonathan Wakely wrote: > On Wed, 7 Jun 2023 at 08:13, Thomas Schwinge wrote: >> On 2023-06-06T20:31:21+0100, Jonathan Wakely wrote: >> > On Tue, 6 Jun 2023 at 20:14, Thomas Schwinge >> > wrote: >> >> This issue comes up in context of me working on C++ support for GCN and >> >> nvptx target. Those targets shall default to '-fno-exceptions' -- or, >> >> "in other words", '-fexceptions' is not supported. (Details omitted >> >> here.) >> >> >> >> It did seem clear to me that with such a configuration it'll be hard to >> >> get clean test results. Then I found code in >> >> 'gcc/testsuite/lib/gcc-dg.exp:gcc-dg-prune': >> >> >> >> # If exceptions are disabled, mark tests expecting exceptions to be >> >> enabled >> >> # as unsupported. >> >> if { ![check_effective_target_exceptions_enabled] } { >> >> if [regexp "(^|\n)\[^\n\]*: error: exception handling disabled" >> >> $text] { >> >> return "::unsupported::exception handling disabled" >> >> } >> >> >> >> ..., which, in a way, sounds as if the test suite generally is meant to >> >> produce useful results for '-fno-exceptions', nice surprise! 
>> >> >> >> Running x86_64-pc-linux-gnu (not yet GCN, nvptx) 'make check' with: >> >> >> >> RUNTESTFLAGS='--target_board=unix/-fno-exceptions\{,-m32\}' >> >> >> >> ..., I find that indeed this does work for a lot of test cases, where we >> >> then get (random example): >> >> >> >> PASS: g++.dg/coroutines/pr99710.C (test for errors, line 23) >> >> -PASS: g++.dg/coroutines/pr99710.C (test for excess errors) >> >> +UNSUPPORTED: g++.dg/coroutines/pr99710.C: exception handling >> disabled >> >> >> >> ..., due to: >> >> >> >> [...]/g++.dg/coroutines/pr99710.C: In function 'task my_coro()': >> >> +[...]/g++.dg/coroutines/pr99710.C:18:10: error: exception handling >> >> disabled, use '-fexceptions' to enable >> >> [...]/g++.dg/coroutines/pr99710.C:23:7: error: await expressions >> are >> >> not permitted in handlers >> >> compiler exited with status 1 >> >> >> >> But, we're nowhere near clean test results: PASS -> FAIL as well as >> >> XFAIL -> XPASS regressions, due to 'error: exception handling disabled' >> >> precluding other diagnostics seems to be one major issue. >> >> >> >> Is there interest in me producing the obvious (?) changes to those test >> >> cases, such that compiler g++ as well as target library libstdc++ test >> >> results are reasonably clean? (If you think that's all "wasted effort", >> >> then I suppose I'll just locally ignore any FAILs/XPASSes/UNRESOLVEDs >> >> that appear in combination with >> >> 'UNSUPPORTED: [...]: exception handling disabled'.) >> > >> > I would welcome that for libstdc++. >> >> Assuming no issues found in testing, OK to push the attached >> "Support 'UNSUPPORTED: [...]: exception handling disabled' for libstdc++ >> testing"? >> (Thanks, Jozef!) > > Yes please. Pushed commit r14-1604-g5faaabef3819434d13fcbf749bd07bfc98ca7c3c "Support 'UNSUPPORTED: [...]: exception handling disabled' for libstdc++ testing" to master branch, as posted. 
For one-week-old GCC commit 2720bbd597f56742a17119dfe80edc2ba86af255, x86_64-pc-linux-gnu, I see no changes without '-fno-exceptions' (as expected), and otherwise: === libstdc++ Summary for [-unix-]{+unix/-fno-exceptions+} === # of expected passes[-15044-]{+12877+} # of unexpected failures[-5-]{+10+} # of expected failures [-106-]{+77+} {+# of unresolved testcases 6+} # of unsupported tests [-747-]{+1846+} As expected, there's a good number of (random example): -PASS: 18_support/105387.cc (test for excess errors) -PASS: 18_support/105387.cc execution test +UNSUPPORTED: 18_support/105387.cc: exception handling disabled ..., plus the following: [-PASS:-]{+FAIL:+} 23_containers/vector/capacity/constexpr.cc (test for excess errors) [...]/libstdc++-v3/testsuite/23_containers/vector/capacity/constexpr.cc:101: error: non-
Remove 'gcc/testsuite/g++.dg/warn/Wfree-nonheap-object.s' (was: [PATCH] add -Wmismatched-new-delete to middle end (PR 90629))
Hi! On 2020-11-03T16:56:48-0700, Martin Sebor via Gcc-patches wrote: > Attached is a simple middle end implementation of detection of > mismatched pairs of calls to C++ new and delete, along with > a substantially enhanced implementation of -Wfree-nonheap-object. This eventually became commit dce6c58db87ebf7f4477bd3126228e73e497 "Add support for detecting mismatched allocation/deallocation calls". Already in this original patch submission: > diff --git a/gcc/testsuite/g++.dg/warn/Wfree-nonheap-object.s > b/gcc/testsuite/g++.dg/warn/Wfree-nonheap-object.s > new file mode 100644 > index 000..e69de29bb2d OK to push the attached "Remove 'gcc/testsuite/g++.dg/warn/Wfree-nonheap-object.s'"? Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From d04c97b40a07bd2a3205d9de8577024f5d26aba0 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Wed, 7 Jun 2023 16:01:39 +0200 Subject: [PATCH] Remove 'gcc/testsuite/g++.dg/warn/Wfree-nonheap-object.s' ..., which, presumably, was added by mistake in commit dce6c58db87ebf7f4477bd3126228e73e497 "Add support for detecting mismatched allocation/deallocation calls". gcc/testsuite/ * g++.dg/warn/Wfree-nonheap-object.s: Remove. --- gcc/testsuite/g++.dg/warn/Wfree-nonheap-object.s | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 gcc/testsuite/g++.dg/warn/Wfree-nonheap-object.s diff --git a/gcc/testsuite/g++.dg/warn/Wfree-nonheap-object.s b/gcc/testsuite/g++.dg/warn/Wfree-nonheap-object.s deleted file mode 100644 index e69de29bb2d..000 -- 2.34.1
Tighten 'dg-warning' alternatives in 'c-c++-common/Wfree-nonheap-object{,-2,-3}.c' (was: [PATCH] correct -Wmismatched-new-delete (PR 98160, 98166))
Hi! On 2020-12-08T13:46:32-0700, Martin Sebor via Gcc-patches wrote: > The attached changes [...] ... eventually became commit fe7f75cf16783589eedbab597e6d0b8d35d7e470 "Correct/improve maybe_emit_free_warning (PR middle-end/98166, PR c++/57111, PR middle-end/98160)". > * c-c++-common/Wfree-nonheap-object-2.c: New test. > * c-c++-common/Wfree-nonheap-object-3.c: New test. > * c-c++-common/Wfree-nonheap-object.c: New test. OK to push the attached "Tighten 'dg-warning' alternatives in 'c-c++-common/Wfree-nonheap-object{,-2,-3}.c'"? Grüße Thomas > diff --git a/gcc/testsuite/c-c++-common/Wfree-nonheap-object-2.c > b/gcc/testsuite/c-c++-common/Wfree-nonheap-object-2.c > new file mode 100644 > index 000..0aedf1babbc > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/Wfree-nonheap-object-2.c > @@ -0,0 +1,52 @@ > +/* PR middle-end/98166: bogus -Wmismatched-dealloc on user-defined allocator > + and inlining > + Verify that the allocator can be declared inline without a warning when > + it's associated with a standard deallocator. Associating an inline > + deallocator with an allocator would cause false positives when the former > + calls a deallocation function the allocator isn't associated with, so > + that triggers a warning on declaration. 
> + { dg-do compile } > + { dg-options "-O2 -Wall" } */ > + > +__attribute__ ((malloc (__builtin_free))) > +inline int* > +alloc_int (int n) > +{ > + return (int*)__builtin_malloc (n + sizeof (int)); > +} > + > +void test_nowarn_int (int n) > +{ > + { > +int *p = alloc_int (n); > +__builtin_free (p); > + } > + > + { > +int *p = alloc_int (n); > +__builtin_free (p + 1); // { dg-warning "\\\[-Wfree-nonheap-object" } > + } > +} > + > + > +inline void > +dealloc_long (long *p) > +{ > + __builtin_free (p); // { dg-warning "'__builtin_free|void > __builtin_free\\(void\\*\\)' called on pointer 'p|' with nonzero > offset" } > +} > + > +__attribute__ ((malloc (dealloc_long))) > +long* alloc_long (int); // { dg-warning "'malloc \\\(dealloc_long\\\)' > attribute ignored with deallocation functions declared 'inline'" } > + > +void test_nowarn_long (int n) > +{ > + { > +long *p = alloc_long (n); > +dealloc_long (p); > + } > + > + { > +long *p = alloc_long (n); > +dealloc_long (p + 1); > + } > +} > diff --git a/gcc/testsuite/c-c++-common/Wfree-nonheap-object-3.c > b/gcc/testsuite/c-c++-common/Wfree-nonheap-object-3.c > new file mode 100644 > index 000..41a5b50362e > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/Wfree-nonheap-object-3.c > @@ -0,0 +1,70 @@ > +/* PR middle-end/98166: bogus -Wmismatched-dealloc on user-defined allocator > + and inlining > + Verify that without inlining, both the allocator and the deallocator > + can be declared inline without a warning and that mismatched calls are > + detected, but that declaring them always_inline does trigger a warning. 
> + { dg-do compile } > + { dg-options "-Wall" } */ > + > +__attribute__ ((malloc (__builtin_free))) > +inline int* > +alloc_int (int n) > +{ > + return (int*)__builtin_malloc (n + sizeof (int)); > +} > + > +void test_nowarn_int (int n) > +{ > + { > +int *p = alloc_int (n); > +__builtin_free (p); > + } > + > + { > +int *p = alloc_int (n); > +__builtin_free (p + 1); // { dg-warning "'__builtin_free|void > __builtin_free\\(void\\*\\)' called on pointer 'p|' with nonzero > offset" } > + } > +} > + > + > +inline void > +dealloc_long (long *p) { __builtin_free (p); } > + > +__attribute__ ((malloc (dealloc_long))) > +long* alloc_long (int); > + > +void test_nowarn_long (int n) > +{ > + { > +long *p = alloc_long (n); > +dealloc_long (p); > + } > + > + { > +long *p = alloc_long (n); > +dealloc_long (p + 1); // { dg-warning "'dealloc_long' called on > pointer 'p|' with nonzero offset" } > + } > +} > + > + > +inline __attribute__ ((always_inline)) void > +dealloc_float (float *p) // { dg-message "deallocation function > declared here" } > +{ > + __builtin_free (p); // { dg-warning "'__builtin_free|void > __builtin_free\\(void\\*\\)' called on pointer 'p|' with nonzero > offset" } > +} > + > +__attribute__ ((malloc (dealloc_float))) > +float* alloc_float (int); // { dg-warning "'
[ping] Add 'libgomp.{,oacc-}fortran/fortran-torture_execute_math.f90'
Hi! On 2023-06-05T14:18:48+0200, I wrote: > OK to push the attached > "Add 'libgomp.{,oacc-}fortran/fortran-torture_execute_math.f90'"? Ping. Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 0d5095d8cd2d68113890a39a7fdb649198e576c1 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Fri, 2 Jun 2023 23:11:00 +0200 Subject: [PATCH] Add 'libgomp.{,oacc-}fortran/fortran-torture_execute_math.f90' gcc/testsuite/ * gfortran.fortran-torture/execute/math.f90: Enhance for optional OpenACC, OpenMP 'target' usage. libgomp/ * testsuite/libgomp.fortran/fortran-torture_execute_math.f90: New. * testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90: Likewise. --- .../gfortran.fortran-torture/execute/math.f90 | 23 +-- .../fortran-torture_execute_math.f90 | 4 .../fortran-torture_execute_math.f90 | 5 3 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90 diff --git a/gcc/testsuite/gfortran.fortran-torture/execute/math.f90 b/gcc/testsuite/gfortran.fortran-torture/execute/math.f90 index 17cc78f7a10..e71f669304f 100644 --- a/gcc/testsuite/gfortran.fortran-torture/execute/math.f90 +++ b/gcc/testsuite/gfortran.fortran-torture/execute/math.f90 @@ -1,9 +1,14 @@ ! Program to test mathematical intrinsics + +! See also 'libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90'; thus the '!$omp' directives. +! See also 'libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90'; thus the '!$acc' directives. + subroutine dotest (n, val4, val8, known) implicit none real(kind=4) val4, known real(kind=8) val8 integer n + !$acc routine seq if (abs (val4 - known) .gt. 
0.001) STOP 1 if (abs (real (val8, kind=4) - known) .gt. 0.001) STOP 2 @@ -14,17 +19,20 @@ subroutine dotestc (n, val4, val8, known) complex(kind=4) val4, known complex(kind=8) val8 integer n + !$acc routine seq + if (abs (val4 - known) .gt. 0.001) STOP 3 if (abs (cmplx (val8, kind=4) - known) .gt. 0.001) STOP 4 end subroutine -program testmath +subroutine testmath implicit none real(kind=4) r, two4, half4 real(kind=8) q, two8, half8 complex(kind=4) cr complex(kind=8) cq external dotest, dotestc + !$acc routine seq two4 = 2.0 two8 = 2.0_8 @@ -96,5 +104,16 @@ program testmath cq = log ((-1.0_8, -1.0_8)) call dotestc (21, cr, cq, (0.3466, -2.3562)) -end program +end subroutine +program main + implicit none + external testmath + + !$acc serial + !$omp target + call testmath + !$acc end serial + !$omp end target + +end program diff --git a/libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90 b/libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90 new file mode 100644 index 000..3348a0bb3ad --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90 @@ -0,0 +1,4 @@ +! { dg-do run } +! { dg-additional-options -foffload-options=-lm } + +include '../../../gcc/testsuite/gfortran.fortran-torture/execute/math.f90' diff --git a/libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90 b/libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90 new file mode 100644 index 000..1b2ac440762 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90 @@ -0,0 +1,5 @@ +! { dg-do run } +!TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} } +! { dg-additional-options -foffload-options=-lm } + +include '../../../gcc/testsuite/gfortran.fortran-torture/execute/math.f90' -- 2.34.1
[ping] driver: Forward '-lgfortran', '-lm' to offloading compilation
Hi! On 2023-06-05T14:25:18+0200, I wrote: > OK to push the attached > "driver: Forward '-lgfortran', '-lm' to offloading compilation"? > (We didn't have a PR open for that, or did we?) Ping. Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 5d3cb866cad3bbcf47c5e66825e5710e86cc017e Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Mon, 5 Jun 2023 11:26:37 +0200 Subject: [PATCH] driver: Forward '-lgfortran', '-lm' to offloading compilation ..., so that users don't manually need to specify '-foffload-options=-lgfortran', '-foffload-options=-lm' in addition to '-lgfortran', '-lm' (specified manually, or implicitly by the driver). gcc/ * gcc.cc (driver_handle_option): Forward host '-lgfortran', '-lm' to offloading compilation. * config/gcn/mkoffload.cc (main): Adjust. * config/nvptx/mkoffload.cc (main): Likewise. * doc/invoke.texi (foffload-options): Update example. libgomp/ * testsuite/libgomp.fortran/fortran.exp (lang_link_flags): Don't set. * testsuite/libgomp.oacc-fortran/fortran.exp (lang_link_flags): Likewise. * testsuite/libgomp.c/simd-math-1.c: Remove '-foffload-options=-lm'. * testsuite/libgomp.fortran/fortran-torture_execute_math.f90: Likewise. * testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90: Likewise. 
--- gcc/config/gcn/mkoffload.cc | 12 gcc/config/nvptx/mkoffload.cc | 12 gcc/doc/invoke.texi | 5 +- gcc/gcc.cc| 56 +++ libgomp/testsuite/libgomp.c/simd-math-1.c | 1 - .../fortran-torture_execute_math.f90 | 1 - libgomp/testsuite/libgomp.fortran/fortran.exp | 2 - .../fortran-torture_execute_math.f90 | 1 - .../libgomp.oacc-fortran/fortran.exp | 2 - 9 files changed, 82 insertions(+), 10 deletions(-) diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc index 988c12318fd..8b608bf024e 100644 --- a/gcc/config/gcn/mkoffload.cc +++ b/gcc/config/gcn/mkoffload.cc @@ -946,6 +946,18 @@ main (int argc, char **argv) else if (startswith (argv[i], STR)) gcn_stack_size = atoi (argv[i] + strlen (STR)); #undef STR + /* Translate host into offloading libraries. */ + else if (strcmp (argv[i], "-l_GCC_gfortran") == 0 + || strcmp (argv[i], "-l_GCC_m") == 0) + { + /* Elide '_GCC_'. */ + size_t i_dst = strlen ("-l"); + size_t i_src = strlen ("-l_GCC_"); + char c; + do + c = argv[i][i_dst++] = argv[i][i_src++]; + while (c != '\0'); + } } if (!(fopenacc ^ fopenmp)) diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc index 6cdea45cffe..aaea9fb320d 100644 --- a/gcc/config/nvptx/mkoffload.cc +++ b/gcc/config/nvptx/mkoffload.cc @@ -649,6 +649,18 @@ main (int argc, char **argv) else if (strcmp (argv[i], "-dumpbase") == 0 && i + 1 < argc) dumppfx = argv[++i]; + /* Translate host into offloading libraries. */ + else if (strcmp (argv[i], "-l_GCC_gfortran") == 0 + || strcmp (argv[i], "-l_GCC_m") == 0) + { + /* Elide '_GCC_'. 
*/ + size_t i_dst = strlen ("-l"); + size_t i_src = strlen ("-l_GCC_"); + char c; + do + c = argv[i][i_dst++] = argv[i][i_src++]; + while (c != '\0'); + } } if (!(fopenacc ^ fopenmp)) fatal_error (input_location, "either %<-fopenacc%> or %<-fopenmp%> " diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index d2d639c92d4..7b3a2a74459 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -2716,9 +2716,8 @@ the @option{-foffload-options=@var{target-list}=@var{options}} form. The Typical command lines are @smallexample --foffload-options=-lgfortran -foffload-options=-lm --foffload-options="-lgfortran -lm" -foffload-options=nvptx-none=-latomic --foffload-options=amdgcn-amdhsa=-march=gfx906 -foffload-options=-lm +-foffload-options='-fno-math-errno -ffinite-math-only' -foffload-options=nvptx-none=-latomic +-foffload-options=amdgcn-amdhsa=-march=gfx906 -foffload-options=-O3 @end smallexample @opindex fopenacc diff --git a/gcc/gcc.cc b/gcc/gcc.cc index 2ccca00d603..15995206856 100644 --- a/gcc/gcc.cc +++ b/gcc/gcc.cc @@ -47,6 +47,9 @@ compilation is specified by a string called a "spec". */ #include "opts-jobserver.h" #include "common/common-target.h" +#ifndef MATH_LIBRARY +#define MATH_LIBRARY "m" +#endif /* Manage the manipulatio
Fix typo in 'libgomp.c/target-51.c' (was: [patch] OpenMP: Set default-device-var with OMP_TARGET_OFFLOAD=mandatory)
Hi! On 2023-06-13T20:44:39+0200, Tobias Burnus wrote: > I intend to commit this tomorrow, unless there are comments. I'm sorry I'm late. ;-P > It does as it says (see commit log): It initializes default-device-var > to the value using the algorithm described in OpenMP 5.2, which > depends on whether OMP_TARGET_OFFLOAD=mandatory was set. > > NOTE: With -foffload=disable there is no binary code but still > devices get found - such that default-device-var == 0 (= first > nonhost device). Thus, in that case, libgomp runs the code on that > device but as no binary data is available, host fallback is used. > (Even if there would be executable code for another device on > the system.) > With mandatory, this unintended host fallback is detected and an > error is diagnosed. One can argue whether keeping the devices > makes sense (e.g. because in a dynamic library device code will > be loaded later) or not (don't list if no code is available). This reminds me of the (unresolved) <https://gcc.gnu.org/PR81886> "Means to determine at runtime foffload targets specified at compile time". > Note that TR11 (future OpenMP 6.0) extends OMP_DEFAULT_DEVICE and > adds OMP_AVAILABLE_DEVICES which permit a finer-grained control about > the device, including OMP_DEFAULT_DEVICE=initial (and 'invalid') which > the current scheme does not permit. (Well, there is > OMP_TARGET_OFFLOAD=disabled, but that's a too big hammer.) > PS: DejaGNU testing was done without offloading configured > and with remote testing on a system having an offload device, > which does not support setting environment variables. > Manual testing was done with offloading enabled and depending > on the testcase, running on a system with and/or without offloading > hardware. > --- a/libgomp/target.c > +++ b/libgomp/target.c > @@ -150,7 +150,11 @@ resolve_device (int device_id, bool remapped) >if (device_id == (remapped ? 
GOMP_DEVICE_HOST_FALLBACK >: omp_initial_device)) > return NULL; > - if (device_id == omp_invalid_device) > + if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY > + && gomp_get_num_devices () == 0) > + gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY but only the host " > + "device is available"); > + else if (device_id == omp_invalid_device) > gomp_fatal ("omp_invalid_device encountered"); >else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY) > gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, " | "but device not found"); | |return NULL; | } |else if (device_id >= gomp_get_num_devices ()) | { |if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY | && device_id != num_devices_openmp) | gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, " | "but device not found"); | |return NULL; | } | |gomp_mutex_lock (&devices[device_id].lock); |if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED) | gomp_init_device (&devices[device_id]); |else if (devices[device_id].state == GOMP_DEVICE_FINALIZED) | { |gomp_mutex_unlock (&devices[device_id].lock); | |if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY) | gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, " | "but device is finalized"); | |return NULL; | } |gomp_mutex_unlock (&devices[device_id].lock); | |return &devices[device_id]; | } > --- /dev/null > +++ b/libgomp/testsuite/libgomp.c/target-51.c > @@ -0,0 +1,24 @@ > +/* Check OMP_TARGET_OFFLOAD on systems with no available non-host devices, > + which is enforced by using -foffload=disable. */ > + > +/* { dg-do run } */ > +/* { dg-additional-options "-foffload=disable" } */ > +/* { dg-set-target-env-var OMP_TARGET_OFFLOAD "mandatory" } */ > + > +/* { dg-shouldfail "OMP_TARGET_OFFLOAD=mandatory and no available device" } > */ > + > +/* See comment in target-50.c/target-50.c for why the output differs. 
*/ > + > +/* { dg-output ".*libgomp: OMP_TARGET_OFFLOAD is set to MANDATORY but only > the host device is available.*" { target { ! offload_device } } } */ > +/* { dg-output ".*libgomp: OMP_TARGET_OFFLOAD is set to MANDATORY but device > not found.*" { target offload_device } } */ I intend to push the attached "Fix typo in 'libgomp.c/target-51.c'" after testing. Let me know if I should also adjust the new 'target { ! offload_device }' diagnostic "[...] MANDATORY but only
Re: [committed] OpenMP: Cleanups related to the 'present' modifier
Hi Tobias! On 2023-06-12T18:44:23+0200, Tobias Burnus wrote: > Cleanup follow up to >r14-1579-g4ede915d5dde93 "openmp: Add support for the 'present' modifier" > committed 6 days ago. > > Namely: > * Replace for the program → libgomp ABI > GOMP_MAP_PRESENT_[ALLOC,TO,FROM,TOFROM] >by the preexisting GOMP_MAP_FORCE_PRESENT but keep the other enum values >(and use them until gimplification). > > * Improve wording if a non-existing/unsupported map-type modifier was used >by not referring to 'omp target' as it could be also target (enter/exit) > data. >+ Add a testcase for enter/exit data + data. > > * Unify + improve wording shown for 'present' when not present on the device. > > * Extend in the testcases to check that data actually gets copied with >'target update' and 'map' when the 'present' modifier is present. > > Committed as Rev. r14-1736-g38944ec2a6fa10 > OpenMP: Cleanups related to the 'present' modifier > > Reduce number of enum values passed to libgomp as > GOMP_MAP_PRESENT_{TO,TOFROM,FROM,ALLOC} have the same semantic as > GOMP_MAP_FORCE_PRESENT (i.e. abort if not present, otherwise ignore); > that's different to GOMP_MAP_ALWAYS_PRESENT_{TO,TOFROM,FROM} which also > abort if not present but copy data when present. This is a follow-up to > the commit r14-1579-g4ede915d5dde93 done 6 days ago. Great, that matches how I thought this should be done (re our 2023-06-07 GCC IRC discussion). > Additionally, the commit [...] > extends testcases a tiny bit. > gcc/testsuite/ChangeLog: > * gfortran.dg/gomp/target-update-1.f90: Likewise. That one fixed <https://gcc.gnu.org/110178> "gfortran.dg/gomp/target-update-1.f90 fails after r14-1579-g4ede915d5dde93". > --- a/include/gomp-constants.h > +++ b/include/gomp-constants.h | #define GOMP_MAP_FLAG_PRESENT(GOMP_MAP_FLAG_SPECIAL_5 \ || GOMP_MAP_FLAG_SPECIAL_0) Couldn't/shouldn't we now get rid of this 'GOMP_MAP_FLAG_PRESENT'... 
| #define GOMP_MAP_FLAG_ALWAYS_PRESENT (GOMP_MAP_FLAG_SPECIAL_2 \ || GOMP_MAP_FLAG_PRESENT) ..., as it is only used in 'GOMP_MAP_FLAG_ALWAYS_PRESENT' here... > @@ -136,14 +136,6 @@ enum gomp_map_kind > device. */ > GOMP_MAP_ALWAYS_TOFROM = (GOMP_MAP_FLAG_SPECIAL_2 >| GOMP_MAP_TOFROM), > -/* Must already be present. */ > -GOMP_MAP_PRESENT_ALLOC = (GOMP_MAP_FLAG_PRESENT | > GOMP_MAP_ALLOC), > -/* Must already be present, copy to device. */ > -GOMP_MAP_PRESENT_TO =(GOMP_MAP_FLAG_PRESENT | GOMP_MAP_TO), > -/* Must already be present, copy from device. */ > -GOMP_MAP_PRESENT_FROM = (GOMP_MAP_FLAG_PRESENT | GOMP_MAP_FROM), > -/* Must already be present, copy to and from device. */ > -GOMP_MAP_PRESENT_TOFROM =(GOMP_MAP_FLAG_PRESENT | > GOMP_MAP_TOFROM), > /* Must already be present, unconditionally copy to device. */ > GOMP_MAP_ALWAYS_PRESENT_TO = (GOMP_MAP_FLAG_ALWAYS_PRESENT >| GOMP_MAP_TO), > @@ -205,7 +197,13 @@ enum gomp_map_kind > /* An attach or detach operation. Rewritten to the appropriate type > during > gimplification, depending on directive (i.e. "enter data" or > parallel/kernels region vs. "exit data"). */ > -GOMP_MAP_ATTACH_DETACH = (GOMP_MAP_LAST | 3) > +GOMP_MAP_ATTACH_DETACH = (GOMP_MAP_LAST | 3), > +/* Must already be present - all of following map to > GOMP_MAP_FORCE_PRESENT > + as no data transfer is needed. 
*/ > +GOMP_MAP_PRESENT_ALLOC = (GOMP_MAP_LAST | 4), > +GOMP_MAP_PRESENT_TO =(GOMP_MAP_LAST | 5), > +GOMP_MAP_PRESENT_FROM = (GOMP_MAP_LAST | 6), > +GOMP_MAP_PRESENT_TOFROM =(GOMP_MAP_LAST | 7) >}; > > #define GOMP_MAP_COPY_TO_P(X) \ > @@ -243,7 +241,8 @@ enum gomp_map_kind >(((X) & GOMP_MAP_FLAG_SPECIAL_BITS) == GOMP_MAP_FLAG_FORCE) > > #define GOMP_MAP_PRESENT_P(X) \ > - (((X) & GOMP_MAP_FLAG_PRESENT) == GOMP_MAP_FLAG_PRESENT) > + (((X) & GOMP_MAP_FLAG_PRESENT) == GOMP_MAP_FLAG_PRESENT \ > + || (X) == GOMP_MAP_FORCE_PRESENT) ..., and this 'GOMP_MAP_PRESENT_P' should look for 'GOMP_MAP_FLAG_ALWAYS_PRESENT' instead of 'GOMP_MAP_FLAG_PRESENT' (plus 'GOMP_MAP_FORCE_PRESENT')? Instead of the current effective 'GOMP_MAP_FLAG_ALWAYS_PRESENT': GOMP_MAP_FLAG_SPECIAL_
Add 'libgomp.{,oacc-}fortran/fortran-torture_execute_math.f90'
Hi! On 2023-06-13T13:11:38+0200, Tobias Burnus wrote: > On 13.06.23 12:42, Thomas Schwinge wrote: >> On 2023-06-05T14:18:48+0200, I wrote: >>> OK to push the attached >>> "Add 'libgomp.{,oacc-}fortran/fortran-torture_execute_math.f90'"? >> >> Subject: [PATCH] Add >> 'libgomp.{,oacc-}fortran/fortran-torture_execute_math.f90' >> >> gcc/testsuite/ >> * gfortran.fortran-torture/execute/math.f90: Enhance for optional >> OpenACC, OpenMP 'target' usage. > > I think it is more readable with a linebreak here and with "OpenACC > 'serial' and OpenMP ..." instead of "OpenACC, OpenMP". > > What I would like to see a hint somewhere in the commit log that the > libgomp files include the gfortran.fortran-torture file. I don't care > whether you add the hint before the changelog items as free text – or in > the bullet above (e.g. "as it is included in libgomp/testsuite") – or > after "New." in the following bullet list. > >> libgomp/ >> * testsuite/libgomp.fortran/fortran-torture_execute_math.f90: New. >> * testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90: >> Likewise. > >> --- >> .../gfortran.fortran-torture/execute/math.f90 | 23 +-- >> .../fortran-torture_execute_math.f90 | 4 >> .../fortran-torture_execute_math.f90 | 5 >> 3 files changed, 30 insertions(+), 2 deletions(-) >> create mode 100644 >> libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90 >> create mode 100644 >> libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90 >> >> diff --git a/gcc/testsuite/gfortran.fortran-torture/execute/math.f90 >> b/gcc/testsuite/gfortran.fortran-torture/execute/math.f90 >> index 17cc78f7a10..e71f669304f 100644 >> --- a/gcc/testsuite/gfortran.fortran-torture/execute/math.f90 >> +++ b/gcc/testsuite/gfortran.fortran-torture/execute/math.f90 >> @@ -1,9 +1,14 @@ >> ! Program to test mathematical intrinsics >> + >> +! See also >> 'libgomp/testsuite/libgomp.fortran/fortran-torture_execute_math.f90'; thus >> the '!$omp' directives. >> +! 
See also >> 'libgomp/testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90'; >> thus the '!$acc' directives. > > Likewise here: it is not completely obvious that this file is 'include'd > by the other testcases. > > Maybe add a line "! This file is also included in:" and remove the "See > also" or some creative variant of it. > > Minor remark: The OpenMP part is OK, but strict reading of the spec > requires an 'omp declare target' if a subroutine is in a different > compilation unit. And according to the glossary, that's the case here. In > practice, it also works without as it is in the same translation unit. > (compilation unit = for C/C++: translation unit, for Fortran: > subprogram). I think the HPE/Cray compiler will complain, but maybe only > when used with modules and not with subroutine subprograms. (As many > compilers write a .mod file for modules, a late change of attributes can > be more problematic.) > > Otherwise LGTM. Thanks for the review. I've now pushed commit e76af2162c7b768ef0a913d485c51a80b08a1020 "Add 'libgomp.{,oacc-}fortran/fortran-torture_execute_math.f90'", see attached. > PS: I assume that you have checked it both with an in-build-tree and > an in-install-tree testsuite run. I happened to have (..., but don't think it'd make a relevant difference here?) Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From e76af2162c7b768ef0a913d485c51a80b08a1020 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Fri, 2 Jun 2023 23:11:00 +0200 Subject: [PATCH] Add 'libgomp.{,oacc-}fortran/fortran-torture_execute_math.f90' ..., via 'include'ing the existing 'gfortran.fortran-torture/execute/math.f90', which therefore is enhanced for optional OpenACC 'serial', OpenMP 'target' usage. 
gcc/testsuite/ * gfortran.fortran-torture/execute/math.f90: Enhance for optional OpenACC 'serial', OpenMP 'target' usage. libgomp/ * testsuite/libgomp.fortran/fortran-torture_execute_math.f90: New. * testsuite/libgomp.oacc-fortran/fortran-torture_execute_math.f90: Likewise. --- .../gfortran.fort
libgomp testsuite: Don't handle 'lang_link_flags'
Hi! Any objections to pushing the attached "libgomp testsuite: Don't handle 'lang_link_flags'"? Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From b3d33dc858fffeeed83735e55d86963e2297a78d Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Mon, 5 Jun 2023 11:45:41 +0200 Subject: [PATCH] libgomp testsuite: Don't handle 'lang_link_flags' ..., which as of recent commit 4bcb46b3ade1796c5a57b294f5cca25f00671cac "driver: Forward '-lgfortran', '-lm' to offloading compilation" is unused, and we don't anticipate any new usage. libgomp/ * testsuite/lib/libgomp.exp (libgomp_target_compile): Don't handle 'lang_link_flags'. --- libgomp/testsuite/lib/libgomp.exp | 4 1 file changed, 4 deletions(-) diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp index 1c4af9a8a2c..fb2bce38e28 100644 --- a/libgomp/testsuite/lib/libgomp.exp +++ b/libgomp/testsuite/lib/libgomp.exp @@ -277,10 +277,6 @@ proc libgomp_target_compile { source dest type options } { lappend options "ldflags=-L${blddir}/${lang_library_path}" } } -global lang_link_flags -if { [info exists lang_link_flags] } { - lappend options "ldflags=${lang_link_flags}" -} if { [target_info needs_status_wrapper] != "" && [info exists gluefile] } { lappend options "libs=${gluefile}" -- 2.34.1
Align a 'OMP_TARGET_OFFLOAD=mandatory' diagnostic with others (was: Fix typo in 'libgomp.c/target-51.c' (was: [patch] OpenMP: Set default-device-var with OMP_TARGET_OFFLOAD=mandatory))
Hi! On 2023-06-14T11:42:22+0200, Tobias Burnus wrote: > On 14.06.23 10:09, Thomas Schwinge wrote: >> Let me know if I should also adjust the new 'target { ! offload_device }' >> diagnostic "[...] MANDATORY but only the host device is available" to >> include a comma before 'but', for consistency with the other existing >> diagnostics (cited above)? > > I think it makes sense to be consistent. Thus: Yes, please add the commas. I've pushed commit f2ef1dabbc18eb6efc0eb47bbb0eebbc6d72e09e "Align a 'OMP_TARGET_OFFLOAD=mandatory' diagnostic with others", see attached. Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From f2ef1dabbc18eb6efc0eb47bbb0eebbc6d72e09e Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Wed, 14 Jun 2023 12:44:05 +0200 Subject: [PATCH] Align a 'OMP_TARGET_OFFLOAD=mandatory' diagnostic with others On 2023-06-14T11:42:22+0200, Tobias Burnus wrote: > On 14.06.23 10:09, Thomas Schwinge wrote: >> Let me know if I should also adjust the new 'target { ! offload_device }' >> diagnostic "[...] MANDATORY but only the host device is available" to >> include a comma before 'but', for consistency with the other existing >> diagnostics (cited above)? > > I think it makes sense to be consistent. Thus: Yes, please add the commas. Fix-up for recent commit 18c8b56c7d67a9e37acf28822587786f0fc0efbc "OpenMP: Set default-device-var with OMP_TARGET_OFFLOAD=mandatory". libgomp/ * target.c (resolve_device): Align a 'OMP_TARGET_OFFLOAD=mandatory' diagnostic with others. * testsuite/libgomp.c/target-51.c: Adjust. 
--- libgomp/target.c| 4 ++-- libgomp/testsuite/libgomp.c/target-51.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libgomp/target.c b/libgomp/target.c index f1020fad601b..e39ef8f6e82a 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -152,8 +152,8 @@ resolve_device (int device_id, bool remapped) return NULL; if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY && gomp_get_num_devices () == 0) - gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY but only the host " - "device is available"); + gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, " + "but only the host device is available"); else if (device_id == omp_invalid_device) gomp_fatal ("omp_invalid_device encountered"); else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY) diff --git a/libgomp/testsuite/libgomp.c/target-51.c b/libgomp/testsuite/libgomp.c/target-51.c index cf9e690263e9..bbe9ade6e24b 100644 --- a/libgomp/testsuite/libgomp.c/target-51.c +++ b/libgomp/testsuite/libgomp.c/target-51.c @@ -9,7 +9,7 @@ /* See comment in target-50.c/target-50.c for why the output differs. */ -/* { dg-output ".*libgomp: OMP_TARGET_OFFLOAD is set to MANDATORY but only the host device is available.*" { target { ! offload_device } } } */ +/* { dg-output ".*libgomp: OMP_TARGET_OFFLOAD is set to MANDATORY, but only the host device is available.*" { target { ! offload_device } } } */ /* { dg-output ".*libgomp: OMP_TARGET_OFFLOAD is set to MANDATORY, but device not found.*" { target offload_device } } */ int -- 2.39.2
Re: [r14-1805 Regression] FAIL: c-c++-common/Wfree-nonheap-object-3.c -std=gnu++98 (test for warnings, line 45) on Linux/x86_64
Hi! On 2023-06-15T08:50:59+0800, "haochen.jiang via Gcc-patches" wrote: > On Linux/x86_64, Actually: generally... > 9c03391ba447ff86038d6a34c90ae737c3915b5f is the first bad commit > commit 9c03391ba447ff86038d6a34c90ae737c3915b5f > Author: Thomas Schwinge > Date: Wed Jun 7 16:24:26 2023 +0200 > > Tighten 'dg-warning' alternatives in > 'c-c++-common/Wfree-nonheap-object{,-2,-3}.c' > > caused > > FAIL: c-c++-common/Wfree-nonheap-object-3.c -std=gnu++14 (test for excess > errors) > FAIL: c-c++-common/Wfree-nonheap-object-3.c -std=gnu++14 (test for > warnings, line 45) > FAIL: c-c++-common/Wfree-nonheap-object-3.c -std=gnu++17 (test for excess > errors) > FAIL: c-c++-common/Wfree-nonheap-object-3.c -std=gnu++17 (test for > warnings, line 45) > FAIL: c-c++-common/Wfree-nonheap-object-3.c -std=gnu++20 (test for excess > errors) > FAIL: c-c++-common/Wfree-nonheap-object-3.c -std=gnu++20 (test for > warnings, line 45) > FAIL: c-c++-common/Wfree-nonheap-object-3.c -std=gnu++98 (test for excess > errors) > FAIL: c-c++-common/Wfree-nonheap-object-3.c -std=gnu++98 (test for > warnings, line 45) Indeed. Sorry -- not sure how that escaped my testing. I already did have the fix in a different Git commit (but not in my testing build). Pushed to master branch commit df071fbd467f0cb379ef41d74792fc5e6c8c "Fix 'dg-warning' in 'c-c++-common/Wfree-nonheap-object-3.c' for C++", see attached. 
Grüße Thomas > with GCC configured with > > ../../gcc/configure > --prefix=/export/users/haochenj/src/gcc-bisect/master/master/r14-1805/usr > --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld > --with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl > --enable-libmpx x86_64-linux --disable-bootstrap > > To reproduce: > > $ cd {build_dir}/gcc && make check > RUNTESTFLAGS="dg.exp=c-c++-common/Wfree-nonheap-object-3.c > --target_board='unix{-m32}'" > $ cd {build_dir}/gcc && make check > RUNTESTFLAGS="dg.exp=c-c++-common/Wfree-nonheap-object-3.c > --target_board='unix{-m32\ -march=cascadelake}'" > $ cd {build_dir}/gcc && make check > RUNTESTFLAGS="dg.exp=c-c++-common/Wfree-nonheap-object-3.c > --target_board='unix{-m64}'" > $ cd {build_dir}/gcc && make check > RUNTESTFLAGS="dg.exp=c-c++-common/Wfree-nonheap-object-3.c > --target_board='unix{-m64\ -march=cascadelake}'" > > (Please do not reply to this email, for question about this report, contact > me at haochen dot jiang at intel.com) - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From df071fbd467f0cb379ef41d74792fc5e6c8c Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Wed, 7 Jun 2023 17:12:01 +0200 Subject: [PATCH] Fix 'dg-warning' in 'c-c++-common/Wfree-nonheap-object-3.c' for C++ [...]/c-c++-common/Wfree-nonheap-object-3.c:57:24: warning: 'malloc (dealloc_float)' attribute ignored with deallocation functions declared 'inline' [-Wattributes] [...]/c-c++-common/Wfree-nonheap-object-3.c:51:1: note: deallocation function declared here [...]/c-c++-common/Wfree-nonheap-object-3.c: In function 'void test_nowarn_int(int)': [...]/c-c++-common/Wfree-nonheap-object-3.c:25:20: warning: 'void __builtin_free(void*)' called on pointer 'p' with nonzero offset 4 [-Wfree-nonheap-object] 
[...]/c-c++-common/Wfree-nonheap-object-3.c:24:24: note: returned from 'int* alloc_int(int)' [...]/c-c++-common/Wfree-nonheap-object-3.c: In function 'void test_nowarn_long(int)': [...]/c-c++-common/Wfree-nonheap-object-3.c:45:18: warning: 'void dealloc_long(long int*)' called on pointer '<unknown>' with nonzero offset 8 [-Wfree-nonheap-object] [...]/c-c++-common/Wfree-nonheap-object-3.c:44:26: note: returned from 'long int* alloc_long(int)' In function 'void dealloc_float(float*)', inlined from 'void test_nowarn_float(int)' at [...]/c-c++-common/Wfree-nonheap-object-3.c:68:19: [...]/c-c++-common/Wfree-nonheap-object-3.c:53:18: warning: 'void __builtin_free(void*)' called on pointer '<unknown>' with nonzero offset 8 [-Wfree-nonheap-object] [...]/c-c++-common/Wfree-nonheap-object-3.c: In function 'void test_nowarn_float(int)': [...]/c-c++-common/Wfree-nonheap-object-3.c:67:28: note: returned from 'float* alloc_float(int)' PASS: c-c++-common/Wfree-nonheap-object-3.
Skip a number of C++ test cases for '-fno-exceptions' testing (was: Support in the GCC(/C++) test suites for '-fno-exceptions')
Hi! On 2023-06-06T20:31:21+0100, Jonathan Wakely wrote: > On Tue, 6 Jun 2023 at 20:14, Thomas Schwinge wrote: >> This issue comes up in context of me working on C++ support for GCN and >> nvptx target. Those targets shall default to '-fno-exceptions' -- or, >> "in other words", '-fexceptions' is not supported. (Details omitted >> here.) >> >> It did seem clear to me that with such a configuration it'll be hard to >> get clean test results. Then I found code in >> 'gcc/testsuite/lib/gcc-dg.exp:gcc-dg-prune': >> >> # If exceptions are disabled, mark tests expecting exceptions to be >> enabled >> # as unsupported. >> if { ![check_effective_target_exceptions_enabled] } { >> if [regexp "(^|\n)\[^\n\]*: error: exception handling disabled" >> $text] { >> return "::unsupported::exception handling disabled" >> } >> >> ..., which, in a way, sounds as if the test suite generally is meant to >> produce useful results for '-fno-exceptions', nice surprise! >> >> Running x86_64-pc-linux-gnu (not yet GCN, nvptx) 'make check' with: >> >> RUNTESTFLAGS='--target_board=unix/-fno-exceptions\{,-m32\}' >> >> ..., I find that indeed this does work for a lot of test cases, where we >> then get (random example): >> >> PASS: g++.dg/coroutines/pr99710.C (test for errors, line 23) >> -PASS: g++.dg/coroutines/pr99710.C (test for excess errors) >> +UNSUPPORTED: g++.dg/coroutines/pr99710.C: exception handling disabled >> >> ..., due to: >> >> [...]/g++.dg/coroutines/pr99710.C: In function 'task my_coro()': >> +[...]/g++.dg/coroutines/pr99710.C:18:10: error: exception handling >> disabled, use '-fexceptions' to enable >> [...]/g++.dg/coroutines/pr99710.C:23:7: error: await expressions are >> not permitted in handlers >> compiler exited with status 1 >> >> But, we're nowhere near clean test results: PASS -> FAIL as well as >> XFAIL -> XPASS regressions, due to 'error: exception handling disabled' >> precluding other diagnostics seems to be one major issue. 
>> >> Is there interest in me producing the obvious (?) changes to those test >> cases, such that compiler g++ as well as target library libstdc++ test >> results are reasonably clean? (If you think that's all "wasted effort", >> then I suppose I'll just locally ignore any FAILs/XPASSes/UNRESOLVEDs >> that appear in combination with >> 'UNSUPPORTED: [...]: exception handling disabled'.) > > I would welcome that for libstdc++. I do sometimes run the libstdc++ tests > with "unusual" options, like -fno-exceptions and -fno-rtti (e.g. today I've > been fixing FAILs that only happen with -fexcess-precision=standard). I > just manually ignore the tests that fail for -fno-exceptions, but it would > be great if they were automatically skipped as UNSUPPORTED. Per your and my changes a few days ago, we've already got libstdc++ covered, with the sole exception of: PASS: 27_io/basic_ostream/inserters_arithmetic/pod/23875.cc (test for excess errors) [-PASS:-]{+FAIL:+} 27_io/basic_ostream/inserters_arithmetic/pod/23875.cc execution test terminate called after throwing an instance of 'std::bad_cast' what(): std::bad_cast (Low priority for me.) Not having heard anything contrary regarding the compiler side of things, I've now been working on that, see below. > We already have a handful of tests that use #if __cpp_exceptions to make > those parts conditional on exception support. Yes, that's an option not for all but certainly for some test cases. (I'm not looking into that now -- but this may in fact be a good beginner-level task, will add to <https://gcc.gnu.org/wiki/EasyHacks>). >> Otherwise, a number of test cases need DejaGnu directives >> conditionalized on 'target exceptions_enabled'. Before I get to such things, even simpler: OK to push the attached "Skip a number of C++ test cases for '-fno-exceptions' testing"? >> (Or, >> 'error: exception handling disabled' made a "really late" diagnostic, so >> that it doesn't preclude other diagnostics? I'll have a look. 
Well, >> maybe something like: in fact do not default to '-fno-exceptions', but >> instead emit 'error: exception handling disabled' only if in a "really >> late" pass we run into exceptions-related constructs that we cannot >> support. Th
Skip a number of C++ "split files" test cases for '-fno-exceptions' testing (was: Skip a number of C++ test cases for '-fno-exceptions' testing (was: Support in the GCC(/C++) test suites for '-fno-exc
Hi! On 2023-06-15T17:15:54+0200, I wrote: > On 2023-06-06T20:31:21+0100, Jonathan Wakely wrote: >> On Tue, 6 Jun 2023 at 20:14, Thomas Schwinge wrote: >>> This issue comes up in context of me working on C++ support for GCN and >>> nvptx target. Those targets shall default to '-fno-exceptions' -- or, >>> "in other words", '-fexceptions' is not supported. (Details omitted >>> here.) >>> >>> It did seem clear to me that with such a configuration it'll be hard to >>> get clean test results. Then I found code in >>> 'gcc/testsuite/lib/gcc-dg.exp:gcc-dg-prune': >>> >>> # If exceptions are disabled, mark tests expecting exceptions to be >>> enabled >>> # as unsupported. >>> if { ![check_effective_target_exceptions_enabled] } { >>> if [regexp "(^|\n)\[^\n\]*: error: exception handling disabled" >>> $text] { >>> return "::unsupported::exception handling disabled" >>> } >>> >>> ..., which, in a way, sounds as if the test suite generally is meant to >>> produce useful results for '-fno-exceptions', nice surprise! 
>>> >>> Running x86_64-pc-linux-gnu (not yet GCN, nvptx) 'make check' with: >>> >>> RUNTESTFLAGS='--target_board=unix/-fno-exceptions\{,-m32\}' >>> >>> ..., I find that indeed this does work for a lot of test cases, where we >>> then get (random example): >>> >>> PASS: g++.dg/coroutines/pr99710.C (test for errors, line 23) >>> -PASS: g++.dg/coroutines/pr99710.C (test for excess errors) >>> +UNSUPPORTED: g++.dg/coroutines/pr99710.C: exception handling disabled >>> >>> ..., due to: >>> >>> [...]/g++.dg/coroutines/pr99710.C: In function 'task my_coro()': >>> +[...]/g++.dg/coroutines/pr99710.C:18:10: error: exception handling >>> disabled, use '-fexceptions' to enable >>> [...]/g++.dg/coroutines/pr99710.C:23:7: error: await expressions are >>> not permitted in handlers >>> compiler exited with status 1 >>> >>> But, we're nowhere near clean test results: PASS -> FAIL as well as >>> XFAIL -> XPASS regressions, due to 'error: exception handling disabled' >>> precluding other diagnostics seems to be one major issue. >>> >>> Is there interest in me producing the obvious (?) changes to those test >>> cases, such that compiler g++ as well as target library libstdc++ test >>> results are reasonably clean? (If you think that's all "wasted effort", >>> then I suppose I'll just locally ignore any FAILs/XPASSes/UNRESOLVEDs >>> that appear in combination with >>> 'UNSUPPORTED: [...]: exception handling disabled'.) >> >> I would welcome that for libstdc++. [...] > Not having heard anything contrary regarding the compiler side of things, > I've now been working on that, see below. >>> Otherwise, a number of test cases need DejaGnu directives >>> conditionalized on 'target exceptions_enabled'. > > Before I get to such things, even simpler: OK to push the attached > "Skip a number of C++ test cases for '-fno-exceptions' testing"? Similarly, OK to push the attached "Skip a number of C++ "split files" test cases for '-fno-exceptions' testing"? 
Grüße Thomas >>> (Or, >>> 'error: exception handling disabled' made a "really late" diagnostic, so >>> that it doesn't preclude other diagnostics? I'll have a look. Well, >>> maybe something like: in fact do not default to '-fno-exceptions', but >>> instead emit 'error: exception handling disabled' only if in a "really >>> late" pass we run into exceptions-related constructs that we cannot >>> support. That'd also avoid PASS -> UNSUPPORTED "regressions" when >>> exception handling in fact gets optimized away, for example. I like that >>> idea, conceptually -- but is it feasible to implement..?) >> >> IMHO just [...] using [an effective target keyword] in test >> selectors seems simpler, and doesn't require changes to the compiler, just >> the tests. > > I still like the idea, but yes, I've mentally put it on file "for later" > (ha, ha, ha...) -- it doesn't seem obvious to implement. > > > Grüße > Thomas - Siemens Ele
Skip a number of C++ 'g++.dg/tree-prof/' test cases for '-fno-exceptions' testing (was: Skip a number of C++ test cases for '-fno-exceptions' testing (was: Support in the GCC(/C++) test suites for '-f
Hi! On 2023-06-15T17:15:54+0200, I wrote: > On 2023-06-06T20:31:21+0100, Jonathan Wakely wrote: >> On Tue, 6 Jun 2023 at 20:14, Thomas Schwinge wrote: >>> This issue comes up in context of me working on C++ support for GCN and >>> nvptx target. Those targets shall default to '-fno-exceptions' -- or, >>> "in other words", '-fexceptions' is not supported. (Details omitted >>> here.) >>> >>> It did seem clear to me that with such a configuration it'll be hard to >>> get clean test results. Then I found code in >>> 'gcc/testsuite/lib/gcc-dg.exp:gcc-dg-prune': >>> >>> # If exceptions are disabled, mark tests expecting exceptions to be >>> enabled >>> # as unsupported. >>> if { ![check_effective_target_exceptions_enabled] } { >>> if [regexp "(^|\n)\[^\n\]*: error: exception handling disabled" >>> $text] { >>> return "::unsupported::exception handling disabled" >>> } >>> >>> ..., which, in a way, sounds as if the test suite generally is meant to >>> produce useful results for '-fno-exceptions', nice surprise! 
>>> >>> Running x86_64-pc-linux-gnu (not yet GCN, nvptx) 'make check' with: >>> >>> RUNTESTFLAGS='--target_board=unix/-fno-exceptions\{,-m32\}' >>> >>> ..., I find that indeed this does work for a lot of test cases, where we >>> then get (random example): >>> >>> PASS: g++.dg/coroutines/pr99710.C (test for errors, line 23) >>> -PASS: g++.dg/coroutines/pr99710.C (test for excess errors) >>> +UNSUPPORTED: g++.dg/coroutines/pr99710.C: exception handling disabled >>> >>> ..., due to: >>> >>> [...]/g++.dg/coroutines/pr99710.C: In function 'task my_coro()': >>> +[...]/g++.dg/coroutines/pr99710.C:18:10: error: exception handling >>> disabled, use '-fexceptions' to enable >>> [...]/g++.dg/coroutines/pr99710.C:23:7: error: await expressions are >>> not permitted in handlers >>> compiler exited with status 1 >>> >>> But, we're nowhere near clean test results: PASS -> FAIL as well as >>> XFAIL -> XPASS regressions, due to 'error: exception handling disabled' >>> precluding other diagnostics seems to be one major issue. >>> >>> Is there interest in me producing the obvious (?) changes to those test >>> cases, such that compiler g++ as well as target library libstdc++ test >>> results are reasonably clean? (If you think that's all "wasted effort", >>> then I suppose I'll just locally ignore any FAILs/XPASSes/UNRESOLVEDs >>> that appear in combination with >>> 'UNSUPPORTED: [...]: exception handling disabled'.) >> >> I would welcome that for libstdc++. [...] > Not having heard anything contrary regarding the compiler side of things, > I've now been working on that, see below. >>> Otherwise, a number of test cases need DejaGnu directives >>> conditionalized on 'target exceptions_enabled'. > > Before I get to such things, even simpler: OK to push the attached > "Skip a number of C++ test cases for '-fno-exceptions' testing"? Similarly, OK to push the attached "Skip a number of C++ 'g++.dg/tree-prof/' test cases for '-fno-exceptions' testing"? 
Grüße Thomas >>> (Or, >>> 'error: exception handling disabled' made a "really late" diagnostic, so >>> that it doesn't preclude other diagnostics? I'll have a look. Well, >>> maybe something like: in fact do not default to '-fno-exceptions', but >>> instead emit 'error: exception handling disabled' only if in a "really >>> late" pass we run into exceptions-related constructs that we cannot >>> support. That'd also avoid PASS -> UNSUPPORTED "regressions" when >>> exception handling in fact gets optimized away, for example. I like that >>> idea, conceptually -- but is it feasible to implement..?) >> >> IMHO just [...] using [an effective target keyword] in test >> selectors seems simpler, and doesn't require changes to the compiler, just >> the tests. > > I still like the idea, but yes, I've mentally put it on file "for later" > (ha, ha, ha...) -- it doesn't seem obvious to implement. > > > Grüße > Thomas - Siemens E
Re: [committed] - Re: [patch] OpenMP/Fortran: Non-rectangular loops with constant steps other than 1 or -1 [PR107424]
Hi Tobias! On 2023-07-19T10:26:12+0200, Tobias Burnus wrote: > Now committed as Rev. r14-2634-g85da0b40538fb0 On devel/omp/gcc-13 branch, the corresponding commit b003e6511754dce475f7f5b0c5cd887a177e41b3 "OpenMP/Fortran: Non-rectangular loops with constant steps other than 1 or -1 [PR107424]" introduces a regression: PASS: libgomp.fortran/loop-transforms/unroll-2.f90 -O0 (test for excess errors) [-PASS:-]{+FAIL:+} libgomp.fortran/loop-transforms/unroll-2.f90 -O0 execution test Etc. spawn [open ...] 4 8 10 11 Program aborted. Backtrace: #0 0x400f9c in test at [...]/libgomp.fortran/loop-transforms/unroll-2.f90:85 #1 0x400fd3 in main at [...]/libgomp.fortran/loop-transforms/unroll-2.f90:59 Grüße Thomas > Changes: > > * I missed to updated another 'sorry' (msg wording change) - now fixed; > I also added it to the sorry-testcase file non-rectangular-loop-5.f90. > > * I decided to retire the PR as several issues have been fixed and the > original title did not fit any more. The remaining issue is now tracked > in PR110735 (i.e. handling step != const, both the generic and possibly > a simpler special case). > > * I added a link to the PR to libgomp.texi such that one can find out > what is only partially supported for Fortran. > > Thanks, > > Tobias > > PS: Otherwise, the following still applies: > > On 18.07.23 14:11, Tobias Burnus wrote: >> Comments regarding the validity of the Fortran assumptions are welcome! >> >> This patch now uses a 'simple' loop for OpenMP loops with >> a constant loop-step size. Before, it only did so for step = ±1. >> (Otherwise, a count variable is used from which the original >> loop index variable is calculated from.) >> >> For details, see the attached patch or >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107424#c12 >> (comment 12 + 14 plus the email linked in comment 12). >> >> Comments? Remarks? If there are none, I will relatively soonish >> commit the attached patch to mainline, only. 
> commit 85da0b40538fb0d17d89de1e7905984668e3dfef > Author: Tobias Burnus > Date: Wed Jul 19 10:18:49 2023 +0200 > > OpenMP/Fortran: Non-rectangular loops with constant steps other than 1 or > -1 [PR107424] > > Before this commit, gfortran produced with OpenMP for 'do i = 1,10,2' > the code > for (count.0 = 0; count.0 < 5; count.0 = count.0 + 1) > i = count.0 * 2 + 1; > > While such an inner loop can be collapsed, a non-rectangular could not. > With this commit and for all constant loop steps, a simple loop such > as 'for (i = 1; i <= 10; i = i + 2)' is created. (Before only for the > constant steps of 1 and -1.) > > The constant step permits to know the direction (increasing/decreasing) > that is required for the loop condition. > > The new code is only valid if one assumes no overflow of the loop > variable. > However, the Fortran standard can be read that this must be ensured by > the user. Namely, the Fortran standard requires (F2023, 10.1.5.2.4): > "The execution of any numeric operation whose result is not defined by > the arithmetic used by the processor is prohibited." > > And, for DO loops, F2023's "11.1.7.4.3 The execution cycle" has the > following: The number of loop iterations handled by an iteration count, > which would permit code like 'do i = huge(i)-5, huge(i),4'. However, > in step (3), this count is not only decremented by one but also: > "... The DO variable, if any, is incremented by the value of the > incrementation parameter m3." > And for the example above, 'i' would be 'huge(i)+3' in the last > execution cycle, which exceeds the largest model number and should > render the example as invalid. > > PR fortran/107424 > > gcc/fortran/ChangeLog: > > * trans-openmp.cc (gfc_nonrect_loop_expr): Accept all > constant loop steps. > (gfc_trans_omp_do): Likewise; use sign to determine > loop direction. > > libgomp/ChangeLog: > > * libgomp.texi (Impl. Status 5.0): Add link to new PR110735. 
> * testsuite/libgomp.fortran/non-rectangular-loop-1.f90: Enable > commented tests. > * testsuite/libgomp.fortran/non-rectangular-loop-1a.f90: Remove > test file; tests are in non-rectangular-loop-1.f90. > * testsuite/libgomp.fortran/non-rectangular-loop-5.f90: Change > testcase
Re: [PATCH, OpenACC 2.7, v2] Implement host_data must have use_device clause requirement
Hi Chung-Lin! On 2023-07-13T18:54:00+0800, Chung-Lin Tang wrote: > On 2023/6/16 5:13 PM, Thomas Schwinge wrote: >> OK with one small change, please -- unless there's a reason for doing it >> this way: [...] > I've adjusted the Fortran implementation as you described. Yes, I agree this > way > more fits current Fortran FE conventions. > > I've re-tested the attached v2 patch, will commit later this week if no major > objections. ACK, thanks. Grüße Thomas > gcc/c/ChangeLog: > > * c-parser.cc (c_parser_oacc_host_data): Add checking requiring OpenACC > host_data construct to have an use_device clause. > > gcc/cp/ChangeLog: > > * parser.cc (cp_parser_oacc_host_data): Add checking requiring OpenACC > host_data construct to have an use_device clause. > > gcc/fortran/ChangeLog: > > * openmp.cc (resolve_omp_clauses): Add checking requiring > OpenACC host_data construct to have an use_device clause. > > gcc/testsuite/ChangeLog: > > * c-c++-common/goacc/host_data-2.c: Adjust testcase. > * gfortran.dg/goacc/host_data-error.f90: New testcase. > * gfortran.dg/goacc/pr71704.f90: Adjust testcase. 
> diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc > index 24a6eb6e459..80920b31f83 100644 > --- a/gcc/c/c-parser.cc > +++ b/gcc/c/c-parser.cc > @@ -18461,8 +18461,13 @@ c_parser_oacc_host_data (location_t loc, c_parser > *parser, bool *if_p) >tree stmt, clauses, block; > >clauses = c_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK, > -"#pragma acc host_data"); > - > +"#pragma acc host_data", false); > + if (!omp_find_clause (clauses, OMP_CLAUSE_USE_DEVICE_PTR)) > +{ > + error_at (loc, "% construct requires % > clause"); > + return error_mark_node; > +} > + clauses = c_finish_omp_clauses (clauses, C_ORT_ACC); >block = c_begin_omp_parallel (); >add_stmt (c_parser_omp_structured_block (parser, if_p)); >stmt = c_finish_oacc_host_data (loc, clauses, block); > diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc > index 5e2b5cba57e..beb5b632e5e 100644 > --- a/gcc/cp/parser.cc > +++ b/gcc/cp/parser.cc > @@ -45895,8 +45895,15 @@ cp_parser_oacc_host_data (cp_parser *parser, > cp_token *pragma_tok, bool *if_p) >unsigned int save; > >clauses = cp_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK, > - "#pragma acc host_data", pragma_tok); > - > + "#pragma acc host_data", pragma_tok, > + false); > + if (!omp_find_clause (clauses, OMP_CLAUSE_USE_DEVICE_PTR)) > +{ > + error_at (pragma_tok->location, > + "% construct requires % clause"); > + return error_mark_node; > +} > + clauses = finish_omp_clauses (clauses, C_ORT_ACC); >block = begin_omp_parallel (); >save = cp_parser_begin_omp_structured_block (parser); >cp_parser_statement (parser, NULL_TREE, false, if_p); > diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc > index 8efc4b3ecfa..f7af02845de 100644 > --- a/gcc/fortran/openmp.cc > +++ b/gcc/fortran/openmp.cc > @@ -8764,6 +8764,12 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses > *omp_clauses, > "% clause", &omp_clauses->detach->where); > } > > + if (openacc > + && code->op == EXEC_OACC_HOST_DATA > + && 
omp_clauses->lists[OMP_LIST_USE_DEVICE] == NULL) > +gfc_error ("% construct at %L requires % > clause", > +&code->loc); > + >if (omp_clauses->assume) > gfc_resolve_omp_assumptions (omp_clauses->assume); > } > diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-2.c > b/gcc/testsuite/c-c++-common/goacc/host_data-2.c > index b3093e575ff..862a764eb3a 100644 > --- a/gcc/testsuite/c-c++-common/goacc/host_data-2.c > +++ b/gcc/testsuite/c-c++-common/goacc/host_data-2.c > @@ -8,7 +8,9 @@ void > f (void) > { >int v2 = 3; > -#pragma acc host_data copy(v2) /* { dg-error ".copy. is not valid for > ..pragma acc host_data." } */ > +#pragma acc host_data copy(v2) > + /* { dg-error ".copy. is not valid for ..pragma acc host_data." "" { > target *-*-* } .-1 } */ > + /* { dg-error ".host_data. construct requires .use_device. clause" "" { > target *-*-* } .-2 } */ >; > > #pragma acc host_data use_device(v2) > @@ -20,6 +22,9 @@ f (void) >/* { dg-error ".use_device_ptr. va
Re: [PATCH, OpenACC 2.7] readonly modifier support in front-ends
:n)) > + enddo > + !$acc end parallel > +end subroutine foo > + > +program main > + integer :: i, n = 32, a(32) > + integer :: b(32) > + !$acc parallel copyin(readonly: a(:32), b(:n)) > + do i = 1,32 > + !$acc cache (readonly: a(:), b(:n)) > + enddo > + !$acc end parallel > +end program main > + > +! { dg-final { scan-tree-dump-times "(?n)#pragma acc parallel > map\\(readonly,to:\\*\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) parm.*data > \\\[len: .+\\\]\\) .+ map\\(readonly,to:\\*\\(integer\\(kind=4\\)\\\[0:\\\] > \\*\\) parm.*data \\\[len: .+\\\]\\)" 1 "original" } } > +! { dg-final { scan-tree-dump-times "(?n)#pragma acc parallel > map\\(readonly,to:a\\\[\\(\\(integer\\(kind=8\\)\\) parm.*data - > \\(integer\\(kind=8\\)\\) &a\\) / 4\\\] \\\[len: .+\\\]\\) .+ > map\\(readonly,to:b\\\[\\(\\(integer\\(kind=8\\)\\) parm.*data - > \\(integer\\(kind=8\\)\\) &b\\) / 4\\\] \\\[len: .+\\\]\\)" 1 "original" } } > +! { dg-final { scan-tree-dump-times "(?n)#pragma acc cache > \\(readonly:\\*\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) parm.*data \\\[len: > .+\\\]\\) \\(readonly:\\*\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) parm.*data > \\\[len: .+\\\]\\);" 2 "original" } } You're scanning only one of the two 'cache' directives? If that's intentional, please add a comment explaining why. If not, add the missing scanning. Given the peculiarities of the Fortran parsing, where first all directive's clauses are collected and then translated en bloc, I suggest extending the 'copyin' test cases to have several 'copyin' clauses, some with, some without 'readonly' modifier, so we make sure that 'readonly' is set only for the appropriate ones. Generally, in addition to just 'parallel' compute construct, please spread this out a bit, to also cover 'kernels', 'serial' compute constructs, and the 'data' construct. Generally, please also add testing for the 'declare' directive with 'copyin' with 'readonly' modifier -- and implement handling in case that's not implicitly covered? 
(..., but please don't let yourself be dragged into a number of pre-existing issues with OpenACC 'declare' -- I hope the 'readonly' handling is straightforward to test for.) Given that per the implementation in the front ends, the handling of 'readonly' obviously -- famous last words? ;-) -- is specific to 'copyin', it's probably OK to not have test cases to verify that the 'readonly' modifier is rejected for other data clauses? > --- a/gcc/tree-pretty-print.cc > +++ b/gcc/tree-pretty-print.cc > @@ -905,6 +905,8 @@ dump_omp_clause (pretty_printer *pp, tree clause, int > spc, dump_flags_t flags) > > case OMP_CLAUSE_MAP: >pp_string (pp, "map("); > + if (OMP_CLAUSE_MAP_READONLY (clause)) > + pp_string (pp, "readonly,"); >switch (OMP_CLAUSE_MAP_KIND (clause)) > { > case GOMP_MAP_ALLOC: > @@ -1075,6 +1077,8 @@ dump_omp_clause (pretty_printer *pp, tree clause, int > spc, dump_flags_t flags) > > case OMP_CLAUSE__CACHE_: >pp_string (pp, "("); > + if (OMP_CLAUSE__CACHE__READONLY (clause)) > + pp_string (pp, "readonly:"); >dump_generic_node (pp, OMP_CLAUSE_DECL (clause), >spc, flags, false); >goto print_clause_size; > --- a/gcc/tree.h > +++ b/gcc/tree.h > @@ -1813,6 +1813,14 @@ class auto_suppress_location_wrappers > #define OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE(NODE) \ >(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)->base.addressable_flag) > > +/* Nonzero if OpenACC 'readonly' modifier set, used for 'copyin'. */ > +#define OMP_CLAUSE_MAP_READONLY(NODE) \ > + TREE_READONLY (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)) > + > +/* Same as above, for use in OpenACC cache directives. */ > +#define OMP_CLAUSE__CACHE__READONLY(NODE) \ > + TREE_READONLY (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__CACHE_)) I'm not sure if these special accessor functions are actually useful, or we should just directly use 'TREE_READONLY' instead? We're only using them in contexts where it's clear that the 'OMP_CLAUSE_SUBCODE_CHECK' is satisfied, for example. 
Also, for the new use for OMP clauses, update 'gcc/tree.h:TREE_READONLY', and in 'gcc/tree-core.h' for 'readonly_flag' the "table lists the uses of each of the above flags". Setting 'TREE_READONLY' of the 'OMP_CLAUSE_DECL' instead of the clause itself isn't the right thing to do -- or is it, and might already indicate to the middle end the desired semantics? But does it maybe conflict with front end/language-level use of 'TREE_READONLY' for 'const' etc. (I suppose), and thus diagnostics for mismatches? I mean: int a; #pragma acc parallel copyin(readonly: a) { int *b = &a; ... should still continue to work (valid as long as '*b' isn't written to), so should not raise any "warning: initialization discards ‘const’ qualifier from pointer target type" diagnostics. But if that's not a problem (I don't know how 'TREE_READONLY' is used elsewhere), maybe that's something to give a thought to? Or, early in the middle end, propagate 'TREE_READONLY' from the clause to its 'OMP_CLAUSE_DECL'? Might need to 'unshare_expr' the latter for modification and use in the associated region only? Just some quick thoughts, obviously without any detailed analysis. ;-) Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
RE : Cfuture Manpower Hiring
Hi, I trust this email finds you well. Our organization hires the best and the brightest talent in the industry. We hire individuals with a strong sense of pride in their performance, team spirit, and a desire to excel. We aim to provide our clients with professional, quality, value-added services ensuring customer delight, thus building a long-term relationship rather than pursuing short-term gains. Why you should prefer us: *TAT duration - Just 24 hours *Deadline to close the position is one week (depends upon Client procedure) *Availability - 6 days in a week, all available on call round the clock. *Sources - Access to the database from beginner to top management level Our service charges are as below: A) The professional fee will be calculated as a percentage of the incumbent's gross annual salary @ 8.33% on annual CTC which excludes GST. B) Payment should be made within 30 days from the date of submission of invoice C) Replacement of candidates who leave the organization within 90 days of joining Thanks in advance. Assuring you the best of our efforts to begin a new relationship. We would request you to revert with your confirmation, which enables us to start the recruitment process. We look forward to receiving your detailed job inquiry with specifications and other parameters to enable us to submit our suitable and competitive profiles. Kind Regards, Vinod Thomas Bangalore If you do not wish to receive future emails from us, please reply as "opt-out"
List myself as "nvptx port" maintainer (was: Thomas Schwinge appointed co-maintainer of the nvptx backend)
Hi! On 2023-07-19T23:41:47+0200, Gerald Pfeifer wrote: > It's my pleasure to announce Thomas Schwinge as co-maintainer of the > nvptx backend. > > Congratulations and Happy Hacking, Thomas! Please go ahead and update > MAINTAINERS accordingly. > > Gerald (on behalf of the steering committee) Thanks! I've pushed commit 28e3d361ba0cfa7ea2f90706159a144eaf4b650e 'List myself as "nvptx port" maintainer', see attached. Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 28e3d361ba0cfa7ea2f90706159a144eaf4b650e Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Tue, 25 Jul 2023 21:17:52 +0200 Subject: [PATCH] List myself as "nvptx port" maintainer * MAINTAINERS: List myself as "nvptx port" maintainer. --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index b626d89fe34..e9b11b43a0f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -102,6 +102,7 @@ nds32 port Shiva Chen nios2 port Chung-Lin Tang nios2 port Sandra Loosemore nvptx port Tom de Vries +nvptx port Thomas Schwinge or1k port Stafford Horne pdp11 port Paul Koning powerpcspe port Andrew Jenner -- 2.34.1
Re: [patch] OpenMP: Call cuMemcpy2D/cuMemcpy3D for nvptx for omp_target_memcpy_rect
&dst_off_sz2) > + || __builtin_mul_overflow (src_offsets[2], element_size, > + &src_off_sz2)) > + return EINVAL; > + ret = devp->memcpy3d_func (dst_id, src_id, vol_sz2, volume[1], > volume[0], > + dst, dst_off_sz2, dst_offsets[1], > + dst_offsets[0], dst_sz2, dst_dimensions[1], > + src, src_off_sz2, src_offsets[1], > + src_offsets[0], src_sz2, src_dimensions[1]); > + if (ret != -1) > + return ret ? 0 : EINVAL; > +} > >for (i = 1; i < num_dims; i++) > if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice) > @@ -4585,7 +4670,7 @@ omp_target_memcpy_rect_worker (void *dst, const void > *src, size_t element_size, > volume + 1, dst_offsets + 1, > src_offsets + 1, dst_dimensions + 1, > src_dimensions + 1, dst_devicep, > -src_devicep); > +src_devicep, tmp_size, tmp); >if (ret) > return ret; >dst_off += dst_slice; > @@ -4608,9 +4693,6 @@ omp_target_memcpy_rect_check (void *dst, const void > *src, int dst_device_num, >if (ret) > return ret; > > - if (*src_devicep != NULL && *dst_devicep != NULL && *src_devicep != > *dst_devicep) > -return EINVAL; > - >return 0; > } > > @@ -4624,18 +4706,36 @@ omp_target_memcpy_rect_copy (void *dst, const void > *src, >struct gomp_device_descr *dst_devicep, >struct gomp_device_descr *src_devicep) > { > - if (src_devicep) > + size_t tmp_size = 0; > + void *tmp = NULL; > + bool lock_src; > + bool lock_dst; > + > + lock_src = (src_devicep > + && (!dst_devicep > + || src_devicep == dst_devicep > + || !(src_devicep->capabilities > +& GOMP_OFFLOAD_CAP_SHARED_MEM))); Similar doubt as above re "'GOMP_OFFLOAD_CAP_SHARED_MEM' actually reachable"? 
> + lock_dst = (dst_devicep > + && (!lock_src > + || (src_devicep != dst_devicep > + && !(dst_devicep->capabilities > +& GOMP_OFFLOAD_CAP_SHARED_MEM)))); > + if (lock_src) > gomp_mutex_lock (&src_devicep->lock); > - else if (dst_devicep) > + if (lock_dst) > gomp_mutex_lock (&dst_devicep->lock); (Pre-existing issue, and I've not myself tried to figure out the details at this time -- why do we actually lock the devices here, and in similar other places?) >int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims, > volume, dst_offsets, src_offsets, > dst_dimensions, src_dimensions, > -dst_devicep, src_devicep); > - if (src_devicep) > +dst_devicep, src_devicep, > +&tmp_size, &tmp); > + if (lock_src) > gomp_mutex_unlock (&src_devicep->lock); > - else if (dst_devicep) > + if (lock_dst) > gomp_mutex_unlock (&dst_devicep->lock); > + if (tmp) > +free (tmp); > >return ret; > } > @@ -4976,6 +5076,8 @@ gomp_load_plugin_for_device (struct gomp_device_descr > *device, >DLSYM (free); >DLSYM (dev2host); >DLSYM (host2dev); > + DLSYM (memcpy2d); > + DLSYM (memcpy3d); With 'DLSYM' used here, won't that fail if these symbols don't actually exist (like for 'libgomp/plugin/plugin-gcn.c')? I'm attaching the humble beginnings of a follow-on patch; feel free to use. 
Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From ed85b983970fb42be2e1db172a0d7e20c484ed06 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Thu, 27 Jul 2023 15:46:26 +0200 Subject: [PATCH] Re: OpenMP: Call cuMemcpy2D/cuMemcpy3D for nvptx for omp_target_memcpy_rect --- include/cuda/cuda.h | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/cuda/cuda.h b/include/cuda/cuda.h index 09c3c2b8dbe..d556a130379 100644 --- a/include/cuda/cuda.h +++ b/include/cuda/cuda.h @@ -147,7 +147,7 @@ typedef struct { size_t dstXInBytes, dstY; CUmemorytype dstMemoryType; - const void *dstHost; + void *dstHost; CUdeviceptr dstDevice; CUarray dstArray; size_t dstPitch; @@ -162,16 +162,16 @@ typedef struct { const void *srcHost; CUdeviceptr srcDevice; CUarray srcArray; - void *dummy; + void *reserved0; size_t srcPitch, srcHeight; size_t dstXInBytes, dstY, dstZ; size_t dstLOD; CUmemorytype dstMemoryType; - const void *dstHost; + void *dstHost; CUdeviceptr dstDevice; CUarray dstArray; - void *dummy2; + void *reserved1; size_t dstPitch, dstHeight; size_t WidthInBytes, Height, Depth; -- 2.34.1
[PING^2] nvptx: forward '-v' command-line option to assembler, linker
Hi Tom! Ping. Grüße Thomas On 2022-06-07T17:41:16+0200, I wrote: > Hi! > > On 2022-05-30T09:06:21+0200, Tobias Burnus wrote: >> On 29.05.22 22:49, Thomas Schwinge wrote: >>> Not sure if that's what you had in mind, but what do you think about the >>> attached "nvptx: forward '-v' command-line option to assembler, linker"? >>> OK to push to GCC master branch (after merging >>> <https://github.com/MentorEmbedded/nvptx-tools/pull/37> >>> "Put '-v' verbose output onto stderr instead of stdout")? >> >> I was mainly thinking of some way to have it available — which >> '-foffload-options=-Wa,-v' already permits on the GCC side. (Once the >> nvptx-tools patch actually makes use of the '-v'.) > > (Merged a week ago.) > >> If I understand your patch correctly, this patch now causes 'gcc -v' to >> imply 'gcc -v -Wa,-v'. I think that's okay, since 'gcc -v' already >> outputs a lot of lines and those lines can be helpful to understand what >> happens and what not. > > ACK. > >> Tom, your thoughts on this? > > Ping. > > > Grüße > Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 17c35607d4927299b0c4bd19dd6fd205c85c4a4b Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Sun, 29 May 2022 22:31:43 +0200 Subject: [PATCH] nvptx: forward '-v' command-line option to assembler, linker For example, for offloading compilation with '-save-temps -v', before vs. after word-diff then looks like: [...] [...]/build-gcc-offload-nvptx-none/gcc/as {+-v -v+} -o ./a.xnvptx-none.mkoffload.o ./a.xnvptx-none.mkoffload.s {+Verifying sm_30 code with sm_35 code generation.+} {+ ptxas -c -o /dev/null ./a.xnvptx-none.mkoffload.o --gpu-name sm_35 -O0+} [...] [...]/build-gcc-offload-nvptx-none/gcc/collect2 {+-v -v+} -o ./a.xnvptx-none.mkoffload [...] 
@./a.xnvptx-none.mkoffload.args.1 -lgomp -lgcc -lc -lgcc {+collect2 version 12.0.1 20220428 (experimental)+} {+[...]/build-gcc-offload-nvptx-none/gcc/collect-ld -v -v -o ./a.xnvptx-none.mkoffload [...] ./a.xnvptx-none.mkoffload.o -lgomp -lgcc -lc -lgcc+} {+Linking ./a.xnvptx-none.mkoffload.o as 0+} {+trying lib libc.a+} {+trying lib libgcc.a+} {+trying lib libgomp.a+} {+Resolving abort+} {+Resolving acc_on_device+} {+Linking libgomp.a::oacc-init.o/ as 1+} {+Linking libc.a::lib_a-abort.o/ as 2+} [...] (This depends on <https://github.com/MentorEmbedded/nvptx-tools/pull/37> "Put '-v' verbose output onto stderr instead of stdout".) gcc/ * config/nvptx/nvptx.h (ASM_SPEC, LINK_SPEC): Define. --- gcc/config/nvptx/nvptx.h | 7 +++ 1 file changed, 7 insertions(+) diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h index ed72c253191..b184f1d0150 100644 --- a/gcc/config/nvptx/nvptx.h +++ b/gcc/config/nvptx/nvptx.h @@ -27,6 +27,13 @@ /* Run-time Target. */ +/* Assembler supports '-v' option; handle similar to + '../../gcc.cc:asm_options', 'HAVE_GNU_AS'. */ +#define ASM_SPEC "%{v}" + +/* Linker supports '-v' option. */ +#define LINK_SPEC "%{v}" + #define STARTFILE_SPEC "%{mmainkernel:crt0.o}" #define TARGET_CPU_CPP_BUILTINS() nvptx_cpu_cpp_builtins () -- 2.25.1
[PING] nvptx: Allow '--with-arch' to override the default '-misa' (was: nvptx multilib setup)
Hi Tom! Ping. Grüße Thomas On 2022-06-15T23:18:10+0200, I wrote: > Hi Tom! > > On 2022-05-13T16:20:14+0200, I wrote: >> On 2022-02-04T13:09:29+0100, Tom de Vries via Gcc wrote: >>> On 2/4/22 08:21, Thomas Schwinge wrote: >>>> On 2022-02-03T13:35:55+, "vries at gcc dot gnu.org via Gcc-bugs" >>>> wrote: >>>>> I've tested this using (recommended) driver 470.94 on boards: >> >>>>> while iterating over dimensions { -mptx=3.1 , -mptx=6.3 } x { >>>>> GOMP_NVPTX_JIT=-O0, }. >>>> >>>> Do you use separate (nvptx-none offload target only?) builds for >>>> different '-mptx' variants (likewise: '-misa'), or have you hacked up the >>>> multilib configuration? >>> >>> Neither, I'm using --target_board=unix/foffload= for that. >> >> ACK, I see. So these flags then only affect GCC/nvptx code generation >> for the actual user code (here: GCC libgomp test cases), but for the >> GCC/nvptx target libraries (such as: libc, libm, libgfortran, libgomp -- >> the latter especially relevant for OpenMP), it uses PTX code from one of >> the two "pre-compiled" GCC/nvptx multilibs: default or '-mptx=3.1'. >> >> Meaning, one can't just use such a flag for "completely building code" >> for a specific configuration. Random example, >> '-foffload-options=nvptx-none=-march=sm_75': as GCC/nvptx target >> libraries aren't being built for '-march=sm_75' multilib, >> '-foffload-options=nvptx-none=-march=sm_75' uses the default multilib, >> which isn't '-march=sm_75'. >> >> >>> ('gcc/config/nvptx/t-nvptx:MULTILIB_OPTIONS' >>>> etc., I suppose?) Should we add a few representative configurations to >>>> be built by default? And/or, should we have a way to 'configure' per >>>> user needs (I suppose: '--with-multilib-list=[...]', as supported for a >>>> few other targets?)? (I see there's also a new >>>> '--with-multilib-generator=[...]', haven't looked in detail.) No matter >>>> which way: again, combinatorial explosion is a problem, of course... 
>>> >>> As far as I know, the gcc build doesn't finish when switching default to >>> higher than sm_35, so there's little point to go to a multilib setup at >>> this point. But once we fix that, we could reconsider, otherwise, >>> things are likely to regress again. >> >> As far as I remember, several issues have been fixed. Still waiting for >> Roger's "middle-end: Support ABIs that pass FP values as wider integers" >> or something similar, but that PR104489 issue is being worked around by >> "Limit HFmode support to mexperimental", if I got that right. >> >> Now I'm not suggesting we should now enable all or any random GCC/nvptx >> multilibs, to get all these variants of GCC/nvptx target libraries built; >> especially also given that GCC/nvptx code generation currently doesn't >> make too much use of the new capabilities. >> >> However, we do have a specific request that a customer would like to be >> able to change at GCC 'configure' time the GCC/nvptx default multilib >> (including that being used for building corresponding GCC/nvptx target >> libraries). >> >> Per 'gcc/doc/install.texi', I do see that some GCC targets allow for >> GCC 'configure'-time '--with-multilib-list=[...]', or >> '--with-multilib-generator=[...]', and I suppose we could be doing >> something similar? But before starting implementing, I'd like your >> input, as you'll be the one to approve in the end. And/or, maybe you've >> already made up your own ideas about that? > > So, instead of "random GCC/nvptx multilib configuration" (last > paragraph), I've come up with a way to implement our customer's request > (second last paragraph): 'configure' GCC/nvptx '--with-arch=sm_70'. > > I think I've implemented this in a way so that "random GCC/nvptx multilib > configuration" may eventually be implemented on top of that. 
For easy > review/testing I've split my changes into three commits, see attached > "nvptx: Make default '-misa=sm_30' explicit", > "nvptx: Introduce dummy multilib option for default '-misa=sm_30'", > "nvptx: Allow '--with-arch' to override the default '-mis
Define 'OMP_REQUIRES_[...]', 'GOMP_REQUIRES_[...]' in a single place (was: [Patch] OpenMP: Move omp requires checks to libgomp)
Hi! On 2022-06-08T05:56:02+0200, Tobias Burnus wrote: > This is based on Chung-Lin's patch at > https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563393.html > --- a/include/gomp-constants.h > +++ b/include/gomp-constants.h > +/* Flag values for requires-directive features, must match corresponding > + OMP_REQUIRES_* values in gcc/omp-general.h. */ > +#define GOMP_REQUIRES_UNIFIED_ADDRESS 0x10 > +#define GOMP_REQUIRES_UNIFIED_SHARED_MEMORY 0x20 > +#define GOMP_REQUIRES_REVERSE_OFFLOAD 0x80 To make things more failure proof, OK to push the attached "Define 'OMP_REQUIRES_[...]', 'GOMP_REQUIRES_[...]' in a single place"? Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From bd1aa5bc96e141a85bb53d61a5c7531e09ea3cf6 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Tue, 5 Jul 2022 11:04:46 +0200 Subject: [PATCH] Define 'OMP_REQUIRES_[...]', 'GOMP_REQUIRES_[...]' in a single place Clean up for recent commit 683f11843974f0bdf42f79cdcbb0c2b43c7b81b0 "OpenMP: Move omp requires checks to libgomp". gcc/ * omp-general.h (enum omp_requires): Use 'GOMP_REQUIRES_[...]'. include/ * gomp-constants.h (OMP_REQUIRES_[...]): Update comment. 
--- gcc/omp-general.h| 8 include/gomp-constants.h | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/gcc/omp-general.h b/gcc/omp-general.h index 7a94831e8f5..74e90e1a71a 100644 --- a/gcc/omp-general.h +++ b/gcc/omp-general.h @@ -126,12 +126,12 @@ extern int oacc_get_ifn_dim_arg (const gimple *stmt); enum omp_requires { OMP_REQUIRES_ATOMIC_DEFAULT_MEM_ORDER = 0xf, - OMP_REQUIRES_UNIFIED_ADDRESS = 0x10, - OMP_REQUIRES_UNIFIED_SHARED_MEMORY = 0x20, + OMP_REQUIRES_UNIFIED_ADDRESS = GOMP_REQUIRES_UNIFIED_ADDRESS, + OMP_REQUIRES_UNIFIED_SHARED_MEMORY = GOMP_REQUIRES_UNIFIED_SHARED_MEMORY, OMP_REQUIRES_DYNAMIC_ALLOCATORS = 0x40, - OMP_REQUIRES_REVERSE_OFFLOAD = 0x80, + OMP_REQUIRES_REVERSE_OFFLOAD = GOMP_REQUIRES_REVERSE_OFFLOAD, OMP_REQUIRES_ATOMIC_DEFAULT_MEM_ORDER_USED = 0x100, - OMP_REQUIRES_TARGET_USED = 0x200 + OMP_REQUIRES_TARGET_USED = GOMP_REQUIRES_TARGET_USED, }; extern GTY(()) enum omp_requires omp_requires_mask; diff --git a/include/gomp-constants.h b/include/gomp-constants.h index 3e3078f082e..84316f953d0 100644 --- a/include/gomp-constants.h +++ b/include/gomp-constants.h @@ -341,8 +341,7 @@ enum gomp_map_kind #define GOMP_DEPEND_MUTEXINOUTSET 4 #define GOMP_DEPEND_INOUTSET 5 -/* Flag values for requires-directive features, must match corresponding - OMP_REQUIRES_* values in gcc/omp-general.h. */ +/* Flag values for OpenMP 'requires' directive features. */ #define GOMP_REQUIRES_UNIFIED_ADDRESS 0x10 #define GOMP_REQUIRES_UNIFIED_SHARED_MEMORY 0x20 #define GOMP_REQUIRES_REVERSE_OFFLOAD 0x80 -- 2.35.1
Restore 'GOMP_offload_unregister_ver' functionality (was: [Patch][v5] OpenMP: Move omp requires checks to libgomp)
Hi! On 2022-07-01T15:06:05+0200, Tobias Burnus wrote: > Attached is the updated patch. Main changes: [...] This is now a great implementation of cross-component communication (host/offloading compilers, runtime), thanks! I'm sure this will be usable (or at least instructing) for further purposes, too. > - Uses GOMP_register_var to pass the mask to libgomp Like 'GOMP_offload_register_ver', also 'GOMP_offload_unregister_ver' needs to be adjusted correspondingly. OK to push the attached "Restore 'GOMP_offload_unregister_ver' functionality"? (Currently testing.) > (and no longer a weak variable) ... which actually removed my "contribution" (hack!) to this patch. ;-) Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 9a49a3e1e4d3def7b48beccdde6fa9f218719244 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Tue, 5 Jul 2022 18:23:15 +0200 Subject: [PATCH] Restore 'GOMP_offload_unregister_ver' functionality The recent commit 683f11843974f0bdf42f79cdcbb0c2b43c7b81b0 "OpenMP: Move omp requires checks to libgomp" changed the 'GOMP_offload_register_ver' interface but didn't change 'GOMP_offload_unregister_ver' accordingly, so we're no longer actually unregistering. gcc/ * config/gcn/mkoffload.cc (process_obj): Clarify 'target_data' -> '[...]_data'. * config/nvptx/mkoffload.cc (process): Likewise. libgomp/ * target.c (GOMP_offload_register_ver): Clarify 'target_data' -> 'data'. (GOMP_offload_unregister_ver): Likewise. Fix up 'target_data', and add 'assert'. 
--- gcc/config/gcn/mkoffload.cc | 8 gcc/config/nvptx/mkoffload.cc | 8 libgomp/target.c | 33 ++--- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc index b8b3fecfcb4..d2464332275 100644 --- a/gcc/config/gcn/mkoffload.cc +++ b/gcc/config/gcn/mkoffload.cc @@ -692,13 +692,13 @@ process_obj (FILE *in, FILE *cfile, uint32_t omp_requires) len); fprintf (cfile, - "static const struct gcn_image_desc {\n" + "static const struct gcn_data {\n" " uintptr_t omp_requires_mask;\n" " const struct gcn_image *gcn_image;\n" " unsigned kernel_count;\n" " const struct hsa_kernel_description *kernel_infos;\n" " unsigned global_variable_count;\n" - "} target_data = {\n" + "} gcn_data = {\n" " %d,\n" " &gcn_image,\n" " sizeof (gcn_kernels) / sizeof (gcn_kernels[0]),\n" @@ -723,7 +723,7 @@ process_obj (FILE *in, FILE *cfile, uint32_t omp_requires) fprintf (cfile, "static __attribute__((constructor)) void init (void)\n" "{\n" " GOMP_offload_register_ver (%#x, __OFFLOAD_TABLE__," - " %d/*GCN*/, &target_data);\n" + " %d/*GCN*/, &gcn_data);\n" "};\n", GOMP_VERSION_PACK (GOMP_VERSION, GOMP_VERSION_GCN), GOMP_DEVICE_GCN); @@ -731,7 +731,7 @@ process_obj (FILE *in, FILE *cfile, uint32_t omp_requires) fprintf (cfile, "static __attribute__((destructor)) void fini (void)\n" "{\n" " GOMP_offload_unregister_ver (%#x, __OFFLOAD_TABLE__," - " %d/*GCN*/, &target_data);\n" + " %d/*GCN*/, &gcn_data);\n" "};\n", GOMP_VERSION_PACK (GOMP_VERSION, GOMP_VERSION_GCN), GOMP_DEVICE_GCN); diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc index d8c81eb0547..0fa5f4423bf 100644 --- a/gcc/config/nvptx/mkoffload.cc +++ b/gcc/config/nvptx/mkoffload.cc @@ -310,7 +310,7 @@ process (FILE *in, FILE *out, uint32_t omp_requires) fprintf (out, "\n};\n\n"); fprintf (out, - "static const struct nvptx_tdata {\n" + "static const struct nvptx_data {\n" " uintptr_t omp_requires_mask;\n" " const struct ptx_obj *ptx_objs;\n" " unsigned 
ptx_num;\n" @@ -318,7 +318,7 @@ process (FILE *in, FILE *out, uint32_t omp_requires) " unsigned var_num;\n" " const struct nvptx_fn *fn_names;\n" " unsigned fn_num;\n" - "} target_data = {\n" + "} nvptx_data = {\n" " %d, ptx_objs, sizeof (ptx_objs) / sizeof (ptx_objs[0]),\n" " var_mappings," " sizeof (var_mappings) / sizeof (var_mappings[0]),\n" @@ -344,7 +344,7 @@ process (FILE *in, FILE *out, uint3
Fix Intel MIC 'mkoffload' for OpenMP 'requires' (was: [Patch] OpenMP: Move omp requires checks to libgomp)
Hi! On 2022-06-08T05:56:02+0200, Tobias Burnus wrote: > This is based on Chung-Lin's patch at > https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563393.html > PS: I have not fully tested the intelmic version. As part of my standard testing, I'm reporting that it got completely broken. ;'-) Your commit 683f11843974f0bdf42f79cdcbb0c2b43c7b81b0 "OpenMP: Move omp requires checks to libgomp" states: "When the device lto1 runs, it extracts the data for mkoffload. The latter than passes the value on to GOMP_offload_register_ver." That's not implemented for Intel MIC 'mkoffload', so we always run into 'gcc/lto-cgraph.cc:input_offload_tables': +#ifdef ACCEL_COMPILER + char *omp_requires_file = getenv ("GCC_OFFLOAD_OMP_REQUIRES_FILE"); + if (omp_requires_file == NULL || omp_requires_file[0] == '\0') +fatal_error (input_location, "GCC_OFFLOAD_OMP_REQUIRES_FILE unset"); ..., and all offloading compilation fail with that 'fatal_error'. OK to push the attached "Fix Intel MIC 'mkoffload' for OpenMP 'requires'"? (Currently testing.) Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From afd77646d7ced9f58fb49667e37ee4e21dd6fc53 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Tue, 5 Jul 2022 12:21:33 +0200 Subject: [PATCH] Fix Intel MIC 'mkoffload' for OpenMP 'requires' Similar to how the other 'mkoffload's got changed in recent commit 683f11843974f0bdf42f79cdcbb0c2b43c7b81b0 "OpenMP: Move omp requires checks to libgomp". This also means finally switching Intel MIC 'mkoffload' to 'GOMP_offload_register_ver', 'GOMP_offload_unregister_ver', making 'GOMP_offload_register', 'GOMP_offload_unregister' legacy entry points. gcc/ * config/i386/intelmic-mkoffload.cc (generate_host_descr_file) (prepare_target_image, main): Handle OpenMP 'requires'. 
(generate_host_descr_file): Switch to 'GOMP_offload_register_ver', 'GOMP_offload_unregister_ver'. libgomp/ * target.c (GOMP_offload_register, GOMP_offload_unregister): Denote as legacy entry points. * testsuite/libgomp.c-c++-common/requires-1.c: Enable for all 'target offloading_enabled'. * testsuite/libgomp.c-c++-common/requires-5.c: Likewise. * testsuite/libgomp.c-c++-common/requires-7.c: Likewise. * testsuite/libgomp.fortran/requires-1.f90: Likewise. --- gcc/config/i386/intelmic-mkoffload.cc | 56 +++ libgomp/target.c | 4 ++ .../libgomp.c-c++-common/requires-1.c | 2 +- .../libgomp.c-c++-common/requires-5.c | 2 +- .../libgomp.c-c++-common/requires-7.c | 2 +- .../testsuite/libgomp.fortran/requires-1.f90 | 2 +- 6 files changed, 52 insertions(+), 16 deletions(-) diff --git a/gcc/config/i386/intelmic-mkoffload.cc b/gcc/config/i386/intelmic-mkoffload.cc index c683d6f473e..596f6f107b8 100644 --- a/gcc/config/i386/intelmic-mkoffload.cc +++ b/gcc/config/i386/intelmic-mkoffload.cc @@ -370,7 +370,7 @@ generate_target_offloadend_file (const char *target_compiler) /* Generates object file with the host side descriptor. 
*/ static const char * -generate_host_descr_file (const char *host_compiler) +generate_host_descr_file (const char *host_compiler, uint32_t omp_requires) { char *dump_filename = concat (dumppfx, "_host_descr.c", NULL); const char *src_filename = save_temps @@ -386,39 +386,50 @@ generate_host_descr_file (const char *host_compiler) if (!src_file) fatal_error (input_location, "cannot open '%s'", src_filename); + fprintf (src_file, "#include <stdint.h>\n\n"); + fprintf (src_file, "extern const void *const __OFFLOAD_TABLE__;\n" "extern const void *const __offload_image_intelmic_start;\n" "extern const void *const __offload_image_intelmic_end;\n\n" - "static const void *const __offload_target_data[] = {\n" + "static const struct intelmic_data {\n" + " uintptr_t omp_requires_mask;\n" + " const void *const image_start;\n" + " const void *const image_end;\n" + "} intelmic_data = {\n" + " %d,\n" " &__offload_image_intelmic_start, &__offload_image_intelmic_end\n" - "};\n\n"); + "};\n\n", omp_requires); fprintf (src_file, "#ifdef __cplusplus\n" "extern \"C\"\n" "#endif\n" - "void GOMP_offload_register (const void *, int, const void *);\n" +
Re: Fix Intel MIC 'mkoffload' for OpenMP 'requires' (was: [Patch] OpenMP: Move omp requires checks to libgomp)
Hi Tobias! On 2022-07-06T13:29:14+0200, Tobias Burnus wrote: > On 06.07.22 13:04, Thomas Schwinge wrote: >> On 2022-06-08T05:56:02+0200, Tobias Burnus wrote: >>> PS: I have not fully tested the intelmic version. >> As part of my standard testing, I'm reporting that it got completely >> broken. ;'-) > > Interesting. Because intelmic-mkoffload.cc calls GOMP_offload_register > and not GOMP_offload_register_ver - and that call path should be unchanged. True indeed for that code path... > However, I missed that I had an assert that GCC_OFFLOAD_OMP_REQUIRES_FILE is > set. ..., but not for that one. > Thus, an alternative is to change that into an 'if'. > But I concur that updating intelmic-mkoffload.cc is nicer! Thanks! ACK. > Regarding: >> -! { dg-do link { target { offload_target_nvptx || offload_target_amdgcn } } >> } >> +! { dg-do link { target offloading_enabled } } > This patch looks wrong. We are not interested whether there is an offloading > device > available or not - but whether the offloading compiler is running. > > Those are completely independent. Obviously, offloading can be configured but > not > being present. (That's the usual case for testing distro builds but also can > occur elsewhere.) > And also the reverse if possible - usually because of -foffload=... but when > GCC is > configured with --enable-offload-defaulted, also other combinations are > possible. > > > I think the proper check would be write and use an 'offload_target_any', > i.e. OFFLOAD_TARGET_NAMES= being present and nonempty. > > Cf. 
check_effective_target_offload_target_nvptx / ..._amdgcn and > libgomp_check_effective_target_offload_target > in libgomp/testsuite/lib/libgomp.exp > > Possible patch (untested): > > # Return 1 if compiling for some offload target(s) > proc check_effective_target_offload_target_any { } { > return [libgomp_check_effective_target_offload_target ""] > } > > At least if I understand the following correctly, "" should work: > return [string match "*:$target_name*:*" ":$gcc_offload_targets:"] :-) Haha, that's actually *exactly* what I had implemented first! But then I realized that 'target offloading_enabled' is doing exactly that: check that offloading compilation is configured -- not that "there is an offloading device available or not" as you seem to understand? Or am I confused there? I do however agree that (generally) replacing 'target offloading_enabled' with a new 'target offload_target_any' would seem appropriate (as a separate patch), because that would also do the right thing when running libgomp testing with non-default '-foffload=[...]', including '-foffload=disable'. For checking "offloading device available" we'd use 'check_effective_target_offload_device[...]'. Grüße Thomas > Thanks for taking care of the patch fallout! > > Tobias - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Re: Restore 'GOMP_offload_unregister_ver' functionality (was: [Patch][v5] OpenMP: Move omp requires checks to libgomp)
Hi Tobias! On 2022-07-06T15:59:59+0200, Tobias Burnus wrote: > On 06.07.22 12:42, Thomas Schwinge wrote: >> --- a/libgomp/target.c >> +++ b/libgomp/target.c >> /* This function should be called from every offload image while unloading. >> GOMP_offload_unregister_ver (unsigned version, const void *host_table, >> /* Remove image from array of pending images. */ >> + bool found = false; >> for (i = 0; i < num_offload_images; i++) >> if (offload_images[i].target_data == target_data) >> { >> offload_images[i] = offload_images[--num_offload_images]; >> +found = true; >> break; >> } >> + assert (found); >> >> gomp_mutex_unlock (&register_lock); >> } > > ... I don't like that libgomp crashes without any helpful message in that > case. > > In my opinion: > * Either we assume that it is unlikely to occur - ignore it. >(Matches the current implementation: do nothing.) > * Or we want to have some diagnostic in case it occurs. But in that case, >it should be some explicit diagnostic printed by gomp_error or gomp_fatal. >IMHO, gomp_error is better than gomp_fatal as libgomp then continues > cleaning >up after this error, which IMHO makes more sense that just aborting. I'd be fine to change this into a 'gomp_error', but I don't think it's necessary. Maybe that wasn't obvious (and I should add a source code comment), but my point here is that this situation really should never arise (hence, if it does: internal error, thus 'assert'). Or, in other words, such a check should've been present in the original implementation already -- and would then have flagged your patch as being incomplete in that function. Thinking about it again, shouldn't we also add a corresponding sanity-check ('assert') to 'GOMP_offload_register_ver', such that the newly registered offload image must not already be present in 'offload_images'? 
(Isn't that understanding also supported by the 'break' in 'if (offload_images[i].target_data == target_data)' in 'GOMP_offload_unregister_ver', as cited above: that no duplicates are expected?) That's at least my understanding of the situation; happy to hear if I'm wrong. (It's a pity that we're totally devoid of test cases for dynamic registration/unregistration of offload images...) Anyway: it's totally fine to address (or not, if so desired) this sanity-check aspect independently of the other changes, so I've backed that out, and then pushed to master branch commit 3f05e03d6cfdf723ca0556318b6a9aa37be438e7 "Restore 'GOMP_offload_unregister_ver' functionality", see attached. Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 3f05e03d6cfdf723ca0556318b6a9aa37be438e7 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Tue, 5 Jul 2022 18:23:15 +0200 Subject: [PATCH] Restore 'GOMP_offload_unregister_ver' functionality The recent commit 683f11843974f0bdf42f79cdcbb0c2b43c7b81b0 "OpenMP: Move omp requires checks to libgomp" changed the 'GOMP_offload_register_ver' interface but didn't change 'GOMP_offload_unregister_ver' accordingly, so we're no longer actually unregistering. gcc/ * config/gcn/mkoffload.cc (process_obj): Clarify 'target_data' -> '[...]_data'. * config/nvptx/mkoffload.cc (process): Likewise. libgomp/ * target.c (GOMP_offload_register_ver): Clarify 'target_data' -> 'data'. (GOMP_offload_unregister_ver): Likewise. Fix up 'target_data'. 
--- gcc/config/gcn/mkoffload.cc | 8 gcc/config/nvptx/mkoffload.cc | 8 libgomp/target.c | 30 +++--- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc index b8b3fecfcb4..d2464332275 100644 --- a/gcc/config/gcn/mkoffload.cc +++ b/gcc/config/gcn/mkoffload.cc @@ -692,13 +692,13 @@ process_obj (FILE *in, FILE *cfile, uint32_t omp_requires) len); fprintf (cfile, - "static const struct gcn_image_desc {\n" + "static const struct gcn_data {\n" " uintptr_t omp_requires_mask;\n" " const struct gcn_image *gcn_image;\n" " unsigned kernel_count;\n" " const struct hsa_kernel_description *kernel_infos;\n" " unsigned global_variable_count;\n" - "} target_data = {\n" + "} gc
Adjust 'libgomp.c-c++-common/requires-3.c' (was: [Patch][v4] OpenMP: Move omp requires checks to libgomp)
Hi! In preparation for other changes: On 2022-06-29T16:33:02+0200, Tobias Burnus wrote: > --- /dev/null > +++ b/libgomp/testsuite/libgomp.c-c++-common/requires-3-aux.c > @@ -0,0 +1,11 @@ > +/* { dg-skip-if "" { *-*-* } } */ > + > +#pragma omp requires unified_address > + > +int x; > + > +void foo (void) > +{ > + #pragma omp target > + x = 1; > +} > --- /dev/null > +++ b/libgomp/testsuite/libgomp.c-c++-common/requires-3.c > @@ -0,0 +1,24 @@ > +/* { dg-do link { target offloading_enabled } } */ Not expected to see 'offloading_enabled' here... > +/* { dg-additional-sources requires-3-aux.c } */ > + > +/* Check diagnostic by device-compiler's lto1. ..., because of this note ^. > + Other file uses: 'requires unified_address'. */ > + > +#pragma omp requires unified_address,unified_shared_memory > + > +int a[10]; > +extern void foo (void); > + > +int > +main (void) > +{ > + #pragma omp target > + for (int i = 0; i < 10; i++) > +a[i] = 0; > + > + foo (); > + return 0; > +} > + > +/* { dg-error "OpenMP 'requires' directive with non-identical clauses in > multiple compilation units: 'unified_address, unified_shared_memory' vs. > 'unified_address'" "" { target *-*-* } 0 } */ > +/* { dg-excess-errors "Ignore messages like: errors during merging of > translation units|mkoffload returned 1 exit status" } */ OK to push the attached "Adjust 'libgomp.c-c++-common/requires-3.c'"? Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 6a4031b351680bdbfe3cdb9ac4e4a3aa59e4ca84 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Thu, 7 Jul 2022 09:59:45 +0200 Subject: [PATCH] Adjust 'libgomp.c-c++-common/requires-3.c' As documented, this one does "Check diagnostic by device-compiler's lto1". 
Indeed there are none when compiling with '-foffload=disable' with an offloading-enabled compiler, so we should use 'offload_target_[...]', as used in other similar test cases. Follow-up to recent commit 683f11843974f0bdf42f79cdcbb0c2b43c7b81b0 "OpenMP: Move omp requires checks to libgomp". libgomp/ * testsuite/libgomp.c-c++-common/requires-3.c: Adjust. --- libgomp/testsuite/libgomp.c-c++-common/requires-3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libgomp/testsuite/libgomp.c-c++-common/requires-3.c b/libgomp/testsuite/libgomp.c-c++-common/requires-3.c index 4b07ffdd09b..7091f400ef0 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/requires-3.c +++ b/libgomp/testsuite/libgomp.c-c++-common/requires-3.c @@ -1,4 +1,4 @@ -/* { dg-do link { target offloading_enabled } } */ +/* { dg-do link { target { offload_target_nvptx || offload_target_amdgcn } } } */ /* { dg-additional-sources requires-3-aux.c } */ /* Check diagnostic by device-compiler's lto1. -- 2.35.1
Enhance 'libgomp.c-c++-common/requires-4.c', 'libgomp.c-c++-common/requires-5.c' testing (was: [Patch][v4] OpenMP: Move omp requires checks to libgomp)
Hi! In preparation for other changes: On 2022-06-29T16:33:02+0200, Tobias Burnus wrote: > --- /dev/null > +++ b/libgomp/testsuite/libgomp.c-c++-common/requires-4-aux.c > @@ -0,0 +1,13 @@ > +/* { dg-skip-if "" { *-*-* } } */ > + > +#pragma omp requires reverse_offload > + > +/* Note: The file does not have neither of: > + declare target directives, device constructs or device routines. */ > + > +int x; > + > +void foo (void) > +{ > + x = 1; > +} > --- /dev/null > +++ b/libgomp/testsuite/libgomp.c-c++-common/requires-4.c > @@ -0,0 +1,23 @@ > +/* { dg-do link { target offloading_enabled } } */ > +/* { dg-additional-options "-flto" } */ > +/* { dg-additional-sources requires-4-aux.c } */ > + > +/* Check diagnostic by device-compiler's or host compiler's lto1. > + Other file uses: 'requires reverse_offload', but that's inactive as > + there are no declare target directives, device constructs nor device > routines */ > + > +#pragma omp requires unified_address,unified_shared_memory > + > +int a[10]; > +extern void foo (void); > + > +int > +main (void) > +{ > + #pragma omp target > + for (int i = 0; i < 10; i++) > +a[i] = 0; > + > + foo (); > + return 0; > +} > --- /dev/null > +++ b/libgomp/testsuite/libgomp.c-c++-common/requires-5-aux.c > @@ -0,0 +1,11 @@ > +/* { dg-skip-if "" { *-*-* } } */ > + > +#pragma omp requires unified_shared_memory, unified_address, reverse_offload > + > +int x; > + > +void foo (void) > +{ > + #pragma omp target > + x = 1; > +} > --- /dev/null > +++ b/libgomp/testsuite/libgomp.c-c++-common/requires-5.c > @@ -0,0 +1,20 @@ > +/* { dg-do run { target { offload_target_nvptx || offload_target_amdgcn } } > } */ > +/* { dg-additional-sources requires-5-aux.c } */ > + > +#pragma omp requires unified_shared_memory, unified_address, reverse_offload > + > +int a[10]; > +extern void foo (void); > + > +int > +main (void) > +{ > + #pragma omp target > + for (int i = 0; i < 10; i++) > +a[i] = 0; > + > + foo (); > + return 0; > +} > + > +/* { dg-output "devices 
present but 'omp requires unified_address, > unified_shared_memory, reverse_offload' cannot be fulfilled" } */ (The latter diagnostic later got conditionalized by 'GOMP_DEBUG=1'.) OK to push the attached "Enhance 'libgomp.c-c++-common/requires-4.c', 'libgomp.c-c++-common/requires-5.c' testing"? Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From ae14ccbd050d0b49073d5ea09de3e2af63f8c674 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Thu, 7 Jul 2022 09:45:42 +0200 Subject: [PATCH] Enhance 'libgomp.c-c++-common/requires-4.c', 'libgomp.c-c++-common/requires-5.c' testing These should compile and link and execute in all configurations; host-fallback execution, which we may actually verify. Follow-up to recent commit 683f11843974f0bdf42f79cdcbb0c2b43c7b81b0 "OpenMP: Move omp requires checks to libgomp". libgomp/ * testsuite/libgomp.c-c++-common/requires-4.c: Enhance testing. * testsuite/libgomp.c-c++-common/requires-5.c: Likewise. --- .../libgomp.c-c++-common/requires-4.c | 17 - .../libgomp.c-c++-common/requires-5.c | 18 +++--- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/libgomp/testsuite/libgomp.c-c++-common/requires-4.c b/libgomp/testsuite/libgomp.c-c++-common/requires-4.c index 128fdbb8463..deb04368108 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/requires-4.c +++ b/libgomp/testsuite/libgomp.c-c++-common/requires-4.c @@ -1,22 +1,29 @@ -/* { dg-do link { target offloading_enabled } } */ /* { dg-additional-options "-flto" } */ /* { dg-additional-sources requires-4-aux.c } */ -/* Check diagnostic by device-compiler's or host compiler's lto1. +/* Check no diagnostic by device-compiler's or host compiler's lto1. 
Other file uses: 'requires reverse_offload', but that's inactive as there are no declare target directives, device constructs nor device routines */ +/* For actual offload execution, prints the following (only) if GOMP_DEBUG=1: + "devices present but 'omp requires unified_address, unified_shared_memory, reverse_offload' cannot be fulfilled" + and does host-fallback execution. */ + #pragma omp requires unified_address,unified_shared_memory -int a[10]; +int a[10] = { 0 }; extern void foo (void); int ma
Re: Enhance 'libgomp.c-c++-common/requires-4.c', 'libgomp.c-c++-common/requires-5.c' testing (was: [Patch][v4] OpenMP: Move omp requires checks to libgomp)
Hi Tobias! On 2022-07-07T11:36:34+0200, Tobias Burnus wrote: > On 07.07.22 10:42, Thomas Schwinge wrote: >> In preparation for other changes: > ... >> On 2022-06-29T16:33:02+0200, Tobias Burnus wrote: >>> +/* { dg-output "devices present but 'omp requires unified_address, >>> unified_shared_memory, reverse_offload' cannot be fulfilled" } */ >> (The latter diagnostic later got conditionalized by 'GOMP_DEBUG=1'.) >> OK to push the attached "Enhance 'libgomp.c-c++-common/requires-4.c', >> 'libgomp.c-c++-common/requires-5.c' testing"? > ... >> libgomp/ >> * testsuite/libgomp.c-c++-common/requires-4.c: Enhance testing. >> * testsuite/libgomp.c-c++-common/requires-5.c: Likewise. > ... >> --- a/libgomp/testsuite/libgomp.c-c++-common/requires-4.c >> +++ b/libgomp/testsuite/libgomp.c-c++-common/requires-4.c >> @@ -1,22 +1,29 @@ >> -/* { dg-do link { target offloading_enabled } } */ >> /* { dg-additional-options "-flto" } */ >> /* { dg-additional-sources requires-4-aux.c } */ >> >> -/* Check diagnostic by device-compiler's or host compiler's lto1. >> +/* Check no diagnostic by device-compiler's or host compiler's lto1. > > I note that without ENABLE_OFFLOADING that there is never any lto1 > diagnostic. > > However, given that no diagnostic is expected, it also works for "! > offloading_enabled". > > Thus, the change fine. ACK. >> Other file uses: 'requires reverse_offload', but that's inactive as >> there are no declare target directives, device constructs nor device >> routines */ >> >> +/* For actual offload execution, prints the following (only) if >> GOMP_DEBUG=1: >> + "devices present but 'omp requires unified_address, >> unified_shared_memory, reverse_offload' cannot be fulfilled" >> + and does host-fallback execution. */ > > The latter is only true when also device code is produced – and a device > is available for that/those device types. I think that's what you imply > by "For actual offload execution" ACK. > but it is a bit hidden. 
> > Maybe s/For actual offload execution, prints/It may print/ is clearer? I've settled on: /* Depending on offload device capabilities, it may print something like the following (only) if GOMP_DEBUG=1: "devices present but 'omp requires unified_address, unified_shared_memory, reverse_offload' cannot be fulfilled" and in that case does host-fallback execution. */ > In principle, it would be nice if we could test for the output, but > currently setting an env var for remote execution does not work, yet. > Cf. https://gcc.gnu.org/pipermail/gcc-patches/2022-July/597773.html Right, I'm aware of that issue with remote testing, and that's why I didn't propose such output verification. (In a few other test cases, we do have 'dg-set-target-env-var GOMP_DEBUG "1"', which then at present are UNSUPPORTED for remote testing.) > When set, we could use offload_target_nvptx etc. (..._amdgcn, ..._any) > to test – as this guarantees that it is compiled for that device + the > device is available. Use 'target offload_device_nvptx', not 'target offload_target_nvptx', etc. ;-) >> + >> #pragma omp requires unified_address,unified_shared_memory >> >> -int a[10]; >> +int a[10] = { 0 }; >> extern void foo (void); >> >> int >> main (void) >> { >> - #pragma omp target >> + #pragma omp target map(to: a) > > Hmm, I wonder whether I like it or not. Without, there is an implicit > "map(tofrom:a)". On the other hand, OpenMP permits that – even with > unified-shared memory – the implementation may copy the data to the > device. (For instance, to permit faster access to "a".) > > Thus, ... > >> + for (int i = 0; i < 10; i++) >> +a[i] = i; >> + >> for (int i = 0; i < 10; i++) >> -a[i] = 0; >> +if (a[i] != i) >> + __builtin_abort (); > ... this condition (back on the host) could also fail with USM. However, > given that to my knowledge no USM implementation actually copies the > data, I believe it is fine.
Right, this is meant to describe/test the current GCC master branch behavior, where USM isn't supported, so I didn't consider that. But I agree, a source code comment should be added: /* As no offload devices support USM at present, we may verify host-fallback execution by absence of separate memory spaces. */ > (Disclaimer: I
Re: Fix Intel MIC 'mkoffload' for OpenMP 'requires' (was: [Patch] OpenMP: Move omp requires checks to libgomp)
Hi Tobias! On 2022-07-06T15:30:57+0200, Tobias Burnus wrote: > On 06.07.22 14:38, Thomas Schwinge wrote: >> :-) Haha, that's actually *exactly* what I had implemented first! But >> then I realized that 'target offloading_enabled' is doing exactly that: >> check that offloading compilation is configured -- not that "there is an >> offloading device available or not" as you seem to understand? Or am I >> confused there? > > I think as you mentioned below – there is a difference. Eh, thanks for un-confusing me on that aspect! There's a reason after all that 'offloading_enabled' lives in 'gcc/testsuite/lib/'... > And that difference, > I explicitly maked use of: [...] > Granted, as the other files do not use -foffload=..., it should not > make a difference - but, still, replacing it unconditionally > with 'target offloading_enabled' feels wrong. ACK! I've pushed to master branch commit 9ef714539cb7cc1cd746312fd5dcc987bf167471 "Fix Intel MIC 'mkoffload' for OpenMP 'requires'", see attached. Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 9ef714539cb7cc1cd746312fd5dcc987bf167471 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Tue, 5 Jul 2022 12:21:33 +0200 Subject: [PATCH] Fix Intel MIC 'mkoffload' for OpenMP 'requires' Similar to how the other 'mkoffload's got changed in recent commit 683f11843974f0bdf42f79cdcbb0c2b43c7b81b0 "OpenMP: Move omp requires checks to libgomp". This also means finally switching Intel MIC 'mkoffload' to 'GOMP_offload_register_ver', 'GOMP_offload_unregister_ver', making 'GOMP_offload_register', 'GOMP_offload_unregister' legacy entry points. gcc/ * config/i386/intelmic-mkoffload.cc (generate_host_descr_file) (prepare_target_image, main): Handle OpenMP 'requires'. 
(generate_host_descr_file): Switch to 'GOMP_offload_register_ver', 'GOMP_offload_unregister_ver'. libgomp/ * target.c (GOMP_offload_register, GOMP_offload_unregister): Denote as legacy entry points. * testsuite/lib/libgomp.exp (check_effective_target_offload_target_any): New proc. * testsuite/libgomp.c-c++-common/requires-1.c: Enable for 'offload_target_any'. * testsuite/libgomp.c-c++-common/requires-3.c: Likewise. * testsuite/libgomp.c-c++-common/requires-7.c: Likewise. * testsuite/libgomp.fortran/requires-1.f90: Likewise. --- gcc/config/i386/intelmic-mkoffload.cc | 56 +++ libgomp/target.c | 4 ++ libgomp/testsuite/lib/libgomp.exp | 5 ++ .../libgomp.c-c++-common/requires-1.c | 2 +- .../libgomp.c-c++-common/requires-3.c | 2 +- .../libgomp.c-c++-common/requires-7.c | 2 +- .../testsuite/libgomp.fortran/requires-1.f90 | 2 +- 7 files changed, 57 insertions(+), 16 deletions(-) diff --git a/gcc/config/i386/intelmic-mkoffload.cc b/gcc/config/i386/intelmic-mkoffload.cc index c683d6f473e..596f6f107b8 100644 --- a/gcc/config/i386/intelmic-mkoffload.cc +++ b/gcc/config/i386/intelmic-mkoffload.cc @@ -370,7 +370,7 @@ generate_target_offloadend_file (const char *target_compiler) /* Generates object file with the host side descriptor. 
*/ static const char * -generate_host_descr_file (const char *host_compiler) +generate_host_descr_file (const char *host_compiler, uint32_t omp_requires) { char *dump_filename = concat (dumppfx, "_host_descr.c", NULL); const char *src_filename = save_temps @@ -386,39 +386,50 @@ generate_host_descr_file (const char *host_compiler) if (!src_file) fatal_error (input_location, "cannot open '%s'", src_filename); + fprintf (src_file, "#include <stdint.h>\n\n"); + fprintf (src_file, "extern const void *const __OFFLOAD_TABLE__;\n" "extern const void *const __offload_image_intelmic_start;\n" "extern const void *const __offload_image_intelmic_end;\n\n" - "static const void *const __offload_target_data[] = {\n" + "static const struct intelmic_data {\n" + " uintptr_t omp_requires_mask;\n" + " const void *const image_start;\n" + " const void *const image_end;\n" + "} intelmic_data = {\n" + " %d,\n" " &__offload_image_intelmic_start, &__offload_image_intelmic_end\n" - "};\n\n"); + "};\n\n", omp_requires); fprintf (src_file, "#ifdef __cplusplus\n" "extern \"C\"\n"
Fix one issue in OpenMP 'requires' directive diagnostics (was: [Patch][v5] OpenMP: Move omp requires checks to libgomp)
else > + { > + error ("OpenMP %<requires%> directive with %qs specified " > + "only in some compilation units", buf2); > + inform (UNKNOWN_LOCATION, "%qs has %qs", > + val != OMP_REQUIRES_TARGET_USED ? fn2 : fn1, > + buf2); > + inform (UNKNOWN_LOCATION, "but %qs has not", > + val != OMP_REQUIRES_TARGET_USED ? fn1 : fn2); > + } > + error_emitted = true; > + } > } > else > fatal_error (input_location, Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 2271282f127366e785601242a46d4aa668bd6660 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Thu, 7 Jul 2022 15:11:03 +0200 Subject: [PATCH] Fix one issue in OpenMP 'requires' directive diagnostics Fix-up for recent commit 683f11843974f0bdf42f79cdcbb0c2b43c7b81b0 "OpenMP: Move omp requires checks to libgomp". gcc/ * lto-cgraph.cc (input_offload_tables) : Correct 'fn2' computation. libgomp/ * testsuite/libgomp.c-c++-common/requires-1.c: Add 'dg-note's. * testsuite/libgomp.c-c++-common/requires-2.c: Likewise. * testsuite/libgomp.c-c++-common/requires-3.c: Likewise. * testsuite/libgomp.c-c++-common/requires-7.c: Likewise. * testsuite/libgomp.fortran/requires-1.f90: Likewise.
--- gcc/lto-cgraph.cc | 2 +- libgomp/testsuite/libgomp.c-c++-common/requires-1.c | 4 +++- libgomp/testsuite/libgomp.c-c++-common/requires-2.c | 4 +++- libgomp/testsuite/libgomp.c-c++-common/requires-3.c | 4 +++- libgomp/testsuite/libgomp.c-c++-common/requires-7.c | 7 ++- libgomp/testsuite/libgomp.fortran/requires-1.f90| 5 + 6 files changed, 21 insertions(+), 5 deletions(-) diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc index 48629651e31..6d9c36ea8b6 100644 --- a/gcc/lto-cgraph.cc +++ b/gcc/lto-cgraph.cc @@ -1879,7 +1879,7 @@ input_offload_tables (bool do_force_output) && TREE_CODE (tmp_decl) != TRANSLATION_UNIT_DECL) tmp_decl = DECL_CONTEXT (tmp_decl); if (tmp_decl != NULL_TREE) - fn2 = IDENTIFIER_POINTER (DECL_NAME (requires_decl)); + fn2 = IDENTIFIER_POINTER (DECL_NAME (tmp_decl)); } char buf1[sizeof ("unified_address, unified_shared_memory, " diff --git a/libgomp/testsuite/libgomp.c-c++-common/requires-1.c b/libgomp/testsuite/libgomp.c-c++-common/requires-1.c index ab9a8ddfcde..31996f1ecf6 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/requires-1.c +++ b/libgomp/testsuite/libgomp.c-c++-common/requires-1.c @@ -20,5 +20,7 @@ main (void) return 0; } -/* { dg-error "OpenMP 'requires' directive with non-identical clauses in multiple compilation units: 'unified_shared_memory' vs. 'unified_address'" "" { target *-*-* } 0 } */ +/* { dg-error "OpenMP 'requires' directive with non-identical clauses in multiple compilation units: 'unified_shared_memory' vs. 
'unified_address'" "" { target *-*-* } 0 } + { dg-note {requires-1\.c' has 'unified_shared_memory'} {} { target *-*-* } 0 } + { dg-note {requires-1-aux\.c' has 'unified_address'} {} { target *-*-* } 0 } */ /* { dg-excess-errors "Ignore messages like: errors during merging of translation units|mkoffload returned 1 exit status" } */ diff --git a/libgomp/testsuite/libgomp.c-c++-common/requires-2.c b/libgomp/testsuite/libgomp.c-c++-common/requires-2.c index be1830d0c46..b20e154b0c7 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/requires-2.c +++ b/libgomp/testsuite/libgomp.c-c++-common/requires-2.c @@ -21,5 +21,7 @@ main (void) return 0; } -/* { dg-error "OpenMP 'requires' directive with 'unified_shared_memory' specified only in some compilation units" "" { target *-*-* } 0 } */ +/* { dg-error "OpenMP 'requires' directive with 'unified_shared_memory' specified only in some compilation units" "" { target *-*-* } 0 } + { dg-note {requires-2\.c' has 'unified_shared_memory'} {} { target *-*-* } 0 } + { dg-note {but '[^']*requires-2-aux\.c' has not} {} { target *-*-* } 0 } */ /* { dg-excess-errors "Ignore messages like: errors during merging of translation units|mkoffload returned 1 exit status" } */ diff --git a/libgomp/testsuite/libgomp.c-c++-common/requires-3.c b/libgomp/testsuite/libgomp.c-c++-common/requires-3.c index 1c204c8a21e..a549a19ebb
Re: Fix one issue in OpenMP 'requires' directive diagnostics (was: [Patch][v5] OpenMP: Move omp requires checks to libgomp)
Hi! On 2022-07-07T15:56:28+0200, Tobias Burnus wrote: > On 07.07.22 15:26, Thomas Schwinge wrote: >> On 2022-07-01T23:08:16+0200, Tobias Burnus >> wrote: >>> Updated version attached – I hope I got everything right, but I start to >>> get tired, I am not 100% sure. >> ..., and so the obligatory copy'n'past-o;-) crept in: > ... >>> + if (tmp_decl != NULL_TREE) >>> + fn2 = IDENTIFIER_POINTER (DECL_NAME (requires_decl)); >>> + } >> ... here: 'tmp_decl' not 'requires_decl'. OK to push the attached >> "Fix one issue in OpenMP 'requires' directive diagnostics"? > Good that you spotted it and thanks for testing + fixing it! >> I'd even push that one "as obvious", but thought I'd ask whether you >> maybe have a quick idea about the XFAILs that I'm adding? (I'm otherwise >> not planning on resolving that issue at this time.) > > (This question relates to what's printed if there is no > TRANSLATION_UNIT_DECL.) > [...] Thanks for the explanation, makes sense. > Regarding the xfail: I think it is fine to have this xfail, but as it is > clear why inform points to /tmp/cc*.o, you could reword the TODO to > state why it goes wrong. Done: TODO We're currently not streaming location information for the OpenMP directives used in 'requires-7-aux.c', so we're not seeing the source file name here (but a temporary '*.o' instead; for details, see <https://gcc.gnu.org/pipermail/gcc-patches/2022-July/598011.html>): { dg-note {requires-7-aux\.c' has 'unified_address'} {} { xfail *-*-* } 0 } ..., but we may still verify that the rest of the diagnostic is correct: { dg-note {' has 'unified_address'} {} { target *-*-* } 0 } With just that changed, I've pushed to master branch commit faa0c328ee65f0d6d65d6e20181d26e336071919 "Fix one issue in OpenMP 'requires' directive diagnostics", see attached.
Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From faa0c328ee65f0d6d65d6e20181d26e336071919 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Thu, 7 Jul 2022 15:11:03 +0200 Subject: [PATCH] Fix one issue in OpenMP 'requires' directive diagnostics Fix-up for recent commit 683f11843974f0bdf42f79cdcbb0c2b43c7b81b0 "OpenMP: Move omp requires checks to libgomp". gcc/ * lto-cgraph.cc (input_offload_tables) : Correct 'fn2' computation. libgomp/ * testsuite/libgomp.c-c++-common/requires-1.c: Add 'dg-note's. * testsuite/libgomp.c-c++-common/requires-2.c: Likewise. * testsuite/libgomp.c-c++-common/requires-3.c: Likewise. * testsuite/libgomp.c-c++-common/requires-7.c: Likewise. * testsuite/libgomp.fortran/requires-1.f90: Likewise. --- gcc/lto-cgraph.cc | 2 +- libgomp/testsuite/libgomp.c-c++-common/requires-1.c | 4 +++- libgomp/testsuite/libgomp.c-c++-common/requires-2.c | 4 +++- libgomp/testsuite/libgomp.c-c++-common/requires-3.c | 4 +++- libgomp/testsuite/libgomp.c-c++-common/requires-7.c | 10 +- libgomp/testsuite/libgomp.fortran/requires-1.f90| 8 6 files changed, 27 insertions(+), 5 deletions(-) diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc index 48629651e31..6d9c36ea8b6 100644 --- a/gcc/lto-cgraph.cc +++ b/gcc/lto-cgraph.cc @@ -1879,7 +1879,7 @@ input_offload_tables (bool do_force_output) && TREE_CODE (tmp_decl) != TRANSLATION_UNIT_DECL) tmp_decl = DECL_CONTEXT (tmp_decl); if (tmp_decl != NULL_TREE) - fn2 = IDENTIFIER_POINTER (DECL_NAME (requires_decl)); + fn2 = IDENTIFIER_POINTER (DECL_NAME (tmp_decl)); } char buf1[sizeof ("unified_address, unified_shared_memory, " diff --git a/libgomp/testsuite/libgomp.c-c++-common/requires-1.c b/libgomp/testsuite/libgomp.c-c++-common/requires-1.c index ab9a8ddfcde..31996f1ecf6 100644 --- 
a/libgomp/testsuite/libgomp.c-c++-common/requires-1.c +++ b/libgomp/testsuite/libgomp.c-c++-common/requires-1.c @@ -20,5 +20,7 @@ main (void) return 0; } -/* { dg-error "OpenMP 'requires' directive with non-identical clauses in multiple compilation units: 'unified_shared_memory' vs. 'unified_address'" "" { target *-*-* } 0 } */ +/* { dg-error "OpenMP 'requires' directive with non-identical clauses in multiple compilation units: 'unified_shared_memory' vs. 'unified_address'" "" { target *-*-* } 0 } + { dg-note {requires-1\.c'
Enhance '_Pragma' diagnostics verification in OMP C/C++ test cases (was: [PATCH] c: Fix location for _Pragma tokens [PR97498])
Hi! On 2022-07-10T16:51:11-0400, Lewis Hyatt via Gcc-patches wrote: > On Sat, Jul 9, 2022 at 11:59 PM Jeff Law via Gcc-patches > wrote: >> On 7/9/2022 2:52 PM, Lewis Hyatt via Gcc-patches wrote: >> > PR97498 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97498) is another PR >> > related to the fact that imprecise locations for _Pragma result in >> > counterintuitive behavior for GCC diagnostic pragmas >> > I think the main source of problems for all remaining issues is that we use >> > the global input_location for deciding when/if a diagnostic should apply. I >> > think it should be eventually doable to eliminate this, and rather properly >> > resolve the token locations to the place they need to be >> I've long wanted to see our dependency on input_location be diminished >> with the goal of making it go away completely. > [...] > Then I will plan to work on > eliminating input_location from c-pragma.cc as a longer term goal. Great; I too am looking forward to that. There, and then elsewhere, everywhere. :-) >> > The rest of [patch] is just tweaking a couple tests which were sensitive >> > to the >> > location being output. In all these cases, the new locations seem more >> > informative to me than the old ones. ACK, thanks. On top of that, I've just pushed to master branch commit 06b2a2abe26554c6f9365676683d67368cbba206 "Enhance '_Pragma' diagnostics verification in OMP C/C++ test cases", see attached. 
Grüße Thomas ----- Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 06b2a2abe26554c6f9365676683d67368cbba206 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Mon, 11 Jul 2022 09:33:19 +0200 Subject: [PATCH] Enhance '_Pragma' diagnostics verification in OMP C/C++ test cases Follow-up to recent commit 0587cef3d7962a8b0f44779589ba2920dd3d71e5 "c: Fix location for _Pragma tokens [PR97498]". gcc/testsuite/ * c-c++-common/gomp/pragma-3.c: Enhance '_Pragma' diagnostics verification. * c-c++-common/gomp/pragma-5.c: Likewise. libgomp/ * testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Enhance '_Pragma' diagnostics verification. --- gcc/testsuite/c-c++-common/gomp/pragma-3.c| 8 +--- gcc/testsuite/c-c++-common/gomp/pragma-5.c| 8 +--- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c | 8 +--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/gcc/testsuite/c-c++-common/gomp/pragma-3.c b/gcc/testsuite/c-c++-common/gomp/pragma-3.c index ae18e9b8886..3e1b2111c3d 100644 --- a/gcc/testsuite/c-c++-common/gomp/pragma-3.c +++ b/gcc/testsuite/c-c++-common/gomp/pragma-3.c @@ -2,13 +2,15 @@ /* PR preprocessor/103165 */ #define inner(...) #__VA_ARGS__ ; _Pragma("omp error severity(warning) message (\"Test\") at(compilation)") /* { dg-line inner_location } */ -#define outer(...) inner(__VA_ARGS__) +#define outer(...) 
inner(__VA_ARGS__) /* { dg-line outer_location } */ void f (void) { - const char *str = outer(inner(1,2)); - /* { dg-warning "'pragma omp error' encountered: Test" "inner expansion" { target *-*-* } inner_location } */ + const char *str = outer(inner(1,2)); /* { dg-line str_location } */ + /* { dg-warning "35:'pragma omp error' encountered: Test" "" { target *-*-* } inner_location } + { dg-note "20:in expansion of macro 'inner'" "" { target *-*-* } outer_location } + { dg-note "21:in expansion of macro 'outer'" "" { target *-*-* } str_location } */ } #if 0 diff --git a/gcc/testsuite/c-c++-common/gomp/pragma-5.c b/gcc/testsuite/c-c++-common/gomp/pragma-5.c index 8124f701502..173c25e803a 100644 --- a/gcc/testsuite/c-c++-common/gomp/pragma-5.c +++ b/gcc/testsuite/c-c++-common/gomp/pragma-5.c @@ -2,13 +2,15 @@ /* PR preprocessor/103165 */ #define inner(...) #__VA_ARGS__ ; _Pragma ( " omp error severity (warning) message (\"Test\") at(compilation)" ) /* { dg-line inner_location } */ -#define outer(...) inner(__VA_ARGS__) +#define outer(...) inner(__VA_ARGS__) /* { dg-line outer_location } */ void f (void) { - const char *str = outer(inner(1,2)); - /* { dg-warning "'pragma omp error' encountered: Test" "inner expansion" { target *-*-* } inner_location } */ + const char *str = outer(inner(1,2)); /* { dg-line str_location } */ + /* { dg-warning "35:'pragma omp error' encountered: Test" "" { target *-*-* } inner_location } + { dg-note &quo
XFAIL 'offloading_enabled' diagnostics issue in 'libgomp.oacc-c-c++-common/reduction-5.c' [PR101551] (was: Enhance '_Pragma' diagnostics verification in OMP C/C++ test cases)
Hi! On 2022-07-11T11:27:12+0200, I wrote: > [...], I've just pushed to master branch > commit 06b2a2abe26554c6f9365676683d67368cbba206 > "Enhance '_Pragma' diagnostics verification in OMP C/C++ test cases" > --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c > +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c > @@ -17,7 +17,7 @@ const int n = 100; > #define check_reduction(gwv_par, gwv_loop) \ >{ \ >s1 = 2; s2 = 5;\ > -DO_PRAGMA (acc parallel gwv_par copy (s1, s2)) \ > +DO_PRAGMA (acc parallel gwv_par copy (s1, s2)) /* { dg-line DO_PRAGMA_loc } > */ \ > DO_PRAGMA (acc loop gwv_loop reduction (+:s1, s2)) \ > for (i = 0; i < n; i++) \ >{ \ > @@ -45,8 +45,10 @@ main (void) > >/* Nvptx targets require a vector_length or 32 in to allow spinlocks with > gangs. */ > - check_reduction (num_workers (nw) vector_length (vl), worker); > - /* { dg-warning "region is vector partitioned but does not contain vector > partitioned code" "test1" { target *-*-* } pragma_loc } */ > + check_reduction (num_workers (nw) vector_length (vl), worker); /* { > dg-line check_reduction_loc } > + /* { dg-warning "22:region is vector partitioned but does not contain > vector partitioned code" "" { target *-*-* } pragma_loc } > + { dg-note "1:in expansion of macro 'DO_PRAGMA'" "" { target *-*-* } > DO_PRAGMA_loc } > + { dg-note "3:in expansion of macro 'check_reduction'" "" { target *-*-* > } check_reduction_loc } */ Oh my, PR101551 "[offloading] Differences in diagnostics etc." strikes again... The latter two 'note' diagnostics are currently only emitted in non-offloading configurations. I've now pushed to master branch commit 3723aedaad20a129741c2f6f3c22b3dd1220a3fc "XFAIL 'offloading_enabled' diagnostics issue in 'libgomp.oacc-c-c++-common/reduction-5.c' [PR101551]", see attached. 
Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 3723aedaad20a129741c2f6f3c22b3dd1220a3fc Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Tue, 12 Jul 2022 08:17:37 +0200 Subject: [PATCH] XFAIL 'offloading_enabled' diagnostics issue in 'libgomp.oacc-c-c++-common/reduction-5.c' [PR101551] Fix-up for recent commit 06b2a2abe26554c6f9365676683d67368cbba206 "Enhance '_Pragma' diagnostics verification in OMP C/C++ test cases". Supposedly it's the same issue as in <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101551#c2>, where I'd noted that: | [...] with an offloading-enabled build of GCC we're losing | "note: in expansion of macro '[...]'" diagnostics. | (Effectively '-ftrack-macro-expansion=0'?) PR middle-end/101551 libgomp/ * testsuite/libgomp.oacc-c-c++-common/reduction-5.c: XFAIL 'offloading_enabled' diagnostics issue. --- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c index 72094609f0f..ddccfe89e73 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c @@ -45,10 +45,11 @@ main (void) /* Nvptx targets require a vector_length or 32 in to allow spinlocks with gangs. 
*/ - check_reduction (num_workers (nw) vector_length (vl), worker); /* { dg-line check_reduction_loc } + check_reduction (num_workers (nw) vector_length (vl), worker); /* { dg-line check_reduction_loc } */ /* { dg-warning "22:region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } pragma_loc } - { dg-note "1:in expansion of macro 'DO_PRAGMA'" "" { target *-*-* } DO_PRAGMA_loc } - { dg-note "3:in expansion of macro 'check_reduction'" "" { target *-*-* } check_reduction_loc } */ + { dg-note "1:in expansion of macro 'DO_PRAGMA'" "" { target *-*-* xfail offloading_enabled } DO_PRAGMA_loc } + { dg-note "3:in expansion of macro 'check_reduction'" "" { target *-*-* xfail offloading_enabled } check_reduction_loc } + TODO See PR101551 for 'offloading_enabled' XFAILs. */ check_reduction (vector_length (vl), vector); check_reduction (num_gangs (ng) num_workers (nw) vector_length (vl), gang worker vector); -- 2.35.1
[PING^3] nvptx: forward '-v' command-line option to assembler, linker
Hi Tom! Ping. Grüße Thomas On 2022-07-05T16:58:54+0200, I wrote: > Hi Tom! > > Ping. > > > Grüße > Thomas > > > On 2022-06-07T17:41:16+0200, I wrote: >> Hi! >> >> On 2022-05-30T09:06:21+0200, Tobias Burnus wrote: >>> On 29.05.22 22:49, Thomas Schwinge wrote: >>>> Not sure if that's what you had in mind, but what do you think about the >>>> attached "nvptx: forward '-v' command-line option to assembler, linker"? >>>> OK to push to GCC master branch (after merging >>>> <https://github.com/MentorEmbedded/nvptx-tools/pull/37> >>>> "Put '-v' verbose output onto stderr instead of stdout")? >>> >>> I was mainly thinking of some way to have it available — which >>> '-foffload-options=-Wa,-v' already permits on the GCC side. (Once the >>> nvptx-tools patch actually makes use of the '-v'.) >> >> (Merged a week ago.) >> >>> If I understand your patch correctly, this patch now causes 'gcc -v' to >>> imply 'gcc -v -Wa,-v'. I think that's okay, since 'gcc -v' already >>> outputs a lot of lines and those lines can be helpful to understand what >>> happens and what not. >> >> ACK. >> >>> Tom, your thoughts on this? >> >> Ping. >> >> >> Grüße >> Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 17c35607d4927299b0c4bd19dd6fd205c85c4a4b Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Sun, 29 May 2022 22:31:43 +0200 Subject: [PATCH] nvptx: forward '-v' command-line option to assembler, linker For example, for offloading compilation with '-save-temps -v', before vs. after word-diff then looks like: [...] [...]/build-gcc-offload-nvptx-none/gcc/as {+-v -v+} -o ./a.xnvptx-none.mkoffload.o ./a.xnvptx-none.mkoffload.s {+Verifying sm_30 code with sm_35 code generation.+} {+ ptxas -c -o /dev/null ./a.xnvptx-none.mkoffload.o --gpu-name sm_35 -O0+} [...] 
[...]/build-gcc-offload-nvptx-none/gcc/collect2 {+-v -v+} -o ./a.xnvptx-none.mkoffload [...] @./a.xnvptx-none.mkoffload.args.1 -lgomp -lgcc -lc -lgcc {+collect2 version 12.0.1 20220428 (experimental)+} {+[...]/build-gcc-offload-nvptx-none/gcc/collect-ld -v -v -o ./a.xnvptx-none.mkoffload [...] ./a.xnvptx-none.mkoffload.o -lgomp -lgcc -lc -lgcc+} {+Linking ./a.xnvptx-none.mkoffload.o as 0+} {+trying lib libc.a+} {+trying lib libgcc.a+} {+trying lib libgomp.a+} {+Resolving abort+} {+Resolving acc_on_device+} {+Linking libgomp.a::oacc-init.o/ as 1+} {+Linking libc.a::lib_a-abort.o/ as 2+} [...] (This depends on <https://github.com/MentorEmbedded/nvptx-tools/pull/37> "Put '-v' verbose output onto stderr instead of stdout".) gcc/ * config/nvptx/nvptx.h (ASM_SPEC, LINK_SPEC): Define. --- gcc/config/nvptx/nvptx.h | 7 +++ 1 file changed, 7 insertions(+) diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h index ed72c253191..b184f1d0150 100644 --- a/gcc/config/nvptx/nvptx.h +++ b/gcc/config/nvptx/nvptx.h @@ -27,6 +27,13 @@ /* Run-time Target. */ +/* Assembler supports '-v' option; handle similar to + '../../gcc.cc:asm_options', 'HAVE_GNU_AS'. */ +#define ASM_SPEC "%{v}" + +/* Linker supports '-v' option. */ +#define LINK_SPEC "%{v}" + #define STARTFILE_SPEC "%{mmainkernel:crt0.o}" #define TARGET_CPU_CPP_BUILTINS() nvptx_cpu_cpp_builtins () -- 2.25.1
[PING^2] nvptx: Allow '--with-arch' to override the default '-misa' (was: nvptx multilib setup)
Hi Tom! Ping. Grüße Thomas On 2022-07-05T16:59:23+0200, I wrote: > Hi Tom! > > Ping. > > > Grüße > Thomas > > > On 2022-06-15T23:18:10+0200, I wrote: >> Hi Tom! >> >> On 2022-05-13T16:20:14+0200, I wrote: >>> On 2022-02-04T13:09:29+0100, Tom de Vries via Gcc wrote: >>>> On 2/4/22 08:21, Thomas Schwinge wrote: >>>>> On 2022-02-03T13:35:55+, "vries at gcc dot gnu.org via Gcc-bugs" >>>>> wrote: >>>>>> I've tested this using (recommended) driver 470.94 on boards: >>> >>>>>> while iterating over dimensions { -mptx=3.1 , -mptx=6.3 } x { >>>>>> GOMP_NVPTX_JIT=-O0, }. >>>>> >>>>> Do you use separate (nvptx-none offload target only?) builds for >>>>> different '-mptx' variants (likewise: '-misa'), or have you hacked up the >>>>> multilib configuration? >>>> >>>> Neither, I'm using --target_board=unix/foffload= for that. >>> >>> ACK, I see. So these flags then only affect GCC/nvptx code generation >>> for the actual user code (here: GCC libgomp test cases), but for the >>> GCC/nvptx target libraries (such as: libc, libm, libgfortran, libgomp -- >>> the latter especially relevant for OpenMP), it uses PTX code from one of >>> the two "pre-compiled" GCC/nvptx multilibs: default or '-mptx=3.1'. >>> >>> Meaning, one can't just use such a flag for "completely building code" >>> for a specific configuration. Random example, >>> '-foffload-options=nvptx-none=-march=sm_75': as GCC/nvptx target >>> libraries aren't being built for '-march=sm_75' multilib, >>> '-foffload-options=nvptx-none=-march=sm_75' uses the default multilib, >>> which isn't '-march=sm_75'. >>> >>> >>>> ('gcc/config/nvptx/t-nvptx:MULTILIB_OPTIONS' >>>>> etc., I suppose?) Should we add a few representative configurations to >>>>> be built by default? And/or, should we have a way to 'configure' per >>>>> user needs (I suppose: '--with-multilib-list=[...]', as supported for a >>>>> few other targets?)? (I see there's also a new >>>>> '--with-multilib-generator=[...]', haven't looked in detail.) 
No matter >>>>> which way: again, combinatorial explosion is a problem, of course... >>>> >>>> As far as I know, the gcc build doesn't finish when switching default to >>>> higher than sm_35, so there's little point to go to a multilib setup at >>>> this point. But once we fix that, we could reconsider, otherwise, >>>> things are likely to regress again. >>> >>> As far as I remember, several issues have been fixed. Still waiting for >>> Roger's "middle-end: Support ABIs that pass FP values as wider integers" >>> or something similar, but that PR104489 issue is being worked around by >>> "Limit HFmode support to mexperimental", if I got that right. >>> >>> Now I'm not suggesting we should now enable all or any random GCC/nvptx >>> multilibs, to get all these variants of GCC/nvptx target libraries built; >>> especially also given that GCC/nvptx code generation currently doesn't >>> make too much use of the new capabilities. >>> >>> However, we do have a specific request that a customer would like to be >>> able to change at GCC 'configure' time the GCC/nvptx default multilib >>> (including that being used for building corresponding GCC/nvptx target >>> libraries). >>> >>> Per 'gcc/doc/install.texi', I do see that some GCC targets allow for >>> GCC 'configure'-time '--with-multilib-list=[...]', or >>> '--with-multilib-generator=[...]', and I suppose we could be doing >>> something similar? But before starting implementing, I'd like your >>> input, as you'll be the one to approve in the end. And/or, maybe you've >>> already made up your own ideas about that? >> >> So, instead of "random GCC/nvptx multilib configuration" (last >> paragraph), I've come up with a way to implement our customer's request >> (second last paragraph): 'configure' GCC/nvptx '--with-arch=sm_70'. >> >> I think I've implemented this in a way so that "random GCC/nvptx mult
[PING^4] nvptx: forward '-v' command-line option to assembler, linker
Hi Tom! Ping. Grüße Thomas On 2022-07-13T10:41:23+0200, I wrote: > Hi Tom! > > Ping. > > > Grüße > Thomas > > > On 2022-07-05T16:58:54+0200, I wrote: >> Hi Tom! >> >> Ping. >> >> >> Grüße >> Thomas >> >> >> On 2022-06-07T17:41:16+0200, I wrote: >>> Hi! >>> >>> On 2022-05-30T09:06:21+0200, Tobias Burnus wrote: >>>> On 29.05.22 22:49, Thomas Schwinge wrote: >>>>> Not sure if that's what you had in mind, but what do you think about the >>>>> attached "nvptx: forward '-v' command-line option to assembler, linker"? >>>>> OK to push to GCC master branch (after merging >>>>> <https://github.com/MentorEmbedded/nvptx-tools/pull/37> >>>>> "Put '-v' verbose output onto stderr instead of stdout")? >>>> >>>> I was mainly thinking of some way to have it available — which >>>> '-foffload-options=-Wa,-v' already permits on the GCC side. (Once the >>>> nvptx-tools patch actually makes use of the '-v'.) >>> >>> (Merged a week ago.) >>> >>>> If I understand your patch correctly, this patch now causes 'gcc -v' to >>>> imply 'gcc -v -Wa,-v'. I think that's okay, since 'gcc -v' already >>>> outputs a lot of lines and those lines can be helpful to understand what >>>> happens and what not. >>> >>> ACK. >>> >>>> Tom, your thoughts on this? >>> >>> Ping. >>> >>> >>> Grüße >>> Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 17c35607d4927299b0c4bd19dd6fd205c85c4a4b Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Sun, 29 May 2022 22:31:43 +0200 Subject: [PATCH] nvptx: forward '-v' command-line option to assembler, linker For example, for offloading compilation with '-save-temps -v', before vs. after word-diff then looks like: [...] 
[...]/build-gcc-offload-nvptx-none/gcc/as {+-v -v+} -o ./a.xnvptx-none.mkoffload.o ./a.xnvptx-none.mkoffload.s {+Verifying sm_30 code with sm_35 code generation.+} {+ ptxas -c -o /dev/null ./a.xnvptx-none.mkoffload.o --gpu-name sm_35 -O0+} [...] [...]/build-gcc-offload-nvptx-none/gcc/collect2 {+-v -v+} -o ./a.xnvptx-none.mkoffload [...] @./a.xnvptx-none.mkoffload.args.1 -lgomp -lgcc -lc -lgcc {+collect2 version 12.0.1 20220428 (experimental)+} {+[...]/build-gcc-offload-nvptx-none/gcc/collect-ld -v -v -o ./a.xnvptx-none.mkoffload [...] ./a.xnvptx-none.mkoffload.o -lgomp -lgcc -lc -lgcc+} {+Linking ./a.xnvptx-none.mkoffload.o as 0+} {+trying lib libc.a+} {+trying lib libgcc.a+} {+trying lib libgomp.a+} {+Resolving abort+} {+Resolving acc_on_device+} {+Linking libgomp.a::oacc-init.o/ as 1+} {+Linking libc.a::lib_a-abort.o/ as 2+} [...] (This depends on <https://github.com/MentorEmbedded/nvptx-tools/pull/37> "Put '-v' verbose output onto stderr instead of stdout".) gcc/ * config/nvptx/nvptx.h (ASM_SPEC, LINK_SPEC): Define. --- gcc/config/nvptx/nvptx.h | 7 +++ 1 file changed, 7 insertions(+) diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h index ed72c253191..b184f1d0150 100644 --- a/gcc/config/nvptx/nvptx.h +++ b/gcc/config/nvptx/nvptx.h @@ -27,6 +27,13 @@ /* Run-time Target. */ +/* Assembler supports '-v' option; handle similar to + '../../gcc.cc:asm_options', 'HAVE_GNU_AS'. */ +#define ASM_SPEC "%{v}" + +/* Linker supports '-v' option. */ +#define LINK_SPEC "%{v}" + #define STARTFILE_SPEC "%{mmainkernel:crt0.o}" #define TARGET_CPU_CPP_BUILTINS() nvptx_cpu_cpp_builtins () -- 2.25.1
[PING^3] nvptx: Allow '--with-arch' to override the default '-misa' (was: nvptx multilib setup)
Hi Tom! Ping. Grüße Thomas On 2022-07-13T10:42:44+0200, I wrote: > Hi Tom! > > Ping. > > > Grüße > Thomas > > > On 2022-07-05T16:59:23+0200, I wrote: >> Hi Tom! >> >> Ping. >> >> >> Grüße >> Thomas >> >> >> On 2022-06-15T23:18:10+0200, I wrote: >>> Hi Tom! >>> >>> On 2022-05-13T16:20:14+0200, I wrote: >>>> On 2022-02-04T13:09:29+0100, Tom de Vries via Gcc wrote: >>>>> On 2/4/22 08:21, Thomas Schwinge wrote: >>>>>> On 2022-02-03T13:35:55+, "vries at gcc dot gnu.org via Gcc-bugs" >>>>>> wrote: >>>>>>> I've tested this using (recommended) driver 470.94 on boards: >>>> >>>>>>> while iterating over dimensions { -mptx=3.1 , -mptx=6.3 } x { >>>>>>> GOMP_NVPTX_JIT=-O0, }. >>>>>> >>>>>> Do you use separate (nvptx-none offload target only?) builds for >>>>>> different '-mptx' variants (likewise: '-misa'), or have you hacked up the >>>>>> multilib configuration? >>>>> >>>>> Neither, I'm using --target_board=unix/foffload= for that. >>>> >>>> ACK, I see. So these flags then only affect GCC/nvptx code generation >>>> for the actual user code (here: GCC libgomp test cases), but for the >>>> GCC/nvptx target libraries (such as: libc, libm, libgfortran, libgomp -- >>>> the latter especially relevant for OpenMP), it uses PTX code from one of >>>> the two "pre-compiled" GCC/nvptx multilibs: default or '-mptx=3.1'. >>>> >>>> Meaning, one can't just use such a flag for "completely building code" >>>> for a specific configuration. Random example, >>>> '-foffload-options=nvptx-none=-march=sm_75': as GCC/nvptx target >>>> libraries aren't being built for '-march=sm_75' multilib, >>>> '-foffload-options=nvptx-none=-march=sm_75' uses the default multilib, >>>> which isn't '-march=sm_75'. >>>> >>>> >>>>> ('gcc/config/nvptx/t-nvptx:MULTILIB_OPTIONS' >>>>>> etc., I suppose?) Should we add a few representative configurations to >>>>>> be built by default? 
And/or, should we have a way to 'configure' per >>>>>> user needs (I suppose: '--with-multilib-list=[...]', as supported for a >>>>>> few other targets?)? (I see there's also a new >>>>>> '--with-multilib-generator=[...]', haven't looked in detail.) No matter >>>>>> which way: again, combinatorial explosion is a problem, of course... >>>>> >>>>> As far as I know, the gcc build doesn't finish when switching default to >>>>> higher than sm_35, so there's little point to go to a multilib setup at >>>>> this point. But once we fix that, we could reconsider, otherwise, >>>>> things are likely to regress again. >>>> >>>> As far as I remember, several issues have been fixed. Still waiting for >>>> Roger's "middle-end: Support ABIs that pass FP values as wider integers" >>>> or something similar, but that PR104489 issue is being worked around by >>>> "Limit HFmode support to mexperimental", if I got that right. >>>> >>>> Now I'm not suggesting we should now enable all or any random GCC/nvptx >>>> multilibs, to get all these variants of GCC/nvptx target libraries built; >>>> especially also given that GCC/nvptx code generation currently doesn't >>>> make too much use of the new capabilities. >>>> >>>> However, we do have a specific request that a customer would like to be >>>> able to change at GCC 'configure' time the GCC/nvptx default multilib >>>> (including that being used for building corresponding GCC/nvptx target >>>> libraries). >>>> >>>> Per 'gcc/doc/install.texi', I do see that some GCC targets allow for >>>> GCC 'configure'-time '--with-multilib-list=[...]', or >>>> '--with-multilib-generator=[...]', and I suppose we could be doing >>>> something similar? But before starting implementing, I'd like your >>>> input, as you'll be the one to approve in the end. And/or, maybe you've >>>> already ma
[PING^5] nvptx: forward '-v' command-line option to assembler, linker
Hi Tom! Ping. Grüße Thomas On 2022-07-20T14:44:36+0200, I wrote: > Hi Tom! > > Ping. > > > Grüße > Thomas > > > On 2022-07-13T10:41:23+0200, I wrote: >> Hi Tom! >> >> Ping. >> >> >> Grüße >> Thomas >> >> >> On 2022-07-05T16:58:54+0200, I wrote: >>> Hi Tom! >>> >>> Ping. >>> >>> >>> Grüße >>> Thomas >>> >>> >>> On 2022-06-07T17:41:16+0200, I wrote: >>>> Hi! >>>> >>>> On 2022-05-30T09:06:21+0200, Tobias Burnus wrote: >>>>> On 29.05.22 22:49, Thomas Schwinge wrote: >>>>>> Not sure if that's what you had in mind, but what do you think about the >>>>>> attached "nvptx: forward '-v' command-line option to assembler, linker"? >>>>>> OK to push to GCC master branch (after merging >>>>>> <https://github.com/MentorEmbedded/nvptx-tools/pull/37> >>>>>> "Put '-v' verbose output onto stderr instead of stdout")? >>>>> >>>>> I was mainly thinking of some way to have it available — which >>>>> '-foffload-options=-Wa,-v' already permits on the GCC side. (Once the >>>>> nvptx-tools patch actually makes use of the '-v'.) >>>> >>>> (Merged a week ago.) >>>> >>>>> If I understand your patch correctly, this patch now causes 'gcc -v' to >>>>> imply 'gcc -v -Wa,-v'. I think that's okay, since 'gcc -v' already >>>>> outputs a lot of lines and those lines can be helpful to understand what >>>>> happens and what not. >>>> >>>> ACK. >>>> >>>>> Tom, your thoughts on this? >>>> >>>> Ping. >>>> >>>> >>>> Grüße >>>> Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 17c35607d4927299b0c4bd19dd6fd205c85c4a4b Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Sun, 29 May 2022 22:31:43 +0200 Subject: [PATCH] nvptx: forward '-v' command-line option to assembler, linker For example, for offloading compilation with '-save-temps -v', before vs. after word-diff then looks like: [...] 
[...]/build-gcc-offload-nvptx-none/gcc/as {+-v -v+} -o ./a.xnvptx-none.mkoffload.o ./a.xnvptx-none.mkoffload.s {+Verifying sm_30 code with sm_35 code generation.+} {+ ptxas -c -o /dev/null ./a.xnvptx-none.mkoffload.o --gpu-name sm_35 -O0+} [...] [...]/build-gcc-offload-nvptx-none/gcc/collect2 {+-v -v+} -o ./a.xnvptx-none.mkoffload [...] @./a.xnvptx-none.mkoffload.args.1 -lgomp -lgcc -lc -lgcc {+collect2 version 12.0.1 20220428 (experimental)+} {+[...]/build-gcc-offload-nvptx-none/gcc/collect-ld -v -v -o ./a.xnvptx-none.mkoffload [...] ./a.xnvptx-none.mkoffload.o -lgomp -lgcc -lc -lgcc+} {+Linking ./a.xnvptx-none.mkoffload.o as 0+} {+trying lib libc.a+} {+trying lib libgcc.a+} {+trying lib libgomp.a+} {+Resolving abort+} {+Resolving acc_on_device+} {+Linking libgomp.a::oacc-init.o/ as 1+} {+Linking libc.a::lib_a-abort.o/ as 2+} [...] (This depends on <https://github.com/MentorEmbedded/nvptx-tools/pull/37> "Put '-v' verbose output onto stderr instead of stdout".) gcc/ * config/nvptx/nvptx.h (ASM_SPEC, LINK_SPEC): Define. --- gcc/config/nvptx/nvptx.h | 7 +++ 1 file changed, 7 insertions(+) diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h index ed72c253191..b184f1d0150 100644 --- a/gcc/config/nvptx/nvptx.h +++ b/gcc/config/nvptx/nvptx.h @@ -27,6 +27,13 @@ /* Run-time Target. */ +/* Assembler supports '-v' option; handle similar to + '../../gcc.cc:asm_options', 'HAVE_GNU_AS'. */ +#define ASM_SPEC "%{v}" + +/* Linker supports '-v' option. */ +#define LINK_SPEC "%{v}" + #define STARTFILE_SPEC "%{mmainkernel:crt0.o}" #define TARGET_CPU_CPP_BUILTINS() nvptx_cpu_cpp_builtins () -- 2.25.1
[PING^4] nvptx: Allow '--with-arch' to override the default '-misa' (was: nvptx multilib setup)
Hi Tom! Ping. Grüße Thomas On 2022-07-20T14:46:03+0200, I wrote: > Hi Tom! > > Ping. > > > Grüße > Thomas > > > On 2022-07-13T10:42:44+0200, I wrote: >> Hi Tom! >> >> Ping. >> >> >> Grüße >> Thomas >> >> >> On 2022-07-05T16:59:23+0200, I wrote: >>> Hi Tom! >>> >>> Ping. >>> >>> >>> Grüße >>> Thomas >>> >>> >>> On 2022-06-15T23:18:10+0200, I wrote: >>>> Hi Tom! >>>> >>>> On 2022-05-13T16:20:14+0200, I wrote: >>>>> On 2022-02-04T13:09:29+0100, Tom de Vries via Gcc >>>>> wrote: >>>>>> On 2/4/22 08:21, Thomas Schwinge wrote: >>>>>>> On 2022-02-03T13:35:55+, "vries at gcc dot gnu.org via Gcc-bugs" >>>>>>> wrote: >>>>>>>> I've tested this using (recommended) driver 470.94 on boards: >>>>> >>>>>>>> while iterating over dimensions { -mptx=3.1 , -mptx=6.3 } x { >>>>>>>> GOMP_NVPTX_JIT=-O0, }. >>>>>>> >>>>>>> Do you use separate (nvptx-none offload target only?) builds for >>>>>>> different '-mptx' variants (likewise: '-misa'), or have you hacked up >>>>>>> the >>>>>>> multilib configuration? >>>>>> >>>>>> Neither, I'm using --target_board=unix/foffload= for that. >>>>> >>>>> ACK, I see. So these flags then only affect GCC/nvptx code generation >>>>> for the actual user code (here: GCC libgomp test cases), but for the >>>>> GCC/nvptx target libraries (such as: libc, libm, libgfortran, libgomp -- >>>>> the latter especially relevant for OpenMP), it uses PTX code from one of >>>>> the two "pre-compiled" GCC/nvptx multilibs: default or '-mptx=3.1'. >>>>> >>>>> Meaning, one can't just use such a flag for "completely building code" >>>>> for a specific configuration. Random example, >>>>> '-foffload-options=nvptx-none=-march=sm_75': as GCC/nvptx target >>>>> libraries aren't being built for '-march=sm_75' multilib, >>>>> '-foffload-options=nvptx-none=-march=sm_75' uses the default multilib, >>>>> which isn't '-march=sm_75'. >>>>> >>>>> >>>>>> ('gcc/config/nvptx/t-nvptx:MULTILIB_OPTIONS' >>>>>>> etc., I suppose?) 
Should we add a few representative configurations to >>>>>>> be built by default? And/or, should we have a way to 'configure' per >>>>>>> user needs (I suppose: '--with-multilib-list=[...]', as supported for a >>>>>>> few other targets?)? (I see there's also a new >>>>>>> '--with-multilib-generator=[...]', haven't looked in detail.) No matter >>>>>>> which way: again, combinatorial explosion is a problem, of course... >>>>>> >>>>>> As far as I know, the gcc build doesn't finish when switching default to >>>>>> higher than sm_35, so there's little point to go to a multilib setup at >>>>>> this point. But once we fix that, we could reconsider, otherwise, >>>>>> things are likely to regress again. >>>>> >>>>> As far as I remember, several issues have been fixed. Still waiting for >>>>> Roger's "middle-end: Support ABIs that pass FP values as wider integers" >>>>> or something similar, but that PR104489 issue is being worked around by >>>>> "Limit HFmode support to mexperimental", if I got that right. >>>>> >>>>> Now I'm not suggesting we should now enable all or any random GCC/nvptx >>>>> multilibs, to get all these variants of GCC/nvptx target libraries built; >>>>> especially also given that GCC/nvptx code generation currently doesn't >>>>> make too much use of the new capabilities. >>>>> >>>>> However, we do have a specific request that a customer would like to be >>>>> able to change at GCC 'configure' time the GCC/nvptx default multilib >>>>> (including that being used for building corresponding GCC/nvptx target >>>>> libraries). >>>>> >>
Re: [PATCH Rust front-end v1 2/4] Add Rust lang TargetHooks for i386 and x86_64
Hi! On 2022-07-27T14:40:38+0100, "herron.philip--- via Gcc-patches" wrote: > This patch introduces a new set of interfaces to define the target info as > expected by the rust front-end. It takes advantage of the information > within gcc/config/target directories which gets called by the front-end > to populate rust front-end datastructures by calling into: > builtin_rust_info. This patch has been isolated to find if we are > approaching this in an idiomatic way and is compilable without the > rust-front-end code. I suppose the general approach may be fine, as is similarly implemented by other languages' front ends in GCC. > We have received many patches here which gives us the target hook info for > most platforms But this is all so much WIP and full of TODO notes, and has no test cases at all!, that I still don't really see much value in keeping the current implementations of 'TARGET_RUST_CPU_INFO', 'TARGET_RUST_OS_INFO', etc. Applying "[HACK] Disable 'TARGET_RUST_CPU_INFO', 'TARGET_RUST_OS_INFO'" that I've attached, we're not seeing any change in 'make check-rust' results, for example. In my opinion, the current implementation should be backed out from the main development branch (would also reduce pain in merges from GCC upstream, as mentioned before), and then be developed (quite possibly based on the current implementation) individually for all GCC configurations that we'd like to support (with 'sorry' otherwise), in a coherent way, instead of trying to guess all possible target options as done by the current implementation. And, with all relevant test cases getting added, of course. That is, at this time, restrict ourselves to GCC configurations that we're actually supporting and testing. Have we even figured out which of those target options are actually mandated for a conforming Rust programming language implementation (that is, users would potentially rely on these)? 
As far as I can tell, 'rustc' defines target options here: <https://github.com/rust-lang/rust/tree/master/compiler/rustc_target/src/spec>, and you may use 'rustc --print=cfg' to dump for the current configuration? > but getting the normal x86 done correctly will define if > the other patches are done correctly. Yes -- but I'm not sure this is it really, in its current WIPy, un-tested, un-verified form: > gcc/config/ChangeLog: > * gnu.h: add new macro GNU_USER_TARGET_RUST_OS_INFO > * dragonfly.h: define TARGET_RUST_OS_INFO > * freebsd-spec.h: define FBSD_TARGET_RUST_OS_INFO > * freebsd.h: define guard for TARGET_RUST_OS_INFO > * fuchsia.h: define TARGET_RUST_OS_INFO > * kfreebsd-gnu.h: define GNU_USER_TARGET_RUST_OS_INFO > * kopensolaris-gnu.h: define GNU_USER_TARGET_RUST_OS_INFO > * linux-android.h: define ANDROID_TARGET_RUST_OS_INFO > * linux.h: define GNU_USER_TARGET_RUST_OS_INFO > * netbsd.h: define NETBSD_TARGET_RUST_OS_INFO > * openbsd.h: define OPENBSD_TARGET_RUST_OS_INFO > * phoenix.h: define TARGET_RUST_OS_INFO > * sol2.h: define TARGET_RUST_OS_INFO > * vxworks.h: define VXWORKS_TARGET_RUST_OS_INFO > * vxworksae.h: define VXWORKS_TARGET_RUST_OS_INFO > > gcc/config/i386/ChangeLog: > > * crtdll.h: define EXTRA_TARGET_RUST_OS_INFO > * cygming.h: define TARGET_RUST_OS_INFO > * cygwin.h: define EXTRA_TARGET_RUST_OS_INFO > * darwin.h: define TARGET_RUST_OS_INFO > * djgpp.h: likewise > * gnu-user-common.h: define TARGET_RUST_OS_INFO > * i386-protos.h: prototype for ix86_rust_target_cpu_info > * i386-rust.cc: new file to generate the rust target host info > * i386.h: define TARGET_RUST_CPU_INFO hook > * linux-common.h: define hooks for target info > * lynx.h: likewise > * mingw32.h: likewise > * netbsd-elf.h: likewise > * netbsd64.h: likewise > * nto.h: likewise > * openbsdelf.h: likewise > * rdos.h: likewise > * rtemself.h: likewise > * t-i386: add makefile rule for i386-rust.cc > * vxworks.h: define TARGET_RUST_OS_INFO Grüße Thomas - Siemens Electronic Design 
Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From a5688c5da3c9ffda614f4138e55f46b7078b9e3a Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Thu, 28 Jul 2022 12:04:28 +0200 Subject: [PATCH] [HACK] Disable 'TARGET_RUST_CPU_INFO', 'TARGET_RUST_OS_INFO' --- gcc/rust/rust-lang.cc| 2 ++ gcc/rust/ru
Re: [PATCH Rust front-end v1 2/4] Add Rust lang TargetHooks for i386 and x86_64
Hi Phil! On 2022-07-28T11:51:37+0100, Philip Herron wrote: > I think you are right here. There are parts in > libstd/liballoc/libpanic which start to look for what CPU features are > available iirc. Aha, good. That -- once we get there ;-) -- shall then guide us on the target options we implement, in addition to what we find generally necessary for a conforming Rust programming language implementation (as I'd mentioned). > libcore [...] just > cares about target pointer width and endianness which is more > generally available as macros. Right, and these are already implemented in 'gcc/rust/rust-session-manager.cc:Session::init'. (..., but also should get some test cases added; I'll have a look at some point.) > It seems more clear now that maybe for this v1 set of patches, > possibly this stuff doesn't really matter right now until we compile > libstd which seems like a much better approach in order to review the > front-end code. I think I will apply your patch and revert these > changes for now since we have the git history for them we can look at > this more closely when we need it. Unless this issue is time-critical, let me offer that instead of my "[HACK] Disable 'TARGET_RUST_CPU_INFO', 'TARGET_RUST_OS_INFO'", I'll cook up a proper patch, removing the implementations of 'TARGET_RUST_CPU_INFO', 'TARGET_RUST_OS_INFO', etc., but keeping the general infrastructure in place (if I find that makes sense)? Grüße Thomas > On Thu, 28 Jul 2022 at 11:38, Thomas Schwinge wrote: >> >> Hi! >> >> On 2022-07-27T14:40:38+0100, "herron.philip--- via Gcc-patches" >> wrote: >> > This patch introduces a new set of interfaces to define the target info as >> > expected by the rust front-end. It takes advantage of the information >> > within gcc/config/target directories which gets called by the front-end >> > to populate rust front-end datastructures by calling into: >> > builtin_rust_info. 
This patch has been isolated to find if we are >> > approaching this in an idiomatic way and is compilable without the >> > rust-front-end code. >> >> I suppose the general approach may be fine, as is similarly implemented >> by other languages' front ends in GCC. >> >> > We have received many patches here which gives us the target hook info for >> > most platforms >> >> But this is all so much WIP and full of TODO notes, and has no test cases >> at all!, that I still don't really see much value in keeping the current >> implementations of 'TARGET_RUST_CPU_INFO', 'TARGET_RUST_OS_INFO', etc. >> Applying "[HACK] Disable 'TARGET_RUST_CPU_INFO', 'TARGET_RUST_OS_INFO'" >> that I've attached, we're not seeing any change in 'make check-rust' >> results, for example. >> >> In my opinion, the current implementation should be backed out from the >> main development branch (would also reduce pain in merges from GCC >> upstream, as mentioned before), and then be developed (quite possibly >> based on the current implementation) individually for all GCC >> configurations that we'd like to support (with 'sorry' otherwise), in a >> coherent way, instead of trying to guess all possible target options as >> done by the current implementation. And, with all relevant test cases >> getting added, of course. That is, at this time, restrict ourselves to >> GCC configurations that we're actually supporting and testing. >> >> Have we even figured out which of those target options are actually >> mandated for a conforming Rust programming language implementation (that >> is, users would potentially rely on these)? >> >> As far as I can tell, 'rustc' defines target options here: >> <https://github.com/rust-lang/rust/tree/master/compiler/rustc_target/src/spec>, >> and you may use 'rustc --print=cfg' to dump for the current >> configuration? >> >> > but getting the normal x86 done correctly will define if >> > the other patches are done correctly. 
>> >> Yes -- but I'm not sure this is it really, in its current WIPy, >> un-tested, un-verified form: >> >> > gcc/config/ChangeLog: >> >> > * gnu.h: add new macro GNU_USER_TARGET_RUST_OS_INFO >> > * dragonfly.h: define TARGET_RUST_OS_INFO >> > * freebsd-spec.h: define FBSD_TARGET_RUST_OS_INFO >> > * freebsd.h: define guard for TARGET_RUST_OS_INFO >> > * fuchsia.h: define TARGET_RUST_OS_INFO >> > * kfreebsd-g
[PING^6] nvptx: forward '-v' command-line option to assembler, linker
Hi Tom! Ping. Grüße Thomas On 2022-07-27T17:48:46+0200, I wrote: > Hi Tom! > > Ping. > > > Grüße > Thomas > > > On 2022-07-20T14:44:36+0200, I wrote: >> Hi Tom! >> >> Ping. >> >> >> Grüße >> Thomas >> >> >> On 2022-07-13T10:41:23+0200, I wrote: >>> Hi Tom! >>> >>> Ping. >>> >>> >>> Grüße >>> Thomas >>> >>> >>> On 2022-07-05T16:58:54+0200, I wrote: >>>> Hi Tom! >>>> >>>> Ping. >>>> >>>> >>>> Grüße >>>> Thomas >>>> >>>> >>>> On 2022-06-07T17:41:16+0200, I wrote: >>>>> Hi! >>>>> >>>>> On 2022-05-30T09:06:21+0200, Tobias Burnus >>>>> wrote: >>>>>> On 29.05.22 22:49, Thomas Schwinge wrote: >>>>>>> Not sure if that's what you had in mind, but what do you think about the >>>>>>> attached "nvptx: forward '-v' command-line option to assembler, linker"? >>>>>>> OK to push to GCC master branch (after merging >>>>>>> <https://github.com/MentorEmbedded/nvptx-tools/pull/37> >>>>>>> "Put '-v' verbose output onto stderr instead of stdout")? >>>>>> >>>>>> I was mainly thinking of some way to have it available — which >>>>>> '-foffload-options=-Wa,-v' already permits on the GCC side. (Once the >>>>>> nvptx-tools patch actually makes use of the '-v'.) >>>>> >>>>> (Merged a week ago.) >>>>> >>>>>> If I understand your patch correctly, this patch now causes 'gcc -v' to >>>>>> imply 'gcc -v -Wa,-v'. I think that's okay, since 'gcc -v' already >>>>>> outputs a lot of lines and those lines can be helpful to understand what >>>>>> happens and what not. >>>>> >>>>> ACK. >>>>> >>>>>> Tom, your thoughts on this? >>>>> >>>>> Ping. 
>>>>> >>>>> >>>>> Grüße >>>>> Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 17c35607d4927299b0c4bd19dd6fd205c85c4a4b Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Sun, 29 May 2022 22:31:43 +0200 Subject: [PATCH] nvptx: forward '-v' command-line option to assembler, linker For example, for offloading compilation with '-save-temps -v', before vs. after word-diff then looks like: [...] [...]/build-gcc-offload-nvptx-none/gcc/as {+-v -v+} -o ./a.xnvptx-none.mkoffload.o ./a.xnvptx-none.mkoffload.s {+Verifying sm_30 code with sm_35 code generation.+} {+ ptxas -c -o /dev/null ./a.xnvptx-none.mkoffload.o --gpu-name sm_35 -O0+} [...] [...]/build-gcc-offload-nvptx-none/gcc/collect2 {+-v -v+} -o ./a.xnvptx-none.mkoffload [...] @./a.xnvptx-none.mkoffload.args.1 -lgomp -lgcc -lc -lgcc {+collect2 version 12.0.1 20220428 (experimental)+} {+[...]/build-gcc-offload-nvptx-none/gcc/collect-ld -v -v -o ./a.xnvptx-none.mkoffload [...] ./a.xnvptx-none.mkoffload.o -lgomp -lgcc -lc -lgcc+} {+Linking ./a.xnvptx-none.mkoffload.o as 0+} {+trying lib libc.a+} {+trying lib libgcc.a+} {+trying lib libgomp.a+} {+Resolving abort+} {+Resolving acc_on_device+} {+Linking libgomp.a::oacc-init.o/ as 1+} {+Linking libc.a::lib_a-abort.o/ as 2+} [...] (This depends on <https://github.com/MentorEmbedded/nvptx-tools/pull/37> "Put '-v' verbose output onto stderr instead of stdout".) gcc/ * config/nvptx/nvptx.h (ASM_SPEC, LINK_SPEC): Define. --- gcc/config/nvptx/nvptx.h | 7 +++ 1 file changed, 7 insertions(+) diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h index ed72c253191..b184f1d0150 100644 --- a/gcc/config/nvptx/nvptx.h +++ b/gcc/config/nvptx/nvptx.h @@ -27,6 +27,13 @@ /* Run-time Target. 
*/ +/* Assembler supports '-v' option; handle similar to + '../../gcc.cc:asm_options', 'HAVE_GNU_AS'. */ +#define ASM_SPEC "%{v}" + +/* Linker supports '-v' option. */ +#define LINK_SPEC "%{v}" + #define STARTFILE_SPEC "%{mmainkernel:crt0.o}" #define TARGET_CPU_CPP_BUILTINS() nvptx_cpu_cpp_builtins () -- 2.25.1
[PING^5] nvptx: Allow '--with-arch' to override the default '-misa' (was: nvptx multilib setup)
Hi Tom! Ping. Grüße Thomas On 2022-07-27T17:48:58+0200, I wrote: > Hi Tom! > > Ping. > > > Grüße > Thomas > > > On 2022-07-20T14:46:03+0200, I wrote: >> Hi Tom! >> >> Ping. >> >> >> Grüße >> Thomas >> >> >> On 2022-07-13T10:42:44+0200, I wrote: >>> Hi Tom! >>> >>> Ping. >>> >>> >>> Grüße >>> Thomas >>> >>> >>> On 2022-07-05T16:59:23+0200, I wrote: >>>> Hi Tom! >>>> >>>> Ping. >>>> >>>> >>>> Grüße >>>> Thomas >>>> >>>> >>>> On 2022-06-15T23:18:10+0200, I wrote: >>>>> Hi Tom! >>>>> >>>>> On 2022-05-13T16:20:14+0200, I wrote: >>>>>> On 2022-02-04T13:09:29+0100, Tom de Vries via Gcc >>>>>> wrote: >>>>>>> On 2/4/22 08:21, Thomas Schwinge wrote: >>>>>>>> On 2022-02-03T13:35:55+, "vries at gcc dot gnu.org via Gcc-bugs" >>>>>>>> wrote: >>>>>>>>> I've tested this using (recommended) driver 470.94 on boards: >>>>>> >>>>>>>>> while iterating over dimensions { -mptx=3.1 , -mptx=6.3 } x { >>>>>>>>> GOMP_NVPTX_JIT=-O0, }. >>>>>>>> >>>>>>>> Do you use separate (nvptx-none offload target only?) builds for >>>>>>>> different '-mptx' variants (likewise: '-misa'), or have you hacked up >>>>>>>> the >>>>>>>> multilib configuration? >>>>>>> >>>>>>> Neither, I'm using --target_board=unix/foffload= for that. >>>>>> >>>>>> ACK, I see. So these flags then only affect GCC/nvptx code generation >>>>>> for the actual user code (here: GCC libgomp test cases), but for the >>>>>> GCC/nvptx target libraries (such as: libc, libm, libgfortran, libgomp -- >>>>>> the latter especially relevant for OpenMP), it uses PTX code from one of >>>>>> the two "pre-compiled" GCC/nvptx multilibs: default or '-mptx=3.1'. >>>>>> >>>>>> Meaning, one can't just use such a flag for "completely building code" >>>>>> for a specific configuration. Random example, >>>>>> '-foffload-options=nvptx-none=-march=sm_75': as GCC/nvptx target >>>>>> libraries aren't being built for '-march=sm_75' multilib, >>>>>> '-foffload-options=nvptx-none=-march=sm_75' uses the default multilib, >>>>>> which isn't '-march=sm_75'. 
>>>>>> >>>>>> >>>>>>> ('gcc/config/nvptx/t-nvptx:MULTILIB_OPTIONS' >>>>>>>> etc., I suppose?) Should we add a few representative configurations to >>>>>>>> be built by default? And/or, should we have a way to 'configure' per >>>>>>>> user needs (I suppose: '--with-multilib-list=[...]', as supported for a >>>>>>>> few other targets?)? (I see there's also a new >>>>>>>> '--with-multilib-generator=[...]', haven't looked in detail.) No >>>>>>>> matter >>>>>>>> which way: again, combinatorial explosion is a problem, of course... >>>>>>> >>>>>>> As far as I know, the gcc build doesn't finish when switching default to >>>>>>> higher than sm_35, so there's little point to go to a multilib setup at >>>>>>> this point. But once we fix that, we could reconsider, otherwise, >>>>>>> things are likely to regress again. >>>>>> >>>>>> As far as I remember, several issues have been fixed. Still waiting for >>>>>> Roger's "middle-end: Support ABIs that pass FP values as wider integers" >>>>>> or something similar, but that PR104489 issue is being worked around by >>>>>> "Limit HFmode support to mexperimental", if I got that right. >>>>>> >>>>>> Now I'm not suggesting we should now enable all or any random GCC/nvptx >>>>>> multilibs, to get all these variants of GCC/nvptx target libraries built; >>>>>> especially als
Re: [patch] libgomp: cuda.h and omp_target_memcpy_rect cleanup (was: [patch] OpenMP: Call cuMemcpy2D/cuMemcpy3D for nvptx for omp_target_memcpy_rect)
Hi Tobias! On 2023-07-28T13:51:41+0200, Tobias Burnus wrote: > On 27.07.23 23:00, Thomas Schwinge wrote: >>> + else if (src_devicep != NULL >>> +&& (dst_devicep == NULL >>> +|| (dst_devicep->capabilities >>> +& GOMP_OFFLOAD_CAP_SHARED_MEM))) >> Are these 'GOMP_OFFLOAD_CAP_SHARED_MEM' actually reachable, given that >> 'omp_target_memcpy_check' (via 'omp_target_memcpy_rect_check') clears out >> the device to 'NULL' for 'GOMP_OFFLOAD_CAP_SHARED_MEM'? > > I have now undone this change – I did not dig deep enough into the > function calls. > > >>> + else if (dst_devicep == NULL && src_devicep == NULL) >>> + { >>> + memcpy ((char *) dst + dst_off, (const char *) src + src_off, >>> + length); >>> + ret = 1; >>> + } >>> else if (src_devicep == dst_devicep) >>>ret = src_devicep->dev2dev_func (src_devicep->target_id, >>> (char *) dst + dst_off, >>> (const char *) src + src_off, >>> length); >> ..., but also left the intra-device case here -- which should now be dead >> code here? > > Why? Unless I missed something, the old, the current, and the proposed > (= old) code do still run this code. It is now again reachable, but wasn't in the "intermediate state" (without your follow-on change) -- at least per my understanding, which may be gappy. > I have not added an assert to confirm, but in any case, it is tested for > in my recently added testcase - thus, we could add a 'printf' to confirm. We're now back to the original code, which should be fine. >>> + else if (*tmp_size < length) >>> + { >>> + *tmp_size = length; >>> + *tmp = realloc (*tmp, length); >>> + if (*tmp == NULL) >>> + return ENOMEM; >> If 'realloc' returns 'NULL', we should 'free' the original '*tmp'? >> >> Do we really need here the property here that if the re-allocation can't >> be done in-place, 'realloc' copies the original content to the new? In >> other words, should we just unconditionally 'free' and re-'malloc' here, >> instead of 'realloc'? 
> I have now done so – but I am not really sure what's faster on average. > If it can be enlarged, 'realloc' is faster, if it cannot free+malloc is > better. I have no proof, but would assume that the C library handles the case of 'free' plus subsequent 'malloc' as efficiently as 'realloc' if there's space available after the original allocation. >> I haven't looked whether the re-use of 'tmp' for multiple calls to this >> is then actually useful, or whether we should just always 'malloc', use, >> 'free' the buffer here? ..., hence that suggestion. But I agree that what we've got now is good, just one more idea: couldn't we now actually unify the (original) 'malloc' and (original) 'realloc' case: -if (*tmp_size == 0) - { -*tmp_size = length; -*tmp = malloc (length); -if (*tmp == NULL) - return ENOMEM; - } -else if (*tmp_size < length) +if (*tmp_size < length) { *tmp_size = length; free (*tmp); *tmp = malloc (length); if (*tmp == NULL) return ENOMEM; } (Untested.) > Well, it can run in a hot loop – assume a C-array array[1024][1024][2] > and copying array[:1024,:1024,0:1] (using OpenMP syntax) – i.e. 1048576 > times every other element. And therefore I would like to avoid repeated > malloc/free in such a case. (But in general, interdevice copying should > be very rare.) > > Actually, I think the realloc case is unreachable: for rectangular > copies, as implied both by 'target update' with strided access and by > 'omp_target_memcpy_rect', the size should be constant. Worth an 'assert'? Now I "only" don't understand the '__builtin_mul_overflow' computations and error checks ('return EINVAL;') for the four cases handled in 'libgomp/target.c:omp_target_memcpy_rect_worker': for example, the generic loop at the end of the function iterates over all 'num_dims', but the specific earlier 'num_dims == N' cases don't. But I suppose that's just my own problem of not understanding this API/code, and I'm not currently planning on looking into this any further.
;-) Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
RE: Machine Mode ICE in RISC-V when LTO
Hi! On 2023-08-10T12:25:36+0000, "Li, Pan2" wrote: > Thanks Richard for comment, let me try to promote the table to unsigned short. I have WIP work for this issue -- which I'd already raised a month ago: <https://inbox.sourceware.org/87o7kxuq9s@euler.schwinge.homeip.net>: On 2023-06-30T13:46:07+0200, Thomas Schwinge wrote: > In particular, the 'lto_mode_identity_table' changes would seem necessary > to keep standard LTO ('-flto') functional for large 'machine_mode' size? ... which is exactly the problem you've now run into? However, a simple: -GTY(()) const unsigned char *lto_mode_identity_table; +GTY(()) const unsigned short *lto_mode_identity_table; ..., or: -GTY(()) const unsigned char *lto_mode_identity_table; +GTY(()) const machine_mode *lto_mode_identity_table; ... is not sufficient: that runs into GTY issues, as the current 'unsigned char *lto_mode_identity_table' is (mis-)classified by 'gengtype' as a C string. This happens to work for this case, but still isn't right, and only works for 'char *' but not 'short *' etc. I have WIP work to tighten that. ..., which got me into other GTY issues, and so on... ;-) (Richard already ACKed and I pushed some of the prerequisite changes, but there's more to come.) I'm still planning on resolving all that mess, but I'm tight on time right now. However, I have a different proposal, which should address your current issue: simply, get rid of the 'lto_mode_identity_table', which is just that: a 1-to-1 mapping of array index to value. Instead, in 'gcc/lto/lto-common.cc:lto_file_finalize', for '!ACCEL_COMPILER', set 'file_data->mode_table = NULL', and in the users (only 'gcc/tree-streamer.h:bp_unpack_machine_mode'?), replace (untested): -return (machine_mode) ib->file_data->mode_table[ix]; +return ib->file_data->mode_table ?
ib->file_data->mode_table[ix] : ix; Jakub, as the original author of 'lto_mode_identity_table' (see commit db847fa8f2cca6139188b8dfa0a7064319b19193 (Subversion r221005)), is there any reason not to do it this way? Grüße Thomas > -Original Message- > From: Richard Biener > Sent: Thursday, August 10, 2023 7:08 PM > To: Li, Pan2 > Cc: richard.sandif...@arm.com; Thomas Schwinge ; > ja...@redhat.com; kito.ch...@gmail.com; Jeff Law ; > juzhe.zh...@rivai.ai; Wang, Yanzhang > Subject: Re: Machine Mode ICE in RISC-V when LTO > > On Thu, Aug 10, 2023 at 10:19 AM Li, Pan2 wrote: >> >> Hi all, >> >> >> >> Recently I found there is still some issues for the machine mode with LTO >> part by fixing one >> >> ICE (only when compile with LTO) in RISC-V backend in , aka below case. >> >> >> >> >> ../__RISC-V_INSTALL___/bin/riscv64-unknown-elf-g++ -O2 -flto >> >> gcc/testsuite/g++.dg/torture/vshuf-v4df.C -o test.elf >> >> during RTL pass: expand >> >> gcc/testsuite/g++.dg/torture/vshuf-main.inc: In function 'main': >> >> gcc/testsuite/g++.dg/torture/vshuf-main.inc:15:9: internal compiler error: >> in as_a, at machmode.h:381 >> >>15 | V r = __builtin_shuffle(in1[i], mask1[i]); >> >> | ^ >> >> 0x7e5b8e scalar_int_mode as_a(machine_mode) >> >> ../.././gcc/gcc/machmode.h:381 >> >> 0x7eabdb scalar_mode as_a(machine_mode) >> >> ../.././gcc/gcc/expr.cc:332 >> >> 0x7eabdb convert_mode_scalar >> >> ../.././gcc/gcc/expr.cc:325 >> >> 0xb8485b store_expr(tree_node*, rtx_def*, int, bool, bool) >> >> ../.././gcc/gcc/expr.cc:6413 >> >> 0xb8a556 store_field >> >> ../.././gcc/gcc/expr.cc:7648 >> >> 0xb88f27 store_constructor(tree_node*, rtx_def*, int, poly_int<2u, long>, >> bool) >> >> ../.././gcc/gcc/expr.cc:7588 >> >> 0xb8b8b8 expand_constructor >> >> ../.././gcc/gcc/expr.cc:8931 >> >> 0xb76bc7 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, >> expand_modifier, rtx_def**, bool) >> >> ../.././gcc/gcc/expr.cc:11170 >> >> 0xb77ef7 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, >> 
expand_modifier, rtx_def**, bool) >> >> ../.././gcc/gcc/expr.cc:10809 >> >> 0xb83a80 store_expr(tree_node*, rtx_def*, int, bool, bool) >> >> ../.././gcc/gcc/expr.cc:6325 >> >> 0xb851d9 expand_assignment(tree_node*, tree_node*, bool) >> >> ../.././gcc/gcc/expr.cc:6043 >> >> 0xa48717 expand_gimple_stmt_1 &
[v3] OpenACC 2.7: default clause support for data constructs (was: [PATCH, OpenACC 2.7, v2] Implement default clause support for data constructs)
Hi! On 2023-08-01T23:35:16+0800, Chung-Lin Tang wrote: > this is v2 of the patch for implementing the OpenACC 2.7 addition of > default(none|present) support for data constructs. Thanks! > Instead of propagating an additional 'oacc_default_kind' for OpenACC, > this patch does it in a more complete way: it directly propagates the > gimplify_omp_ctx* pointer of the innermost context where we found > a default-clause. Right -- but reviewing this, it occurred to me that we don't need any such new code at all, and may instead simply look through the 'ctx's in 'gcc/gimplify.cc:oacc_default_clause' to find the 'default' clause information. This centralizes the logic in the one place where it's relevant. > This supports displaying the location/type of OpenACC > construct where the default-clause is in the error messages. This is preserved... > The testcases also have the multiple nested data construct testing added, > where we can now have messages referring precisely to the exact innermost > default clause that was active at that program point.
..., but we should also still 'inform' about the compute construct, where the user is expected to add explicit data clauses (if not adding to the 'data' construct where the 'default(none)' clause appears): > --- a/gcc/gimplify.cc > +++ b/gcc/gimplify.cc > @@ -7785,16 +7809,20 @@ oacc_default_clause (struct gimplify_omp_ctx *ctx, > tree decl, unsigned flags) > - else if (ctx->default_kind == OMP_CLAUSE_DEFAULT_NONE) > + else if (default_kind == OMP_CLAUSE_DEFAULT_NONE) > { >error ("%qE not specified in enclosing OpenACC %qs construct", > - DECL_NAME (lang_hooks.decls.omp_report_decl (decl)), rkind); > - inform (ctx->location, "enclosing OpenACC %qs construct", rkind); > -} > - else if (ctx->default_kind == OMP_CLAUSE_DEFAULT_PRESENT) > + DECL_NAME (lang_hooks.decls.omp_report_decl (decl)), > + oacc_region_type_name (ctx->region_type)); > + inform (ctx->oacc_default_clause_ctx->location, > + "enclosing OpenACC %qs construct", > + oacc_region_type_name > + (ctx->oacc_default_clause_ctx->region_type)); > +} That is, we should keep here the original 'inform' for 'ctx->location', and *add another* 'inform' for 'ctx->oacc_default_clause_ctx->location'. Otherwise that's confusing to users. Instead of requiring another iteration through you, I've now implemented that, and with test cases enhanced some more, pushed to master branch commit bed993884b149851fe930b43cf11cbcdf05f1578 "OpenACC 2.7: default clause support for data constructs", see attached. 
Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From bed993884b149851fe930b43cf11cbcdf05f1578 Mon Sep 17 00:00:00 2001 From: Chung-Lin Tang Date: Tue, 6 Jun 2023 03:46:29 -0700 Subject: [PATCH] OpenACC 2.7: default clause support for data constructs This patch implements the OpenACC 2.7 addition of default(none|present) support for data constructs. Now, specifying "default(none|present)" on a data construct turns on same default clause behavior for all lexically enclosed compute constructs (which don't already themselves have a default clause). gcc/c/ChangeLog: * c-parser.cc (OACC_DATA_CLAUSE_MASK): Add PRAGMA_OACC_CLAUSE_DEFAULT. gcc/cp/ChangeLog: * parser.cc (OACC_DATA_CLAUSE_MASK): Add PRAGMA_OACC_CLAUSE_DEFAULT. gcc/fortran/ChangeLog: * openmp.cc (OACC_DATA_CLAUSES): Add OMP_CLAUSE_DEFAULT. gcc/ChangeLog: * gimplify.cc (oacc_region_type_name): New function. (oacc_default_clause): If no 'default' clause appears on this compute construct, see if one appears on a lexically containing 'data' construct. (gimplify_scan_omp_clauses): Upon OMP_CLAUSE_DEFAULT case, set ctx->oacc_default_clause_ctx to current context. gcc/testsuite/ChangeLog: * c-c++-common/goacc/default-3.c: Adjust testcase. * c-c++-common/goacc/default-4.c: Adjust testcase. * c-c++-common/goacc/default-5.c: Adjust testcase. * gfortran.dg/goacc/default-3.f95: Adjust testcase. * gfortran.dg/goacc/default-4.f: Adjust testcase. * gfortran.dg/goacc/default-5.f: Adjust testcase. Co-authored-by: Thomas Schwinge --- gcc/c/c-parser.cc | 1 + gcc/cp/parser.cc | 1 + gcc/fortran/openmp.cc | 3 +- gcc/gimplify.cc | 64 +++ gcc/testsuite/c-c++-common/goacc/default-3.c |