Hi! I've committed this patch to gomp4 branch to: 1) fix handling of reference based array sections - reference to array and reference to pointer. The latter actually needs 3 map clauses, one to map the array section, one to map the pointer to it, and one to map the reference to the pointer. 2) if OMP_CLAUSE_SIZE was missing, sizes entry was mistakenly in bits rather than in bytes. 3) I figured out we need to tell the runtime library not just address, size and kind, but also alignment (we won't need that for the #pragma omp declare target global vars though), so that the runtime library can properly align it. As TYPE_ALIGN/DECL_ALIGN is in bits and is 32 bit wide, when that is in bytes and we only care about power of twos, I've decided to encode it in the upper 5 bits of the kind (lower 3 bits are used for OMP_CLAUSE_MAP_* kind). 4) the reference testcase showed a problem with fold_stmt calls we do very early, during gimplification, because for TREE_READONLY vars with DECL_INITIAL fold_stmt can replace the uses of the var with its initializer, but as the gimplifier isn't aware of it, we wouldn't remap that, or worse there could be explicit remapping of it via array section, but one that the compiler doesn't see, and if that is smaller than the whole array size, that would result in runtime error. So, after some talk with richi on IRC, I've decided to just not fold_stmt inside of target constructs during gimplification and defer it until omplower.
2013-09-05 Jakub Jelinek <ja...@redhat.com> * gimplify.c (gimplify_call_expr): Don't call fold_stmt inside of #pragma omp target construct. (gimplify_modify_expr): Likewise. * omp-low.c (target_nesting_level): New variable. (lower_omp_target): Increase/restore target_nesting_level around lowering #pragma omp target body. Use TYPE_SIZE_UNIT instead of TYPE_SIZE if OMP_CLAUSE_SIZE is missing. Or log2 of needed alignment into high 5 bits of kind. (lower_omp): Call fold_stmt on all stmts inside of #pragma omp target construct. cp/ * semantics.c (handle_omp_array_sections): Fix up handling of reference to array and reference to pointer based array sections. (finish_omp_clauses): Don't report errors about non-decl in OMP_CLAUSE_DECL for OMP_CLAUSE_MAP_POINTER. libgomp/ * testsuite/libgomp.c++/target-2.C: New test. * testsuite/libgomp.c++/target-2-aux.cc: New file. --- gcc/gimplify.c.jj 2013-09-05 09:19:03.000000000 +0200 +++ gcc/gimplify.c 2013-09-05 14:45:48.632720617 +0200 @@ -2704,7 +2704,14 @@ gimplify_call_expr (tree *expr_p, gimple notice_special_calls (call); gimplify_seq_add_stmt (pre_p, call); gsi = gsi_last (*pre_p); - fold_stmt (&gsi); + /* Don't fold stmts inside of target construct. We'll do it + during omplower pass instead. */ + struct gimplify_omp_ctx *ctx; + for (ctx = gimplify_omp_ctxp; ctx; ctx = ctx->outer_context) + if (ctx->region_type == ORT_TARGET) + break; + if (ctx == NULL) + fold_stmt (&gsi); *expr_p = NULL_TREE; } else @@ -4961,7 +4968,14 @@ gimplify_modify_expr (tree *expr_p, gimp gimplify_seq_add_stmt (pre_p, assign); gsi = gsi_last (*pre_p); - fold_stmt (&gsi); + /* Don't fold stmts inside of target construct. We'll do it + during omplower pass instead. */ + struct gimplify_omp_ctx *ctx; + for (ctx = gimplify_omp_ctxp; ctx; ctx = ctx->outer_context) + if (ctx->region_type == ORT_TARGET) + break; + if (ctx == NULL) + fold_stmt (&gsi); if (want_value) { --- gcc/omp-low.c.jj 2013-09-05 09:19:03.000000000 +0200 +++ gcc/omp-low.c 2013-09-05 17:11:14.693638660 +0200 @@ -134,6 +134,7 @@ struct omp_for_data static splay_tree all_contexts; static int taskreg_nesting_level; +static int target_nesting_level; struct omp_region *root_omp_region; static bitmap task_shared_vars; @@ -9213,7 +9214,13 @@ lower_omp_target (gimple_stmt_iterator * map_cnt++; } - if (kind != GF_OMP_TARGET_KIND_UPDATE) + if (kind == GF_OMP_TARGET_KIND_REGION) + { + target_nesting_level++; + lower_omp (&tgt_body, ctx); + target_nesting_level--; + } + else if (kind == GF_OMP_TARGET_KIND_DATA) lower_omp (&tgt_body, ctx); if (kind == GF_OMP_TARGET_KIND_REGION) @@ -9320,7 +9327,7 @@ lower_omp_target (gimple_stmt_iterator * } tree s = OMP_CLAUSE_SIZE (c); if (s == NULL_TREE) - s = TYPE_SIZE (TREE_TYPE (ovar)); + s = TYPE_SIZE_UNIT (TREE_TYPE (ovar)); s = fold_convert (size_type_node, s); tree purpose = size_int (map_idx++); CONSTRUCTOR_APPEND_ELT (vsize, purpose, s); @@ -9342,6 +9349,11 @@ lower_omp_target (gimple_stmt_iterator * default: gcc_unreachable (); } + unsigned int talign = TYPE_ALIGN_UNIT (TREE_TYPE (ovar)); + if (DECL_P (ovar) && DECL_ALIGN_UNIT (ovar) > talign) + talign = DECL_ALIGN_UNIT (ovar); + talign = ceil_log2 (talign); + tkind |= talign << 3; CONSTRUCTOR_APPEND_ELT (vkind, purpose, build_int_cst (unsigned_char_type_node, tkind)); @@ -9673,6 +9685,12 @@ lower_omp (gimple_seq *body, omp_context gimple_stmt_iterator gsi; for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi)) lower_omp_1 (&gsi, ctx); + /* Inside target region we haven't called fold_stmt during gimplification, + because it can break code by adding decl references that weren't in the + source. Call fold_stmt now. */ + if (target_nesting_level) + for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi)) + fold_stmt (&gsi); input_location = saved_location; } --- gcc/cp/semantics.c.jj 2013-09-05 09:19:03.000000000 +0200 +++ gcc/cp/semantics.c 2013-09-05 15:57:01.106488431 +0200 @@ -4506,6 +4506,7 @@ handle_omp_array_sections (tree c) t = fold_convert_loc (OMP_CLAUSE_LOCATION (c), ptrdiff_type_node, t); tree ptr = OMP_CLAUSE_DECL (c2); + ptr = convert_from_reference (ptr); if (!POINTER_TYPE_P (TREE_TYPE (ptr))) ptr = build_fold_addr_expr (ptr); t = fold_build2_loc (OMP_CLAUSE_LOCATION (c), MINUS_EXPR, @@ -4515,6 +4516,19 @@ handle_omp_array_sections (tree c) OMP_CLAUSE_SIZE (c2) = t; OMP_CLAUSE_CHAIN (c2) = OMP_CLAUSE_CHAIN (c); OMP_CLAUSE_CHAIN (c) = c2; + ptr = OMP_CLAUSE_DECL (c2); + if (TREE_CODE (TREE_TYPE (ptr)) == REFERENCE_TYPE + && POINTER_TYPE_P (TREE_TYPE (TREE_TYPE (ptr)))) + { + tree c3 = build_omp_clause (OMP_CLAUSE_LOCATION (c), + OMP_CLAUSE_MAP); + OMP_CLAUSE_MAP_KIND (c3) = OMP_CLAUSE_MAP_POINTER; + OMP_CLAUSE_DECL (c3) = ptr; + OMP_CLAUSE_DECL (c2) = convert_from_reference (ptr); + OMP_CLAUSE_SIZE (c3) = size_zero_node; + OMP_CLAUSE_CHAIN (c3) = OMP_CLAUSE_CHAIN (c2); + OMP_CLAUSE_CHAIN (c2) = c3; + } } } return false; @@ -4943,6 +4957,9 @@ finish_omp_clauses (tree clauses) { if (processing_template_decl) break; + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_POINTER) + break; if (DECL_P (t)) error ("%qD is not a variable in %qs clause", t, omp_clause_code_name[OMP_CLAUSE_CODE (c)]); --- libgomp/testsuite/libgomp.c++/target-2.C.jj 2013-09-05 17:24:46.580449574 +0200 +++ libgomp/testsuite/libgomp.c++/target-2.C 2013-09-05 17:37:58.428382074 +0200 @@ -0,0 +1,58 @@ +// { dg-options "-O2 -fopenmp" } +// { dg-additional-sources "target-2-aux.cc" } + +extern "C" void abort (void); + +void +fn1 (double *x, double *y, int z) +{ + int i; + for (i = 0; i < z; i++) + { + x[i] = i & 31; + y[i] = (i & 63) - 30; + } +} + +double b[1024]; +double (&br) [1024] = b; +double cbuf[1024]; +double *c = cbuf; +double *&cr = c; +extern double (&fr) [1024]; +extern double *&gr; + +double +fn2 (int x, double (&dr) [1024], double *&er) +{ + double s = 0; + double h[1024]; + double (&hr) [1024] = h; + double ibuf[1024]; + double *i = ibuf; + double *&ir = i; + int j; + fn1 (hr + 2 * x, ir + 2 * x, x); + #pragma omp target map(to: br[:x], cr[0:x], dr[x:x], er[x:x]) \ + map(to: fr[0:x], gr[0:x], hr[2 * x:x], ir[2 * x:x]) + #pragma omp parallel for reduction(+:s) + for (j = 0; j < x; j++) + s += br[j] * cr[j] + dr[x + j] + er[x + j] + + fr[j] + gr[j] + hr[2 * x + j] + ir[2 * x + j]; + return s; +} + +int +main () +{ + double d[1024]; + double ebuf[1024]; + double *e = ebuf; + fn1 (br, cr, 128); + fn1 (d + 128, e + 128, 128); + fn1 (fr, gr, 128); + double h = fn2 (128, d, e); + if (h != 20416.0) + abort (); + return 0; +} --- libgomp/testsuite/libgomp.c++/target-2-aux.cc.jj 2013-09-05 17:25:12.924312307 +0200 +++ libgomp/testsuite/libgomp.c++/target-2-aux.cc 2013-09-05 12:40:40.000000000 +0200 @@ -0,0 +1,5 @@ +double f[1024]; +double (&fr) [1024] = f; +double gbuf[1024]; +double *g = gbuf; +double *&gr = g; Jakub