Hi!

This patch adds support for the linear clause on OpenMP 4.1 worksharing loops.
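For reference, the construct this enables is a linear clause written directly on a worksharing loop (not combined with simd). The minimal example below is distilled from the new libgomp.c/linear-1.c test further down; the comment summarizing the copyin/copyout behaviour is just a gloss on the expected semantics. Compile with -fopenmp.

/* i is linear with step 4: each thread enters the loop with i adjusted
   for its first logical iteration (copyin), and after the loop the
   original i holds the value it would have after sequential execution
   (copyout), i.e. 8 + 48 * 4 here.  */
int a[256];

int
f1 (int i)
{
  #pragma omp parallel for linear (i: 4)
  for (int j = 16; j < 64; j++)
    {
      a[i] = j;
      i += 4;
    }
  return i;
}

int
main ()
{
  if (f1 (8) != 8 + 48 * 4)
    __builtin_abort ();
  return 0;
}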
2015-06-18  Jakub Jelinek  <ja...@redhat.com>

	* gimplify.c (gimplify_scan_omp_clauses): For linear clause on
	worksharing loop combined with parallel add shared clause on the
	parallel.
	* omp-low.c (lower_rec_input_clauses): Set lastprivate_firstprivate
	flag for linear that needs copyin and copyout.
	(expand_omp_for_generic, expand_omp_for_static_nochunk,
	expand_omp_for_static_chunk): Handle linear clauses on worksharing
	loop.
	(lower_omp_for): Adjust OMP_CLAUSE_DECL and OMP_CLAUSE_LINEAR_STEP
	so that expand_omp_for_* can use it during expansion for linear
	adjustments.
gcc/c-family/
	* c-omp.c (c_omp_split_clauses): Fix up a comment.  Put
	OMP_CLAUSE_LINEAR on OMP_FOR if not combined with OMP_SIMD.
libgomp/
	* testsuite/libgomp.c/pr66199-3.c: New test.
	* testsuite/libgomp.c/pr66199-4.c: New test.
	* testsuite/libgomp.c/linear-1.c: New test.
	* testsuite/libgomp.c/linear-2.c: New test.
	* testsuite/libgomp.c++/linear-1.C: New test.
	* testsuite/libgomp.c++/linear-2.C: New test.

--- gcc/gimplify.c.jj	2015-06-12 16:42:16.000000000 +0200
+++ gcc/gimplify.c	2015-06-17 13:51:24.244250569 +0200
@@ -6276,6 +6276,11 @@ gimplify_scan_omp_clauses (tree *list_p,
 	      decl = NULL_TREE;
 	      break;
 	    }
+	  flags = GOVD_SEEN;
+	  if (!OMP_CLAUSE_LINEAR_NO_COPYIN (c))
+	    flags |= GOVD_FIRSTPRIVATE;
+	  if (!OMP_CLAUSE_LINEAR_NO_COPYOUT (c))
+	    flags |= GOVD_LASTPRIVATE;
 	  if (octx
 	      && octx->region_type == ORT_WORKSHARE
 	      && octx->combined_loop)
@@ -6293,16 +6298,16 @@ gimplify_scan_omp_clauses (tree *list_p,
 		   && (octx->region_type & ORT_TASK) != 0
 		   && octx->combined_loop)
 	    ;
+	  else if (octx
+		   && octx->region_type == ORT_COMBINED_PARALLEL
+		   && ctx->region_type == ORT_WORKSHARE
+		   && octx == outer_ctx)
+	    flags = GOVD_SEEN | GOVD_SHARED;
 	  else
 	    break;
 	  gcc_checking_assert (splay_tree_lookup (octx->variables,
 						  (splay_tree_key) decl)
 			       == NULL);
-	  flags = GOVD_SEEN;
-	  if (!OMP_CLAUSE_LINEAR_NO_COPYIN (c))
-	    flags |= GOVD_FIRSTPRIVATE;
-	  if (!OMP_CLAUSE_LINEAR_NO_COPYOUT (c))
-	    flags |= GOVD_LASTPRIVATE;
 	  omp_add_variable (octx, decl, flags);
 	  if (octx->outer_context == NULL)
 	    break;
--- gcc/omp-low.c.jj	2015-06-12 12:23:06.000000000 +0200
+++ gcc/omp-low.c	2015-06-17 12:10:30.422658998 +0200
@@ -3961,7 +3961,11 @@ lower_rec_input_clauses (tree clauses, g
 	    }
 	case OMP_CLAUSE_FIRSTPRIVATE:
 	case OMP_CLAUSE_COPYIN:
+	  break;
 	case OMP_CLAUSE_LINEAR:
+	  if (!OMP_CLAUSE_LINEAR_NO_COPYIN (c)
+	      && !OMP_CLAUSE_LINEAR_NO_COPYOUT (c))
+	    lastprivate_firstprivate = true;
 	  break;
 	case OMP_CLAUSE_REDUCTION:
 	  if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c))
@@ -6992,6 +6996,9 @@ expand_omp_for_generic (struct omp_regio
 						  zero_iter_bb));
 	}
     }
+  gimple_stmt_iterator gsif = gsi;
+  gsi_prev (&gsif);
+
   if (in_combined_parallel)
     {
       /* In a combined parallel loop, emit a call to
@@ -7089,6 +7096,10 @@ expand_omp_for_generic (struct omp_regio
   /* Remove the GIMPLE_OMP_FOR statement.  */
   gsi_remove (&gsi, true);
 
+  if (gsi_end_p (gsif))
+    gsif = gsi_after_labels (gsi_bb (gsif));
+  gsi_next (&gsif);
+
   /* Iteration setup for sequential loop goes in L0_BB.  */
   tree startvar = fd->loop.v;
   tree endvar = NULL_TREE;
@@ -7140,6 +7151,54 @@ expand_omp_for_generic (struct omp_regio
 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
     }
+  /* Handle linear clause adjustments.  */
+  tree itercnt = NULL_TREE;
+  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
+    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
+	 c; c = OMP_CLAUSE_CHAIN (c))
+      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
+	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
+	{
+	  tree d = OMP_CLAUSE_DECL (c);
+	  bool is_ref = is_reference (d);
+	  tree t = d, a, dest;
+	  if (is_ref)
+	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
+	  tree type = TREE_TYPE (t);
+	  if (POINTER_TYPE_P (type))
+	    type = sizetype;
+	  dest = unshare_expr (t);
+	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
+	  expand_omp_build_assign (&gsif, v, t);
+	  if (itercnt == NULL_TREE)
+	    {
+	      itercnt = startvar;
+	      tree n1 = fd->loop.n1;
+	      if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
+		{
+		  itercnt
+		    = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
+				    itercnt);
+		  n1 = fold_convert (TREE_TYPE (itercnt), n1);
+		}
+	      itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
+				     itercnt, n1);
+	      itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
+				     itercnt, fd->loop.step);
+	      itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
+						  NULL_TREE, false,
+						  GSI_CONTINUE_LINKING);
+	    }
+	  a = fold_build2 (MULT_EXPR, type,
+			   fold_convert (type, itercnt),
+			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
+	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
+			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
+	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+					false, GSI_CONTINUE_LINKING);
+	  assign_stmt = gimple_build_assign (dest, t);
+	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
+	}
   if (fd->collapse > 1)
     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
 
@@ -7571,6 +7630,49 @@ expand_omp_for_static_nochunk (struct om
 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
     }
+  /* Handle linear clause adjustments.  */
+  tree itercnt = NULL_TREE;
+  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
+    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
+	 c; c = OMP_CLAUSE_CHAIN (c))
+      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
+	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
+	{
+	  tree d = OMP_CLAUSE_DECL (c);
+	  bool is_ref = is_reference (d);
+	  tree t = d, a, dest;
+	  if (is_ref)
+	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
+	  if (itercnt == NULL_TREE)
+	    {
+	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
+		{
+		  itercnt = fold_build2 (MINUS_EXPR, itype,
+					 fold_convert (itype, n1),
+					 fold_convert (itype, fd->loop.n1));
+		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
+		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
+		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
+						      NULL_TREE, false,
+						      GSI_CONTINUE_LINKING);
+		}
+	      else
+		itercnt = s0;
+	    }
+	  tree type = TREE_TYPE (t);
+	  if (POINTER_TYPE_P (type))
+	    type = sizetype;
+	  a = fold_build2 (MULT_EXPR, type,
+			   fold_convert (type, itercnt),
+			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
+	  dest = unshare_expr (t);
+	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
+			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
+	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+					false, GSI_CONTINUE_LINKING);
+	  assign_stmt = gimple_build_assign (dest, t);
+	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
+	}
   if (fd->collapse > 1)
     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
 
@@ -7908,6 +8010,8 @@ expand_omp_for_static_chunk (struct omp_
   /* Remove the GIMPLE_OMP_FOR.  */
   gsi_remove (&gsi, true);
 
+  gimple_stmt_iterator gsif = gsi;
+
   /* Iteration space partitioning goes in ITER_PART_BB.  */
   gsi = gsi_last_bb (iter_part_bb);
 
@@ -7978,6 +8082,56 @@ expand_omp_for_static_chunk (struct omp_
 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
     }
+  /* Handle linear clause adjustments.  */
+  tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
+  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
+    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
+	 c; c = OMP_CLAUSE_CHAIN (c))
+      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
+	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
+	{
+	  tree d = OMP_CLAUSE_DECL (c);
+	  bool is_ref = is_reference (d);
+	  tree t = d, a, dest;
+	  if (is_ref)
+	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
+	  tree type = TREE_TYPE (t);
+	  if (POINTER_TYPE_P (type))
+	    type = sizetype;
+	  dest = unshare_expr (t);
+	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
+	  expand_omp_build_assign (&gsif, v, t);
+	  if (itercnt == NULL_TREE)
+	    {
+	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
+		{
+		  itercntbias
+		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
+				   fold_convert (itype, fd->loop.n1));
+		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
+					     itercntbias, step);
+		  itercntbias
+		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
+						NULL_TREE, true,
+						GSI_SAME_STMT);
+		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
+		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
+						      NULL_TREE, false,
+						      GSI_CONTINUE_LINKING);
+		}
+	      else
+		itercnt = s0;
+	    }
+	  a = fold_build2 (MULT_EXPR, type,
+			   fold_convert (type, itercnt),
+			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
+	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
+			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
+	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+					false, GSI_CONTINUE_LINKING);
+	  assign_stmt = gimple_build_assign (dest, t);
+	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
+	}
   if (fd->collapse > 1)
     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
 
@@ -11935,6 +12089,18 @@ lower_omp_for (gimple_stmt_iterator *gsi
 
   lower_omp_for_lastprivate (&fd, &body, &dlist, ctx);
 
+  if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR)
+    for (tree c = gimple_omp_for_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
+      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
+	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
+	{
+	  OMP_CLAUSE_DECL (c) = lookup_decl (OMP_CLAUSE_DECL (c), ctx);
+	  if (DECL_P (OMP_CLAUSE_LINEAR_STEP (c)))
+	    OMP_CLAUSE_LINEAR_STEP (c)
+	      = maybe_lookup_decl_in_outer_ctx (OMP_CLAUSE_LINEAR_STEP (c),
+						ctx);
+	}
+
   gimple_seq_add_stmt (&body, stmt);
   gimple_seq_add_seq (&body, gimple_omp_body (stmt));
--- gcc/c-family/c-omp.c.jj	2015-06-15 11:56:53.000000000 +0200
+++ gcc/c-family/c-omp.c	2015-06-18 14:41:19.986387334 +0200
@@ -770,7 +770,6 @@ c_omp_split_clauses (location_t loc, enu
 	  break;
 	case OMP_CLAUSE_SAFELEN:
 	case OMP_CLAUSE_SIMDLEN:
-	case OMP_CLAUSE_LINEAR:
 	case OMP_CLAUSE_ALIGNED:
 	  s = C_OMP_CLAUSE_SPLIT_SIMD;
 	  break;
@@ -919,7 +918,8 @@ c_omp_split_clauses (location_t loc, enu
 	}
       s = C_OMP_CLAUSE_SPLIT_SIMD;
       break;
-      /* Shared and default clauses are allowed on private and teams.  */
+      /* Shared and default clauses are allowed on parallel, teams and
+	 taskloop.
*/ case OMP_CLAUSE_SHARED: case OMP_CLAUSE_DEFAULT: if (code == OMP_TEAMS) @@ -1007,6 +1007,14 @@ c_omp_split_clauses (location_t loc, enu else s = C_OMP_CLAUSE_SPLIT_TARGET; break; + case OMP_CLAUSE_LINEAR: + /* Linear clause is allowed on simd and for. Put it on the + innermost construct. */ + if (code == OMP_SIMD) + s = C_OMP_CLAUSE_SPLIT_SIMD; + else + s = C_OMP_CLAUSE_SPLIT_FOR; + break; default: gcc_unreachable (); } --- libgomp/testsuite/libgomp.c/pr66199-3.c.jj 2015-06-17 14:32:53.239796087 +0200 +++ libgomp/testsuite/libgomp.c/pr66199-3.c 2015-06-15 15:37:04.000000000 +0200 @@ -0,0 +1,50 @@ +/* PR middle-end/66199 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp" } */ + +int u[1024], v[1024], w[1024]; + +__attribute__((noinline, noclone)) long +f1 (long a, long b) +{ + long d; + #pragma omp parallel for lastprivate (d) default(none) firstprivate (a, b) shared(u, v, w) + for (d = a; d < b; d++) + u[d] = v[d] + w[d]; + return d; +} + +__attribute__((noinline, noclone)) long +f2 (long a, long b, long c) +{ + long d, e; + #pragma omp parallel for lastprivate (d) default(none) firstprivate (a, b) shared(u, v, w) linear(c:5) lastprivate(e) + for (d = a; d < b; d++) + { + u[d] = v[d] + w[d]; + c += 5; + e = c; + } + return d + c + e; +} + +__attribute__((noinline, noclone)) long +f3 (long a1, long b1, long a2, long b2) +{ + long d1, d2; + #pragma omp parallel for default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) collapse(2) + for (d1 = a1; d1 < b1; d1++) + for (d2 = a2; d2 < b2; d2++) + u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2]; + return d1 + d2; +} + +int +main () +{ + if (f1 (0, 1024) != 1024 + || f2 (0, 1024, 17) != 1024 + 2 * (17 + 5 * 1024) + || f3 (0, 32, 0, 32) != 64) + __builtin_abort (); + return 0; +} --- libgomp/testsuite/libgomp.c/pr66199-4.c.jj 2015-06-17 14:32:56.149751373 +0200 +++ libgomp/testsuite/libgomp.c/pr66199-4.c 2015-06-16 16:29:48.000000000 +0200 @@ -0,0 +1,59 @@ +/* PR middle-end/66199 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp" } */ + +#pragma omp declare target +int u[1024], v[1024], w[1024]; +#pragma omp end declare target + +__attribute__((noinline, noclone)) void +f1 (long a, long b) +{ + long d; + #pragma omp target teams distribute parallel for default(none) firstprivate (a, b) shared(u, v, w) + for (d = a; d < b; d++) + u[d] = v[d] + w[d]; +} + +__attribute__((noinline, noclone)) void +f2 (long a, long b, long c) +{ + long d, e; + #pragma omp target teams distribute parallel for default(none) firstprivate (a, b) shared(u, v, w) linear(d) linear(c:5) lastprivate(e) + for (d = a; d < b; d++) + { + u[d] = v[d] + w[d]; + c += 5; + e = c; + } +} + +__attribute__((noinline, noclone)) void +f3 (long a1, long b1, long a2, long b2) +{ + long d1, d2; + #pragma omp target teams distribute parallel for default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) collapse(2) + for (d1 = a1; d1 < b1; d1++) + for (d2 = a2; d2 < b2; d2++) + u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2]; +} + +__attribute__((noinline, noclone)) void +f4 (long a1, long b1, long a2, long b2) +{ + long d1, d2; + #pragma omp target teams distribute parallel for default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) collapse(2) + for (d1 = a1; d1 < b1; d1++) + for (d2 = a2; d2 < b2; d2++) + u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2]; +} + +int +main () +{ + f1 (0, 1024); + f2 (0, 1024, 17); + f3 (0, 32, 0, 32); + f4 (0, 32, 0, 32); + return 0; +} --- libgomp/testsuite/libgomp.c/linear-1.c.jj 
2015-06-16 12:09:32.375016775 +0200 +++ libgomp/testsuite/libgomp.c/linear-1.c 2015-06-16 14:54:34.000000000 +0200 @@ -0,0 +1,250 @@ +int a[256]; + +__attribute__((noinline, noclone)) int +f1 (int i) +{ + #pragma omp parallel for linear (i: 4) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f2 (short int i, char k) +{ + #pragma omp parallel for linear (i: k + 1) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) long long int +f3 (long long int i, long long int k) +{ + #pragma omp parallel for linear (i: k) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) int +f4 (int i) +{ + #pragma omp parallel for linear (i: 4) schedule(static, 3) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f5 (short int i, char k) +{ + #pragma omp parallel for linear (i: k + 1) schedule(static, 5) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) long long int +f6 (long long int i, long long int k) +{ + #pragma omp parallel for linear (i: k) schedule(static, 7) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) int +f7 (int i) +{ + #pragma omp parallel for linear (i: 4) schedule(dynamic, 3) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f8 (short int i, char k) +{ + #pragma omp parallel for linear (i: k + 1) schedule(dynamic, 5) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) long long int +f9 (long long int i, long long int k) +{ + #pragma omp parallel for linear (i: k) schedule(dynamic, 7) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) int +f10 (int i, long step) +{ + #pragma omp parallel for linear (i: 4) + for (int j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f11 (short int i, char k, char step) +{ + #pragma omp parallel for linear (i: k + 1) + for (long j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) long long int +f12 (long long int i, long long int k, int step) +{ + #pragma omp parallel for linear (i: k) + for (short j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) int +f13 (int i, long long int step) +{ + #pragma omp parallel for linear (i: 4) schedule(static, 3) + for (int j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f14 (short int i, char k, int step) +{ + #pragma omp parallel for linear (i: k + 1) schedule(static, 5) + for (long j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) long long int +f15 (long long int i, long long int k, long int step) +{ + #pragma omp parallel for linear (i: k) schedule(static, 7) + for (short j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) int +f16 (int i, long long int step) +{ + #pragma omp parallel for 
linear (i: 4) schedule(dynamic, 3) + for (int j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f17 (short int i, char k, int step) +{ + #pragma omp parallel for linear (i: k + 1) schedule(dynamic, 5) + for (long j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) long long int +f18 (long long int i, long long int k, long int step) +{ + #pragma omp parallel for linear (i: k) schedule(dynamic, 7) + for (short j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +int +main () +{ +#define TEST(x) \ + if (x != 8 + 48 * 4) \ + __builtin_abort (); \ + for (int i = 0; i < 256; i++) \ + if (a[i] != (((i & 3) == 0 && i >= 8 \ + && i < 8 + 48 * 4) \ + ? ((i - 8) / 4) + 16 : 0)) \ + __builtin_abort (); \ + __builtin_memset (a, 0, sizeof (a)) + TEST (f1 (8)); + TEST (f2 (8, 3)); + TEST (f3 (8LL, 4LL)); + TEST (f4 (8)); + TEST (f5 (8, 3)); + TEST (f6 (8LL, 4LL)); + TEST (f7 (8)); + TEST (f8 (8, 3)); + TEST (f9 (8LL, 4LL)); + TEST (f10 (8, 2)); + TEST (f11 (8, 3, 2)); + TEST (f12 (8LL, 4LL, 2)); + TEST (f13 (8, 2)); + TEST (f14 (8, 3, 2)); + TEST (f15 (8LL, 4LL, 2)); + TEST (f16 (8, 2)); + TEST (f17 (8, 3, 2)); + TEST (f18 (8LL, 4LL, 2)); + return 0; +} --- libgomp/testsuite/libgomp.c/linear-2.c.jj 2015-06-17 12:08:14.686784115 +0200 +++ libgomp/testsuite/libgomp.c/linear-2.c 2015-06-17 11:27:25.000000000 +0200 @@ -0,0 +1,233 @@ +#pragma omp declare target +int a[256]; +#pragma omp end declare target + +__attribute__((noinline, noclone)) void +f1 (int i) +{ + #pragma omp target teams distribute parallel for linear (i: 4) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f2 (short int i, char k) +{ + #pragma omp target teams distribute parallel for linear (i: k + 1) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f3 (long long int i, long long int k) +{ + #pragma omp target teams distribute parallel for linear (i: k) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f4 (int i) +{ + #pragma omp target teams distribute parallel for linear (i: 4) schedule(static, 3) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f5 (short int i, char k) +{ + #pragma omp target teams distribute parallel for linear (i: k + 1) schedule(static, 5) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f6 (long long int i, long long int k) +{ + #pragma omp target teams distribute parallel for linear (i: k) schedule(static, 7) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f7 (int i) +{ + #pragma omp target teams distribute parallel for linear (i: 4) schedule(dynamic, 3) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f8 (short int i, char k) +{ + #pragma omp target teams distribute parallel for linear (i: k + 1) schedule(dynamic, 5) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f9 (long long int i, long long int k) +{ + #pragma omp target teams distribute parallel for linear (i: k) schedule(dynamic, 7) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + 
+__attribute__((noinline, noclone)) void +f10 (int i, char start, long step) +{ + #pragma omp target teams distribute parallel for linear (i: 4) + for (int j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f11 (short int i, char k, long start, char step) +{ + #pragma omp target teams distribute parallel for linear (i: k + 1) + for (long j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f12 (long long int i, long long int k, long long int start, int step) +{ + #pragma omp target teams distribute parallel for linear (i: k) + for (short j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f13 (int i, int start, long long int step) +{ + #pragma omp target teams distribute parallel for linear (i: 4) schedule(static, 3) + for (int j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f14 (short int i, char k, unsigned long long int start, int step) +{ + #pragma omp target teams distribute parallel for linear (i: k + 1) schedule(static, 5) + for (long j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f15 (long long int i, long long int k, char start, long int step) +{ + #pragma omp target teams distribute parallel for linear (i: k) schedule(static, 7) + for (short j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f16 (int i, int start, long long int step) +{ + #pragma omp target teams distribute parallel for linear (i: 4) schedule(dynamic, 3) + for (int j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f17 (short int i, char k, long start, int step) +{ + #pragma omp target teams distribute parallel for linear (i: k + 1) schedule(dynamic, 5) + for (long j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f18 (long long int i, long long int k, short start, long int step) +{ + #pragma omp target teams distribute parallel for linear (i: k) schedule(dynamic, 7) + for (short j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +int +main () +{ +#define TEST(x) \ + x; \ + for (int i = 0; i < 256; i++) \ + if (a[i] != (((i & 3) == 0 && i >= 8 \ + && i < 8 + 48 * 4) \ + ? 
((i - 8) / 4) + 16 : 0)) \ + __builtin_abort (); \ + __builtin_memset (a, 0, sizeof (a)) + TEST (f1 (8)); + TEST (f2 (8, 3)); + TEST (f3 (8LL, 4LL)); + TEST (f4 (8)); + TEST (f5 (8, 3)); + TEST (f6 (8LL, 4LL)); + TEST (f7 (8)); + TEST (f8 (8, 3)); + TEST (f9 (8LL, 4LL)); + TEST (f10 (8, 16, 2)); + TEST (f11 (8, 3, 16, 2)); + TEST (f12 (8LL, 4LL, 16, 2)); + TEST (f13 (8, 16, 2)); + TEST (f14 (8, 3, 16, 2)); + TEST (f15 (8LL, 4LL, 16, 2)); + TEST (f16 (8, 16, 2)); + TEST (f17 (8, 3, 16, 2)); + TEST (f18 (8LL, 4LL, 16, 2)); + return 0; +} --- libgomp/testsuite/libgomp.c++/linear-1.C.jj 2015-06-17 18:26:14.795257421 +0200 +++ libgomp/testsuite/libgomp.c++/linear-1.C 2015-06-17 18:15:29.000000000 +0200 @@ -0,0 +1,268 @@ +int a[256]; + +__attribute__((noinline, noclone)) int +f1 (int i) +{ + #pragma omp parallel for linear (i: 4) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int & +f2 (short int &i, char k) +{ + #pragma omp parallel for linear (i: k + 1) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +template <typename T> +__attribute__((noinline, noclone)) T +f3 (T i, T k) +{ + #pragma omp parallel for linear (i: k) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +template <typename T> +__attribute__((noinline, noclone)) T & +f4 (T &i) +{ + #pragma omp parallel for linear (i: 4) schedule(static, 3) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f5 (short int i, char &k) +{ + #pragma omp parallel for linear (i: k + 1) schedule(static, 5) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +template <int N> +__attribute__((noinline, noclone)) long long int +f6 (long long int i, long long int k) +{ + #pragma omp parallel for linear (i: k) schedule(static, 7) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) int +f7 (int &i) +{ + #pragma omp parallel for linear (i: 4) schedule(dynamic, 3) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f8 (short int i, char k) +{ + #pragma omp parallel for linear (i: k + 1) schedule(dynamic, 5) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) long long int +f9 (long long int i, long long int k) +{ + #pragma omp parallel for linear (i: k) schedule(dynamic, 7) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +template <typename T> +__attribute__((noinline, noclone)) T & +f10 (T &i, long &step) +{ + #pragma omp parallel for linear (i: 4) + for (int j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f11 (short int i, char k, char step) +{ + #pragma omp parallel for linear (i: k + 1) + for (long j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) long long int +f12 (long long int i, long long int k, int step) +{ + #pragma omp parallel for linear (i: k) + for (short j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) int +f13 (int &i, long long int step) +{ + #pragma omp parallel for linear (i: 4) schedule(static, 3) + for (int j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; 
+ i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f14 (short int &i, char &k, int &step) +{ + #pragma omp parallel for linear (i: k + 1) schedule(static, 5) + for (long j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +template <int N> +__attribute__((noinline, noclone)) long long int +f15 (long long int i, long long int k, long int step) +{ + #pragma omp parallel for linear (i: k) schedule(static, 7) + for (short j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) int +f16 (int i, long long int step) +{ + #pragma omp parallel for linear (i: 4) schedule(dynamic, 3) + for (int j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f17 (short int i, char k, int step) +{ + #pragma omp parallel for linear (i: k + 1) schedule(dynamic, 5) + for (long j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +template <typename T> +__attribute__((noinline, noclone)) T +f18 (T i, T k, long int step) +{ + #pragma omp parallel for linear (i: k) schedule(dynamic, 7) + for (short j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +int +main () +{ +#define TEST(x) \ + if (x != 8 + 48 * 4) \ + __builtin_abort (); \ + for (int i = 0; i < 256; i++) \ + if (a[i] != (((i & 3) == 0 && i >= 8 \ + && i < 8 + 48 * 4) \ + ? ((i - 8) / 4) + 16 : 0)) \ + __builtin_abort (); \ + __builtin_memset (a, 0, sizeof (a)) + TEST (f1 (8)); + short int vs = 8; + TEST (f2 (vs, 3)); + TEST (f3 (8LL, 4LL)); + int vi = 8; + TEST (f4 (vi)); + char vk = 3; + TEST (f5 (8, vk)); + TEST (f6<7> (8LL, 4LL)); + vi = 8; + TEST (f7 (vi)); + TEST (f8 (8, 3)); + TEST (f9 (8LL, 4LL)); + vi = 8; + long vl = 2; + TEST (f10 (vi, vl)); + TEST (f11 (8, 3, 2)); + TEST (f12 (8LL, 4LL, 2)); + vi = 8; + TEST (f13 (vi, 2)); + vs = 8; + vk = 3; + vi = 2; + TEST (f14 (vs, vk, vi)); + TEST (f15<9> (8LL, 4LL, 2)); + TEST (f16 (8, 2)); + TEST (f17 (8, 3, 2)); + long long int vll1 = 8LL; + long long int vll2 = 4LL; + TEST (f18<long long int &> (vll1, vll2, 2)); + return 0; +} --- libgomp/testsuite/libgomp.c++/linear-2.C.jj 2015-06-17 18:26:17.673213343 +0200 +++ libgomp/testsuite/libgomp.c++/linear-2.C 2015-06-17 18:25:23.000000000 +0200 @@ -0,0 +1,250 @@ +#pragma omp declare target +int a[256]; +#pragma omp end declare target + +template <typename T> +__attribute__((noinline, noclone)) void +f1 (T &i) +{ + #pragma omp target teams distribute parallel for linear (i: 4) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f2 (short int i, char k) +{ + #pragma omp target teams distribute parallel for linear (i: k + 1) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f3 (long long int &i, long long int k) +{ + #pragma omp target teams distribute parallel for linear (i: k) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f4 (int i) +{ + #pragma omp target teams distribute parallel for linear (i: 4) schedule(static, 3) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f5 (short int i, char k) +{ + #pragma omp target teams distribute parallel for linear (i: k + 1) schedule(static, 5) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +template <int N> 
+__attribute__((noinline, noclone)) void +f6 (long long int &i, long long int k) +{ + #pragma omp target teams distribute parallel for linear (i: k) schedule(static, 7) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f7 (int i) +{ + #pragma omp target teams distribute parallel for linear (i: 4) schedule(dynamic, 3) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f8 (short int &i, char &k) +{ + #pragma omp target teams distribute parallel for linear (i: k + 1) schedule(dynamic, 5) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f9 (long long int i, long long int k) +{ + #pragma omp target teams distribute parallel for linear (i: k) schedule(dynamic, 7) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f10 (int i, char start, long step) +{ + #pragma omp target teams distribute parallel for linear (i: 4) + for (int j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f11 (short int &i, char &k, long &start, char step) +{ + #pragma omp target teams distribute parallel for linear (i: k + 1) + for (long j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f12 (long long int i, long long int k, long long int start, int step) +{ + #pragma omp target teams distribute parallel for linear (i: k) + for (short j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f13 (int i, int start, long long int step) +{ + #pragma omp target teams distribute parallel for linear (i: 4) schedule(static, 3) + for (int j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f14 (short int i, char k, unsigned long long int start, int step) +{ + #pragma omp target teams distribute parallel for linear (i: k + 1) schedule(static, 5) + for (long j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +template <typename T> +__attribute__((noinline, noclone)) void +f15 (T &i, T k, char &start, long int &step) +{ + #pragma omp target teams distribute parallel for linear (i: k) schedule(static, 7) + for (short j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +template <typename T> +__attribute__((noinline, noclone)) void +f16 (T i, T start, long long int step) +{ + #pragma omp target teams distribute parallel for linear (i: 4) schedule(dynamic, 3) + for (int j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f17 (short int i, char k, long start, int step) +{ + #pragma omp target teams distribute parallel for linear (i: k + 1) schedule(dynamic, 5) + for (long j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +__attribute__((noinline, noclone)) void +f18 (long long int i, long long int k, short start, long int step) +{ + #pragma omp target teams distribute parallel for linear (i: k) schedule(dynamic, 7) + for (short j = start; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } +} + +int +main () +{ +#define TEST(x) \ + x; \ + for (int i = 0; i < 256; i++) \ + if (a[i] != (((i & 3) == 0 && i >= 8 \ + && i < 8 + 48 * 4) \ + ? 
((i - 8) / 4) + 16 : 0)) \ + __builtin_abort (); \ + __builtin_memset (a, 0, sizeof (a)) + int vi = 8; + TEST (f1 (vi)); + TEST (f2 (8, 3)); + long long int vll = 8LL; + TEST (f3 (vll, 4LL)); + TEST (f4 (8)); + TEST (f5 (8, 3)); + vll = 8LL; + TEST (f6<9> (vll, 4LL)); + TEST (f7 (8)); + short int vs = 8; + char vk = 3; + TEST (f8 (vs, vk)); + TEST (f9 (8LL, 4LL)); + TEST (f10 (8, 16, 2)); + vs = 8; + vk = 3; + long int vl = 16; + TEST (f11 (vs, vk, vl, 2)); + TEST (f12 (8LL, 4LL, 16, 2)); + TEST (f13 (8, 16, 2)); + TEST (f14 (8, 3, 16, 2)); + vll = 8LL; + vk = 16; + vl = 2; + TEST (f15 (vll, 4LL, vk, vl)); + vi = 8; + int vi2 = 16; + TEST (f16<int &> (vi, vi2, 2)); + TEST (f17 (8, 3, 16, 2)); + TEST (f18 (8LL, 4LL, 16, 2)); + return 0; +} Jakub