https://github.com/skatrak created https://github.com/llvm/llvm-project/pull/127822
This patch adds `target teams distribute [simd]` and equivalent construct nests to the list of cases where loop bounds can be evaluated in the host, as they represent kernels for which the trip count must also be evaluated in advance of the kernel call. From 0e96e97bb5405904522d1bd54b458fb92d11f7fb Mon Sep 17 00:00:00 2001 From: Sergio Afonso <safon...@amd.com> Date: Wed, 19 Feb 2025 15:15:01 +0000 Subject: [PATCH] [Flang][OpenMP] Allow host evaluation of loop bounds for distribute This patch adds `target teams distribute [simd]` and equivalent construct nests to the list of cases where loop bounds can be evaluated in the host, as they represent Generic-SPMD kernels for which the trip count must also be evaluated in advance of the kernel call. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 12 +-- flang/test/Lower/OpenMP/host-eval.f90 | 103 ++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 5 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index bd794033cdf11..8c80453610473 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -562,8 +562,11 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_distribute_parallel_do: case OMPD_distribute_parallel_do_simd: - cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv); cp.processNumThreads(stmtCtx, hostInfo.ops); + [[fallthrough]]; + case OMPD_distribute: + case OMPD_distribute_simd: + cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv); break; // Cases where 'teams' clauses might be present, and target SPMD is @@ -573,10 +576,8 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_target_teams: cp.processNumTeams(stmtCtx, hostInfo.ops); - processSingleNestedIf([](Directive nestedDir) { - return nestedDir == OMPD_distribute_parallel_do || - nestedDir == OMPD_distribute_parallel_do_simd; - }); + processSingleNestedIf( + [](Directive 
nestedDir) { return topDistributeSet.test(nestedDir); }); break; // Cases where only 'teams' host-evaluated clauses might be present. @@ -586,6 +587,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: + cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv); cp.processNumTeams(stmtCtx, hostInfo.ops); break; diff --git a/flang/test/Lower/OpenMP/host-eval.f90 b/flang/test/Lower/OpenMP/host-eval.f90 index 32c52462b86a7..65258c91e5daf 100644 --- a/flang/test/Lower/OpenMP/host-eval.f90 +++ b/flang/test/Lower/OpenMP/host-eval.f90 @@ -155,3 +155,106 @@ subroutine distribute_parallel_do_simd() !$omp end distribute parallel do simd !$omp end teams end subroutine distribute_parallel_do_simd + +! BOTH-LABEL: func.func @_QPdistribute +subroutine distribute() + ! BOTH: omp.target + + ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32) + + ! DEVICE-NOT: host_eval({{.*}}) + ! DEVICE-SAME: { + + ! BOTH: omp.teams + !$omp target teams + + ! BOTH: omp.distribute + ! BOTH-NEXT: omp.loop_nest + + ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + !$omp distribute + do i=1,10 + call foo() + end do + !$omp end distribute + !$omp end target teams + + ! BOTH: omp.target + ! BOTH-NOT: host_eval({{.*}}) + ! BOTH-SAME: { + ! BOTH: omp.teams + !$omp target teams + call foo() !< Prevents this from being Generic-SPMD. + + ! BOTH: omp.distribute + !$omp distribute + do i=1,10 + call foo() + end do + !$omp end distribute + !$omp end target teams + + ! BOTH: omp.teams + !$omp teams + + ! BOTH: omp.distribute + !$omp distribute + do i=1,10 + call foo() + end do + !$omp end distribute + !$omp end teams +end subroutine distribute + +! BOTH-LABEL: func.func @_QPdistribute_simd +subroutine distribute_simd() + ! BOTH: omp.target + + ! 
HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32) + + ! DEVICE-NOT: host_eval({{.*}}) + ! DEVICE-SAME: { + + ! BOTH: omp.teams + !$omp target teams + + ! BOTH: omp.distribute + ! BOTH-NEXT: omp.simd + ! BOTH-NEXT: omp.loop_nest + + ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + !$omp distribute simd + do i=1,10 + call foo() + end do + !$omp end distribute simd + !$omp end target teams + + ! BOTH: omp.target + ! BOTH-NOT: host_eval({{.*}}) + ! BOTH-SAME: { + ! BOTH: omp.teams + !$omp target teams + call foo() !< Prevents this from being Generic-SPMD. + + ! BOTH: omp.distribute + ! BOTH-NEXT: omp.simd + !$omp distribute simd + do i=1,10 + call foo() + end do + !$omp end distribute simd + !$omp end target teams + + ! BOTH: omp.teams + !$omp teams + + ! BOTH: omp.distribute + ! BOTH-NEXT: omp.simd + !$omp distribute simd + do i=1,10 + call foo() + end do + !$omp end distribute simd + !$omp end teams +end subroutine distribute_simd _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits