llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-flang-openmp @llvm/pr-subscribers-clang Author: Gheorghe-Teodor Bercea (doru1004) <details> <summary>Changes</summary> This patch fixes the dynamic schedule tracking. --- Patch is 788.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97065.diff 24 Files Affected: - (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+14) - (modified) clang/lib/CodeGen/CGOpenMPRuntime.h (+16) - (modified) clang/lib/CodeGen/CGStmtOpenMP.cpp (+5) - (modified) clang/test/OpenMP/distribute_parallel_for_codegen.cpp (+36) - (modified) clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp (+96-60) - (modified) clang/test/OpenMP/nvptx_SPMD_codegen.cpp (+2034-1923) - (modified) clang/test/OpenMP/ordered_codegen.cpp (+64-34) - (modified) clang/test/OpenMP/parallel_for_codegen.cpp (+30-6) - (modified) clang/test/OpenMP/target_parallel_for_codegen.cpp (+4) - (modified) clang/test/OpenMP/target_parallel_for_simd_codegen.cpp (+4) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp (+40) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp (+80-40) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp (+40) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp (+80-40) - (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+3) - (modified) llvm/include/llvm/Frontend/OpenMP/OMPKinds.def (+3) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+4) - (modified) offload/DeviceRTL/include/Interface.h (+4) - (modified) offload/DeviceRTL/src/Kernel.cpp (+1) - (modified) offload/DeviceRTL/src/Workshare.cpp (+62-14) - (added) offload/test/offloading/dynamic-schedule.cpp (+55) - (added) offload/test/offloading/schedule.c (+84) - (modified) openmp/runtime/src/kmp.h (+2) - (modified) openmp/runtime/src/kmp_dispatch.cpp (+5) ``````````diff diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index f6d12d46cfc07..b47b521edd32c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2553,6 +2553,15 @@ void CGOpenMPRuntime::emitForDispatchInit( Args); } +void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF, + SourceLocation Loc) { + if (!CGF.HaveInsertPoint()) + return; + // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid); + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; + CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args); +} + static void emitForStaticInitCall( CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, @@ -11996,6 +12005,11 @@ void CGOpenMPSIMDRuntime::emitForDispatchInit( llvm_unreachable("Not supported in SIMD-only mode"); } +void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + void CGOpenMPSIMDRuntime::emitForStaticInit( CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 522ae3d35d22d..f65314d014c08 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -946,6 +946,14 @@ class CGOpenMPRuntime { unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues); + /// This is used for non static scheduled types and when the ordered + /// clause is present on the loop construct. + /// + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. + /// + virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc); + /// Struct with the values to be passed to the static runtime function struct StaticRTInput { /// Size of the iteration variable in bits. @@ -1829,6 +1837,14 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime { unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override; + /// This is used for non static scheduled types and when the ordered + /// clause is present on the loop construct. + /// + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. + /// + void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override; + /// Call the appropriate runtime routine to initialize it before start /// of loop. /// diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index f73d32de7c484..3c88f4eb71572 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2985,12 +2985,14 @@ void CodeGenFunction::EmitOMPForOuterLoop( // run-sched-var ICV. If the ICV is set to auto, the schedule is // implementation defined // + // __kmpc_dispatch_init(); // while(__kmpc_dispatch_next(&LB, &UB)) { // idx = LB; // while (idx <= UB) { BODY; ++idx; // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. // } // inner loop // } + // __kmpc_dispatch_deinit(); // // OpenMP [2.7.1, Loop Construct, Description, table 2-1] // When schedule(static, chunk_size) is specified, iterations are divided into @@ -3044,6 +3046,9 @@ void CodeGenFunction::EmitOMPForOuterLoop( OuterLoopArgs.DKind = LoopArgs.DKind; EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, emitOMPLoopBodyWithStopPoint, CodeGenOrdered); + if (DynamicOrOrdered) { + RT.emitForDispatchDeinit(*this, S.getBeginLoc()); + } } static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, diff --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp index a8892a06d4b30..109f1690e448b 100644 --- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp @@ -2283,6 +2283,9 @@ int main() { // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -2533,6 +2536,9 @@ int main() { // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP36]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -4010,6 +4016,9 @@ int main() { // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP35]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -4253,6 +4262,9 @@ int main() { // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP36]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -6314,6 +6326,9 @@ int main() { // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -6554,6 +6569,9 @@ int main() { // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -8627,6 +8645,9 @@ int main() { // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -8867,6 +8888,9 @@ int main() { // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -10884,6 +10908,9 @@ int main() { // CHECK11: omp.dispatch.inc: // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: +// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -11117,6 +11144,9 @@ int main() { // CHECK11: omp.dispatch.inc: // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -13146,6 +13176,9 @@ int main() { // CHECK11: omp.dispatch.inc: // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: +// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -13379,6 +13412,9 @@ int main() { // CHECK11: omp.dispatch.inc: // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp index a23d538910edd..14dcb17358061 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp @@ -2415,12 +2415,15 @@ int main() { // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -2689,12 +2692,15 @@ int main() { // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP36]]) +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK1-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP39]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -4310,12 +4316,15 @@ int main() { // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP35]]) +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK3-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK3-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK3-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -4577,12 +4586,15 @@ int main() { // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP36]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK3-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP39]], 0 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -6836,12 +6848,15 @@ int main() { // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK9-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK9-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -7100,12 +7115,15 @@ int main() { // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -9302,12 +9320,15 @@ int main() { // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK9-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK9-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]],... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/97065 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits