https://github.com/abidh updated https://github.com/llvm/llvm-project/pull/135161
>From 4ef791c9ad1cb951b430bff4d1ab66dd4ad30080 Mon Sep 17 00:00:00 2001 From: Abid Qadeer <haqad...@amd.com> Date: Thu, 10 Apr 2025 11:32:29 +0100 Subject: [PATCH 1/3] [OMPIRBuilder] Don't discard the debug record from entry block. When we get a function back from CodeExtractor, we discard its entry block after copying its instructions into the entry block we prepared. While copying the instructions, the terminator is discarded for obvious reasons. But if there were some debug values attached to the terminator, those are useful and need to be copied. --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index be05f01c94603..4f3745cdd2d23 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -788,8 +788,13 @@ void OpenMPIRBuilder::finalize(Function *Fn) { Instruction &I = *It; It++; - if (I.isTerminator()) + if (I.isTerminator()) { + // Absorb any debug value that terminator may have + if (OI.EntryBB->getTerminator()) + OI.EntryBB->getTerminator()->adoptDbgRecords( + &ArtificialEntry, I.getIterator(), false); continue; + } I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt()); } >From 2fff9520c56a1c6556e884478a4217f6e63d6e28 Mon Sep 17 00:00:00 2001 From: Abid Qadeer <haqad...@amd.com> Date: Mon, 28 Apr 2025 13:40:32 +0100 Subject: [PATCH 2/3] Update clang tests that use OMPIRBuilder for parallel regions.
--- .../OpenMP/irbuilder_nested_parallel_for.c | 18 ++++++++++++++++++ clang/test/OpenMP/nested_loop_codegen.cpp | 4 ++++ clang/test/OpenMP/parallel_codegen.cpp | 2 ++ 3 files changed, 24 insertions(+) diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c index ae3570fda412d..5cc5640a5173b 100644 --- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c +++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c @@ -1679,6 +1679,9 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 // CHECK-DEBUG-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META88:![0-9]+]], !DIExpression(), [[META89:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META90:![0-9]+]], !DIExpression(), [[META91:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META92:![0-9]+]], !DIExpression(), [[META93:![0-9]+]]) // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: // CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[DBG86:![0-9]+]] @@ -1723,6 +1726,9 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED12:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META102:![0-9]+]], !DIExpression(), [[META103:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META104:![0-9]+]], !DIExpression(), [[META105:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META106:![0-9]+]], !DIExpression(), [[META107:![0-9]+]]) // 
CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5:%.*]] // CHECK-DEBUG: omp.par.region5: // CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META94:![0-9]+]], !DIExpression(), [[META99:![0-9]+]]) @@ -1964,6 +1970,9 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED161:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED162:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR163:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META171:![0-9]+]], !DIExpression(), [[META172:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META173:![0-9]+]], !DIExpression(), [[META174:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META175:![0-9]+]], !DIExpression(), [[META176:![0-9]+]]) // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: // CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META157:![0-9]+]], !DIExpression(), [[META161:![0-9]+]]) @@ -2122,6 +2131,9 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED136:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED137:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR138:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META207:![0-9]+]], !DIExpression(), [[META208:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META209:![0-9]+]], !DIExpression(), [[META210:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META211:![0-9]+]], !DIExpression(), [[META212:![0-9]+]]) // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9:%.*]] // CHECK-DEBUG: omp.par.region9: // CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I16]], [[META187:![0-9]+]], !DIExpression(), [[META192:![0-9]+]]) @@ -2322,6 +2334,9 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: 
[[AGG_CAPTURED111:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED112:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR113:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META260:![0-9]+]], !DIExpression(), [[META261:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META262:![0-9]+]], !DIExpression(), [[META263:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META264:![0-9]+]], !DIExpression(), [[META265:![0-9]+]]) // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION103:%.*]] // CHECK-DEBUG: omp.par.region103: // CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I110]], [[META234:![0-9]+]], !DIExpression(), [[META240:![0-9]+]]) @@ -2402,6 +2417,9 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED52:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED53:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR54:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META282:![0-9]+]], !DIExpression(), [[META283:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META284:![0-9]+]], !DIExpression(), [[META285:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META286:![0-9]+]], !DIExpression(), [[META287:![0-9]+]]) // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION44:%.*]] // CHECK-DEBUG: omp.par.region44: // CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I51]], [[META250:![0-9]+]], !DIExpression(), [[META256:![0-9]+]]) diff --git a/clang/test/OpenMP/nested_loop_codegen.cpp b/clang/test/OpenMP/nested_loop_codegen.cpp index d8fab26bf1e7f..9aefc6a739e51 100644 --- a/clang/test/OpenMP/nested_loop_codegen.cpp +++ b/clang/test/OpenMP/nested_loop_codegen.cpp @@ -889,6 +889,8 @@ int inline_decl() { // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: 
[[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK4-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_I]], [[META24:![0-9]+]], !DIExpression(), [[META25:![0-9]+]]) +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_K]], [[META26:![0-9]+]], !DIExpression(), [[META25]]) // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: // CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG23:![0-9]+]] @@ -1066,6 +1068,8 @@ int inline_decl() { // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK4-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_I]], [[META91:![0-9]+]], !DIExpression(), [[META92:![0-9]+]]) +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_RES]], [[META93:![0-9]+]], !DIExpression(), [[META92]]) // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: // CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG86:![0-9]+]] diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp index c63c6f554f4ae..e8e57aedaa164 100644 --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -893,6 +893,7 @@ int main (int argc, char **argv) { // CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 // CHECK4-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_VLA]], [[META36:![0-9]+]], !DIExpression(), [[META37:![0-9]+]]) // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[LOADGEP_VLA]], i64 1, !dbg [[DBG35:![0-9]+]] @@ -960,6 +961,7 @@ int main (int argc, char **argv) { // CHECK4-NEXT: [[TID:%.*]] = load i32, ptr 
[[TID_ADDR_LOCAL]], align 4 // CHECK4-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load i64, ptr [[LOADGEP__RELOADED]], align 8 +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_ARGC_ADDR]], [[META60:![0-9]+]], !DIExpression(), [[META61:![0-9]+]]) // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: // CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LOADGEP_ARGC_ADDR]], align 8, !dbg [[DBG56:![0-9]+]] >From ac1d3cb31faa03a4e12426c32a676c54f0665b91 Mon Sep 17 00:00:00 2001 From: Abid Qadeer <haqad...@amd.com> Date: Mon, 28 Apr 2025 16:10:05 +0100 Subject: [PATCH 3/3] Add mlir->llvmir test. --- .../LLVMIR/omptarget-parallel-llvm-debug.mlir | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 mlir/test/Target/LLVMIR/omptarget-parallel-llvm-debug.mlir diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm-debug.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm-debug.mlir new file mode 100644 index 0000000000000..3c45f1f1c76fb --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm-debug.mlir @@ -0,0 +1,43 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + + +#di_file = #llvm.di_file<"target.f90" in ""> +#di_null_type = #llvm.di_null_type +#cu = #llvm.di_compile_unit<id = distinct[0]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, producer = "flang", isOptimized = false, emissionKind = Full> +#sp_ty = #llvm.di_subroutine_type<callingConvention = DW_CC_program, types = #di_null_type> +#sp = #llvm.di_subprogram<compileUnit = #cu, scope = #di_file, name = "test", file = #di_file, subprogramFlags = "Definition", type = #sp_ty> +#int_ty = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer", sizeInBits = 32, encoding = DW_ATE_signed> +#var_x = #llvm.di_local_variable<scope = #sp, name = "x", file = #di_file, type = #int_ty> +module attributes {dlti.dl_spec = #dlti.dl_spec<i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : 
vector<2xi64>, f128 = dense<128> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, i64 = dense<64> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr = dense<64> : vector<4xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, "dlti.endianness" = "little", "dlti.stack_alignment" = 128 : i64, "dlti.mangling_mode" = "e">, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", fir.target_cpu = "x86-64", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 21.0.0 (/home/haqadeer/work/src/aomp-llvm-project/flang 793f9220ab32f92fc3b253efec2e332c18090e53)", llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_gpu = false, omp.is_target_device = false, omp.requires = #omp<clause_requires none>, omp.target_triples = ["amdgcn-amd-amdhsa"], omp.version = #omp.version<version = 52>} { + llvm.func @_QQmain() attributes {fir.bindc_name = "test", frame_pointer = #llvm.framePointerKind<all>, target_cpu = "x86-64"} { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr + llvm.intr.dbg.declare #var_x = %1 : !llvm.ptr loc(#loc2) + %5 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "x"} + omp.target map_entries(%5 -> %arg0 : !llvm.ptr) { + %6 = llvm.mlir.constant(1 : i32) : i32 + llvm.intr.dbg.declare #var_x = %arg0 : !llvm.ptr loc(#loc2) + omp.parallel { + %7 = llvm.load %arg0 : !llvm.ptr -> i32 + %8 = llvm.add %7, %6 : i32 + llvm.store %8, %arg0 : i32, !llvm.ptr + omp.terminator + } + omp.terminator + } + llvm.return + } loc(#loc10) +} +#loc1 = loc("target.f90":1:7) +#loc2 = loc("target.f90":3:18) +#loc10 = loc(fused<#sp>[#loc1]) + + +// CHECK: define internal void 
@__omp_offloading{{.*}}omp_par{{.*}} !dbg ![[FN:[0-9]+]] { +// CHECK-NEXT: omp.par.entry: +// CHECK: #dbg_declare(ptr {{.*}}, ![[VAR:[0-9]+]], {{.*}}) +// CHECK-NEXT: br + +// CHECK: ![[FN]] = {{.*}}!DISubprogram(name: "__omp_offloading_{{.*}}omp_par"{{.*}}) +// CHECK: ![[VAR]] = !DILocalVariable(name: "x", scope: ![[FN]]{{.*}}) + _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits