https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/130078
>From 9de8c664bad3a851e3b9644711b24c6449db9e49 Mon Sep 17 00:00:00 2001 From: ergawy <kareem.erg...@amd.com> Date: Thu, 6 Mar 2025 03:16:59 -0600 Subject: [PATCH 1/4] [flang][OpenMP] Translate OpenMP scopes when compiling for target device If a `target` directive is nested in a host OpenMP directive (e.g. parallel, task, or a worksharing loop), flang currently crashes if the target directive-related MLIR ops (e.g. `omp.map.bounds` and `omp.map.info` depends on SSA values defined inside the parent host OpenMP directives/ops. This PR tries to solve this problem by treating these parent OpenMP ops as "SSA scopes". Whenever we are translating for the device, instead of completely translating host ops, we just tranlate their MLIR ops as pure SSA values. --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 59 ++++++-- .../openmp-target-nesting-in-host-ops.mlir | 136 ++++++++++++++++++ 2 files changed, 186 insertions(+), 9 deletions(-) create mode 100644 mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index b9893716980fe..f277f35fa51eb 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -537,6 +537,19 @@ static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) { llvm_unreachable("Unknown ClauseProcBindKind kind"); } +/// Maps elements of \p blockArgs (which are MLIR values) to the corresponding +/// LLVM values of \p operands' elements. This is useful when an OpenMP region +/// with entry block arguments is converted to LLVM. In this case \p blockArgs +/// are (part of) of the OpenMP region's entry arguments and \p operands are +/// (part of) of the operands to the OpenMP op containing the region. +static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation, + omp::BlockArgOpenMPOpInterface blockArgIface) { + llvm::SmallVector<std::pair<Value, BlockArgument>> blockArgsPairs; + blockArgIface.getBlockArgsPairs(blockArgsPairs); + for (auto [var, arg] : blockArgsPairs) + moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var)); +} + /// Helper function to map block arguments defined by ignored loop wrappers to /// LLVM values and prevent any uses of those from triggering null pointer /// dereferences. @@ -549,17 +562,10 @@ convertIgnoredWrapper(omp::LoopWrapperInterface opInst, // Map block arguments directly to the LLVM value associated to the // corresponding operand. This is semantically equivalent to this wrapper not // being present. - auto forwardArgs = - [&moduleTranslation](omp::BlockArgOpenMPOpInterface blockArgIface) { - llvm::SmallVector<std::pair<Value, BlockArgument>> blockArgsPairs; - blockArgIface.getBlockArgsPairs(blockArgsPairs); - for (auto [var, arg] : blockArgsPairs) - moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var)); - }; - return llvm::TypeSwitch<Operation *, LogicalResult>(opInst) .Case([&](omp::SimdOp op) { - forwardArgs(cast<omp::BlockArgOpenMPOpInterface>(*op)); + forwardArgs(moduleTranslation, + cast<omp::BlockArgOpenMPOpInterface>(*op)); op.emitWarning() << "simd information on composite construct discarded"; return success(); }) @@ -5294,6 +5300,7 @@ convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, return convertHostOrTargetOperation(op, builder, moduleTranslation); } + static LogicalResult convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { @@ -5313,6 +5320,40 @@ convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, return WalkResult::interrupt(); return WalkResult::skip(); } + + // Non-target ops might nest target-related ops, therefore, we + // translate them as non-OpenMP scopes. Translating them is needed by + // nested target-related ops since they might LLVM values defined in + // their parent non-target ops. + if (isa<omp::OpenMPDialect>(oper->getDialect()) && + oper->getParentOfType<LLVM::LLVMFuncOp>() && + !oper->getRegions().empty()) { + if (auto blockArgsIface = + dyn_cast<omp::BlockArgOpenMPOpInterface>(oper)) + forwardArgs(moduleTranslation, blockArgsIface); + + if (auto loopNest = dyn_cast<omp::LoopNestOp>(oper)) { + for (auto iv : loopNest.getIVs()) { + // Create fake allocas just to maintain IR validity. + moduleTranslation.mapValue( + iv, builder.CreateAlloca( + moduleTranslation.convertType(iv.getType()))); + } + } + + for (Region ®ion : oper->getRegions()) { + auto result = convertOmpOpRegions( + region, oper->getName().getStringRef().str() + ".fake.region", + builder, moduleTranslation); + if (failed(handleError(result, *oper))) + return WalkResult::interrupt(); + + builder.SetInsertPoint(result.get(), result.get()->end()); + } + + return WalkResult::skip(); + } + return WalkResult::advance(); }).wasInterrupted(); return failure(interrupted); diff --git a/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir b/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir new file mode 100644 index 0000000000000..2b3bde46a787c --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir @@ -0,0 +1,136 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { + + omp.private {type = private} @i32_privatizer : i32 + + llvm.func @test_nested_target_in_parallel(%arg0: !llvm.ptr) { + omp.parallel { + %0 = llvm.mlir.constant(4 : index) : i64 + %1 = llvm.mlir.constant(1 : index) : i64 + %4 = omp.map.bounds lower_bound(%1 : i64) upper_bound(%0 : i64) stride(%1 : i64) start_idx(%1 : i64) + %mapv1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%4) -> !llvm.ptr {name = ""} + omp.target map_entries(%mapv1 -> %map_arg : !llvm.ptr) { + omp.terminator + } + omp.terminator + } + llvm.return + } + +// CHECK-LABEL: define void @test_nested_target_in_parallel({{.*}}) { +// CHECK-NEXT: br label %omp.parallel.fake.region +// CHECK: omp.parallel.fake.region: +// CHECK-NEXT: br label %omp.region.cont +// CHECK: omp.region.cont: +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + llvm.func @test_nested_target_in_wsloop(%arg0: !llvm.ptr) { + %8 = llvm.mlir.constant(1 : i64) : i64 + %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr + %16 = llvm.mlir.constant(10 : i32) : i32 + %17 = llvm.mlir.constant(1 : i32) : i32 + omp.wsloop private(@i32_privatizer %9 -> %loop_arg : !llvm.ptr) { + omp.loop_nest (%arg1) : i32 = (%17) to (%16) inclusive step (%17) { + llvm.store %arg1, %loop_arg : i32, !llvm.ptr + %0 = llvm.mlir.constant(4 : index) : i64 + %1 = llvm.mlir.constant(1 : index) : i64 + %4 = omp.map.bounds lower_bound(%1 : i64) upper_bound(%0 : i64) stride(%1 : i64) start_idx(%1 : i64) + %mapv1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%4) -> !llvm.ptr {name = ""} + omp.target map_entries(%mapv1 -> %map_arg : !llvm.ptr) { + omp.terminator + } + omp.yield + } + } + llvm.return + } + +// CHECK-LABEL: define void @test_nested_target_in_wsloop(ptr %0) { +// CHECK-NEXT: %{{.*}} = alloca i32, i64 1, align 4 +// CHECK-NEXT: br label %omp.wsloop.fake.region +// CHECK: omp.wsloop.fake.region: +// CHECK-NEXT: %{{.*}} = alloca i32, align 4 +// CHECK-NEXT: br label %omp.loop_nest.fake.region +// CHECK: omp.loop_nest.fake.region: +// CHECK-NEXT: store ptr %3, ptr %2, align 8 +// CHECK-NEXT: br label %omp.region.cont1 +// CHECK: omp.region.cont1: +// CHECK-NEXT: br label %omp.region.cont +// CHECK: omp.region.cont: +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + llvm.func @test_nested_target_in_parallel_with_private(%arg0: !llvm.ptr) { + %8 = llvm.mlir.constant(1 : i64) : i64 + %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr + omp.parallel private(@i32_privatizer %9 -> %i_priv_arg : !llvm.ptr) { + %1 = llvm.mlir.constant(1 : index) : i64 + // Use the private clause from omp.parallel to make sure block arguments + // are handled. + %i_val = llvm.load %i_priv_arg : !llvm.ptr -> i64 + %4 = omp.map.bounds lower_bound(%1 : i64) upper_bound(%i_val : i64) stride(%1 : i64) start_idx(%1 : i64) + %mapv1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%4) -> !llvm.ptr {name = ""} + omp.target map_entries(%mapv1 -> %map_arg : !llvm.ptr) { + omp.terminator + } + omp.terminator + } + llvm.return + } + + llvm.func @test_nested_target_in_task_with_private(%arg0: !llvm.ptr) { + %8 = llvm.mlir.constant(1 : i64) : i64 + %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr + omp.task private(@i32_privatizer %9 -> %i_priv_arg : !llvm.ptr) { + %1 = llvm.mlir.constant(1 : index) : i64 + // Use the private clause from omp.task to make sure block arguments + // are handled. + %i_val = llvm.load %i_priv_arg : !llvm.ptr -> i64 + %4 = omp.map.bounds lower_bound(%1 : i64) upper_bound(%i_val : i64) stride(%1 : i64) start_idx(%1 : i64) + %mapv1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%4) -> !llvm.ptr {name = ""} + omp.target map_entries(%mapv1 -> %map_arg : !llvm.ptr) { + omp.terminator + } + omp.terminator + } + llvm.return + } + +// CHECK-LABEL: define void @test_nested_target_in_parallel_with_private({{.*}}) { +// CHECK: br label %omp.parallel.fake.region +// CHECK: omp.parallel.fake.region: +// CHECK: br label %omp.region.cont +// CHECK: omp.region.cont: +// CHECK-NEXT: ret void +// CHECK-NEXT: } + +// CHECK-LABEL: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_parallel_{{.*}} { +// CHECK: call i32 @__kmpc_target_init +// CHECK: user_code.entry: +// CHECK: call void @__kmpc_target_deinit() +// CHECK: ret void +// CHECK: } + +// CHECK-LABEL: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_test_nested_target_in_wsloop_{{.*}} { +// CHECK: call i32 @__kmpc_target_init +// CHECK: user_code.entry: +// CHECK: call void @__kmpc_target_deinit() +// CHECK: ret void +// CHECK: } + +// CHECK-LABEL: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_test_nested_target_in_parallel_with_private_{{.*}} { +// CHECK: call i32 @__kmpc_target_init +// CHECK: user_code.entry: +// CHECK: call void @__kmpc_target_deinit() +// CHECK: ret void +// CHECK: } + +// CHECK-LABEL: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_test_nested_target_in_task_with_private_{{.*}} { +// CHECK: call i32 @__kmpc_target_init +// CHECK: user_code.entry: +// CHECK: call void @__kmpc_target_deinit() +// CHECK: ret void +// CHECK: } +} >From 1d8b2bf5eee51c5f68ba2e0a3ab2e4662e1bb388 Mon Sep 17 00:00:00 2001 From: ergawy <kareem.erg...@amd.com> Date: Tue, 11 Mar 2025 00:57:03 -0500 Subject: [PATCH 2/4] handle some review comments --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index f277f35fa51eb..0b91fe540a948 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -542,6 +542,14 @@ static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) { /// with entry block arguments is converted to LLVM. In this case \p blockArgs /// are (part of) of the OpenMP region's entry arguments and \p operands are /// (part of) of the operands to the OpenMP op containing the region. +/// +/// This function assumes that \p operands belong to the enclosing op of the +/// block containing \p blockArgs: +/// ``` +/// enclosing_op(operands) { +/// block(blockArgs) +/// } +/// ``` static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation, omp::BlockArgOpenMPOpInterface blockArgIface) { llvm::SmallVector<std::pair<Value, BlockArgument>> blockArgsPairs; @@ -5300,7 +5308,6 @@ convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, return convertHostOrTargetOperation(op, builder, moduleTranslation); } - static LogicalResult convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { @@ -5323,8 +5330,8 @@ convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, // Non-target ops might nest target-related ops, therefore, we // translate them as non-OpenMP scopes. Translating them is needed by - // nested target-related ops since they might LLVM values defined in - // their parent non-target ops. + // nested target-related ops since they might need LLVM values defined + // in their parent non-target ops. if (isa<omp::OpenMPDialect>(oper->getDialect()) && oper->getParentOfType<LLVM::LLVMFuncOp>() && !oper->getRegions().empty()) { @@ -5333,6 +5340,8 @@ convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, forwardArgs(moduleTranslation, blockArgsIface); if (auto loopNest = dyn_cast<omp::LoopNestOp>(oper)) { + assert(builder.GetInsertBlock() && + "No insert block is set for the builder"); for (auto iv : loopNest.getIVs()) { // Create fake allocas just to maintain IR validity. moduleTranslation.mapValue( @@ -5342,6 +5351,10 @@ convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, } for (Region ®ion : oper->getRegions()) { + // Regions are fake in the sense that they are not a truthful + // translation of the OpenMP construct being converted (e.g. no + // OpenMP runtime calls will be generated). We just need this to + // prepare the kernel invocation args. auto result = convertOmpOpRegions( region, oper->getName().getStringRef().str() + ".fake.region", builder, moduleTranslation); >From 57679e15db18bd18585289058681e33db98ddba6 Mon Sep 17 00:00:00 2001 From: ergawy <kareem.erg...@amd.com> Date: Wed, 12 Mar 2025 03:24:35 -0500 Subject: [PATCH 3/4] update docs --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 0b91fe540a948..3f1b7952554af 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -537,19 +537,12 @@ static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) { llvm_unreachable("Unknown ClauseProcBindKind kind"); } -/// Maps elements of \p blockArgs (which are MLIR values) to the corresponding -/// LLVM values of \p operands' elements. This is useful when an OpenMP region -/// with entry block arguments is converted to LLVM. In this case \p blockArgs -/// are (part of) of the OpenMP region's entry arguments and \p operands are -/// (part of) of the operands to the OpenMP op containing the region. -/// -/// This function assumes that \p operands belong to the enclosing op of the -/// block containing \p blockArgs: -/// ``` -/// enclosing_op(operands) { -/// block(blockArgs) -/// } -/// ``` +/// Maps block arguments from \p blockArgIface (which are MLIR values) to the +/// corresponding LLVM values of \p the interface's operands. This is useful +/// when an OpenMP region with entry block arguments is converted to LLVM. In +/// this case the block arguments are (part of) of the OpenMP region's entry +/// arguments and the operands are (part of) of the operands to the OpenMP op +/// containing the region. static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation, omp::BlockArgOpenMPOpInterface blockArgIface) { llvm::SmallVector<std::pair<Value, BlockArgument>> blockArgsPairs; >From 3480c759059cac9fea49b0591d26346cd3d89df4 Mon Sep 17 00:00:00 2001 From: ergawy <kareem.erg...@amd.com> Date: Wed, 12 Mar 2025 10:25:09 -0500 Subject: [PATCH 4/4] handle review comments --- .../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 5 +++-- .../Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir | 3 +-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 3f1b7952554af..8a12ae152c366 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/TypeSwitch.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" @@ -5336,9 +5337,9 @@ convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, assert(builder.GetInsertBlock() && "No insert block is set for the builder"); for (auto iv : loopNest.getIVs()) { - // Create fake allocas just to maintain IR validity. + // Map iv to an undefined value just to keep the IR validity. moduleTranslation.mapValue( - iv, builder.CreateAlloca( + iv, llvm::PoisonValue::get( moduleTranslation.convertType(iv.getType()))); } } diff --git a/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir b/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir index 2b3bde46a787c..71a4c29eaf0aa 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir @@ -51,10 +51,9 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, // CHECK-NEXT: %{{.*}} = alloca i32, i64 1, align 4 // CHECK-NEXT: br label %omp.wsloop.fake.region // CHECK: omp.wsloop.fake.region: -// CHECK-NEXT: %{{.*}} = alloca i32, align 4 // CHECK-NEXT: br label %omp.loop_nest.fake.region // CHECK: omp.loop_nest.fake.region: -// CHECK-NEXT: store ptr %3, ptr %2, align 8 +// CHECK-NEXT: store i32 poison, ptr %{{.*}} // CHECK-NEXT: br label %omp.region.cont1 // CHECK: omp.region.cont1: // CHECK-NEXT: br label %omp.region.cont _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits