Author: erichkeane Date: 2025-05-09T11:26:15-07:00 New Revision: 3668a3a7c8a0a4cdb4bd781529bb72b8588e8f99
URL: https://github.com/llvm/llvm-project/commit/3668a3a7c8a0a4cdb4bd781529bb72b8588e8f99 DIFF: https://github.com/llvm/llvm-project/commit/3668a3a7c8a0a4cdb4bd781529bb72b8588e8f99.diff LOG: [OpenACC][CIR] 'if'/'self' combined construct lowering These two require that we correctly set up the 'insertion points' for the compute construct when doing a combined construct. This patch adds that and verifies that we're doing it correctly. Added: Modified: clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h clang/test/CIR/CodeGenOpenACC/combined.cpp Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h index 8892c49e41202..3692560b06e6f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h @@ -148,7 +148,8 @@ class OpenACCClauseCIREmitter final template <typename U = void, typename = std::enable_if_t<isCombinedType<OpTy>, U>> void applyToComputeOp(const OpenACCClause &c) { - // TODO OpenACC: we have to set the insertion scope here correctly still. + mlir::OpBuilder::InsertionGuard guardCase(builder); + builder.setInsertionPoint(operation.computeOp); OpenACCClauseCIREmitter<typename OpTy::ComputeOpTy> computeEmitter{ operation.computeOp, cgf, builder, dirKind, dirLoc}; computeEmitter.lastDeviceTypeValues = lastDeviceTypeValues; @@ -288,9 +289,11 @@ class OpenACCClauseCIREmitter final } else { llvm_unreachable("var-list version of self shouldn't get here"); } + } else if constexpr (isCombinedType<OpTy>) { + applyToComputeOp(clause); } else { // TODO: When we've implemented this for everything, switch this to an - // unreachable. If, combined constructs remain. + // unreachable. update construct remains. return clauseNotImplemented(clause); } } @@ -302,13 +305,15 @@ class OpenACCClauseCIREmitter final mlir::acc::DataOp, mlir::acc::WaitOp>) { operation.getIfCondMutable().append( createCondition(clause.getConditionExpr())); + } else if constexpr (isCombinedType<OpTy>) { + applyToComputeOp(clause); } else { // 'if' applies to most of the constructs, but hold off on lowering them // until we can write tests/know what we're doing with codegen to make // sure we get it right. // TODO: When we've implemented this for everything, switch this to an - // unreachable. Enter data, exit data, host_data, update, combined - // constructs remain. + // unreachable. Enter data, exit data, host_data, update constructs + // remain. return clauseNotImplemented(clause); } } diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp index 3b2ae8a97d8c5..da8347a7f89c4 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp @@ -176,4 +176,80 @@ extern "C" void acc_combined(int N) { // CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]} // CHECK: acc.yield // CHECK-NEXT: } loc + +#pragma acc kernels loop self + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: acc.kernels combined(loop) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } attributes {selfAttr} + +#pragma acc serial loop self(N) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[N_LOAD]] : !s32i), !cir.bool + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1 + // CHECK-NEXT: acc.serial combined(loop) self(%[[CONV_CAST]]) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc parallel loop if(N) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[N_LOAD]] : !s32i), !cir.bool + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1 + // CHECK-NEXT: acc.parallel combined(loop) if(%[[CONV_CAST]]) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc serial loop if(1) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[ONE_LITERAL]] : !s32i), !cir.bool + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1 + // CHECK-NEXT: acc.serial combined(loop) if(%[[CONV_CAST]]) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc kernels loop if(N == 1) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[EQ_RES:.*]] = cir.cmp(eq, %[[N_LOAD]], %[[ONE_LITERAL]]) : !s32i, !cir.bool + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[EQ_RES]] : !cir.bool to i1 + // CHECK-NEXT: acc.kernels combined(loop) if(%[[CONV_CAST]]) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + +#pragma acc parallel loop if(N == 1) self(N == 2) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[EQ_RES_IF:.*]] = cir.cmp(eq, %[[N_LOAD]], %[[ONE_LITERAL]]) : !s32i, !cir.bool + // CHECK-NEXT: %[[CONV_CAST_IF:.*]] = builtin.unrealized_conversion_cast %[[EQ_RES_IF]] : !cir.bool to i1 + // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[EQ_RES_SELF:.*]] = cir.cmp(eq, %[[N_LOAD]], %[[TWO_LITERAL]]) : !s32i, !cir.bool + // CHECK-NEXT: %[[CONV_CAST_SELF:.*]] = builtin.unrealized_conversion_cast %[[EQ_RES_SELF]] : !cir.bool to i1 + // CHECK-NEXT: acc.parallel combined(loop) self(%[[CONV_CAST_SELF]]) if(%[[CONV_CAST_IF]]) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits