Author: erichkeane Date: 2025-05-09T11:54:16-07:00 New Revision: a783edf3db8eaa9797e25cbece7a71370f968d3d
URL: https://github.com/llvm/llvm-project/commit/a783edf3db8eaa9797e25cbece7a71370f968d3d DIFF: https://github.com/llvm/llvm-project/commit/a783edf3db8eaa9797e25cbece7a71370f968d3d.diff LOG: [OpenACC][CIR] 'tile' lowering for combined constructs This clause requires that we attach it to the 'loop', and can generate variables, so this is the first loop clause to require that we properly set up the insertion location. This patch does so, as a part of lowering 'tile' correctly. Added: Modified: clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h clang/test/CIR/CodeGenOpenACC/combined.cpp Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h index 3692560b06e6f..86997dd057aa3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h @@ -138,7 +138,8 @@ class OpenACCClauseCIREmitter final template <typename U = void, typename = std::enable_if_t<isCombinedType<OpTy>, U>> void applyToLoopOp(const OpenACCClause &c) { - // TODO OpenACC: we have to set the insertion scope here correctly still. + mlir::OpBuilder::InsertionGuard guardCase(builder); + builder.setInsertionPoint(operation.loopOp); OpenACCClauseCIREmitter<mlir::acc::LoopOp> loopEmitter{ operation.loopOp, cgf, builder, dirKind, dirLoc}; loopEmitter.lastDeviceTypeValues = lastDeviceTypeValues; @@ -448,10 +449,10 @@ class OpenACCClauseCIREmitter final operation.setTileForDeviceTypes(builder.getContext(), lastDeviceTypeValues, values); + } else if constexpr (isCombinedType<OpTy>) { + applyToLoopOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. Combined constructs remain. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitTileClause"); } } diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp index da8347a7f89c4..50c831c286e0e 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp @@ -252,4 +252,71 @@ extern "C" void acc_combined(int N) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc + #pragma acc parallel loop tile(1, 2, 3) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK-NEXT: acc.parallel combined(loop) { + // CHECK: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 + // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64 + // CHECK-NEXT: acc.loop combined(parallel) tile({%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64}) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + #pragma acc serial loop tile(2) device_type(radeon) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK-NEXT: acc.serial combined(loop) { + // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 + // CHECK-NEXT: acc.loop combined(serial) tile({%[[TWO_CONST]] : i64}) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + #pragma acc kernels loop tile(2) device_type(radeon) tile (1, *) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK-NEXT: acc.kernels combined(loop) { + // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64 + // CHECK-NEXT: acc.loop combined(kernels) tile({%[[TWO_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[STAR_CONST]] : i64} [#acc.device_type<radeon>]) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + #pragma acc parallel loop tile(*) device_type(radeon, nvidia) tile (1, 2) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK-NEXT: acc.parallel combined(loop) { + // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 + // CHECK-NEXT: acc.loop combined(parallel) tile({%[[STAR_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<radeon>], {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + #pragma acc serial loop tile(1) device_type(radeon, nvidia) tile(2, 3) device_type(host) tile(*, *, *) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK-NEXT: acc.serial combined(loop) { + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 + // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64 + // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64 + // CHECK-NEXT: %[[STAR2_CONST:.*]] = arith.constant -1 : i64 + // CHECK-NEXT: %[[STAR3_CONST:.*]] = arith.constant -1 : i64 + // CHECK-NEXT: acc.loop combined(serial) tile({%[[ONE_CONST]] : i64}, {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<radeon>], {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<nvidia>], {%[[STAR_CONST]] : i64, %[[STAR2_CONST]] : i64, %[[STAR3_CONST]] : i64} [#acc.device_type<host>]) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits