Author: Erich Keane Date: 2025-05-06T13:11:49-07:00 New Revision: bb09f79f0f2b8e0cc3ed01d19ae49afbda8f82d2
URL: https://github.com/llvm/llvm-project/commit/bb09f79f0f2b8e0cc3ed01d19ae49afbda8f82d2 DIFF: https://github.com/llvm/llvm-project/commit/bb09f79f0f2b8e0cc3ed01d19ae49afbda8f82d2.diff LOG: [OpenACC] Implement tile/collapse lowering (#138576) These two ended up being pretty similar in frontend implementation, and fairly trivial when doing lowering. The collapse clause jsut results in a normal device_type style attribute with some mild additional complexity, and 'tile' just uses the current infrastructure for 'with segments'. Added: Modified: clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h clang/test/CIR/CodeGenOpenACC/loop.cpp mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h index ff0bf6e7f55dd..fa4ce5efc39ad 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h @@ -12,6 +12,7 @@ #include <type_traits> +#include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/OpenACC/OpenACC.h" namespace clang { // Simple type-trait to see if the first template arg is one of the list, so we @@ -82,6 +83,17 @@ class OpenACCClauseCIREmitter final return conversionOp.getResult(0); } + mlir::Value createConstantInt(mlir::Location loc, unsigned width, + int64_t value) { + mlir::IntegerType ty = mlir::IntegerType::get( + &cgf.getMLIRContext(), width, + mlir::IntegerType::SignednessSemantics::Signless); + auto constOp = builder.create<mlir::arith::ConstantOp>( + loc, builder.getIntegerAttr(ty, value)); + + return constOp.getResult(); + } + mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) { // '*' case leaves no identifier-info, just a nullptr. if (!ii) @@ -336,6 +348,50 @@ class OpenACCClauseCIREmitter final return clauseNotImplemented(clause); } } + + void VisitCollapseClause(const OpenACCCollapseClause &clause) { + if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) { + llvm::APInt value = + clause.getIntExpr()->EvaluateKnownConstInt(cgf.cgm.getASTContext()); + + value = value.sextOrTrunc(64); + operation.setCollapseForDeviceTypes(builder.getContext(), + lastDeviceTypeValues, value); + } else { + // TODO: When we've implemented this for everything, switch this to an + // unreachable. Combined constructs remain. + return clauseNotImplemented(clause); + } + } + + void VisitTileClause(const OpenACCTileClause &clause) { + if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) { + llvm::SmallVector<mlir::Value> values; + + for (const Expr *e : clause.getSizeExprs()) { + mlir::Location exprLoc = cgf.cgm.getLoc(e->getBeginLoc()); + + // We represent the * as -1. Additionally, this is a constant, so we + // can always just emit it as 64 bits to avoid having to do any more + // work to determine signedness or size. + if (isa<OpenACCAsteriskSizeExpr>(e)) { + values.push_back(createConstantInt(exprLoc, 64, -1)); + } else { + llvm::APInt curValue = + e->EvaluateKnownConstInt(cgf.cgm.getASTContext()); + values.push_back(createConstantInt( + exprLoc, 64, curValue.sextOrTrunc(64).getSExtValue())); + } + } + + operation.setTileForDeviceTypes(builder.getContext(), + lastDeviceTypeValues, values); + } else { + // TODO: When we've implemented this for everything, switch this to an + // unreachable. Combined constructs remain. + return clauseNotImplemented(clause); + } + } }; template <typename OpTy> diff --git a/clang/test/CIR/CodeGenOpenACC/loop.cpp b/clang/test/CIR/CodeGenOpenACC/loop.cpp index 2757d935e1f76..b255a01adda0e 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop.cpp @@ -109,4 +109,88 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) { // CHECK: acc.loop { // CHECK: acc.yield // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc + + #pragma acc loop collapse(1) device_type(radeon) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK: acc.loop { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]} + + #pragma acc loop collapse(1) device_type(radeon) collapse (2) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK: acc.loop { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]} + + #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse (2) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK: acc.loop { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]} + #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK: acc.loop { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]} + + #pragma acc loop tile(1, 2, 3) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 + // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64 + // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64}) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + #pragma acc loop tile(2) device_type(radeon) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 + // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + #pragma acc loop tile(2) device_type(radeon) tile (1, *) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64 + // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[STAR_CONST]] : i64} [#acc.device_type<radeon>]) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + #pragma acc loop tile(*) device_type(radeon, nvidia) tile (1, 2) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 + // CHECK-NEXT: acc.loop tile({%[[STAR_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<radeon>], {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + #pragma acc loop tile(1) device_type(radeon, nvidia) tile(2, 3) device_type(host) tile(*, *, *) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 + // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64 + // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64 + // CHECK-NEXT: %[[STAR2_CONST:.*]] = arith.constant -1 : i64 + // CHECK-NEXT: %[[STAR3_CONST:.*]] = arith.constant -1 : i64 + // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64}, {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<radeon>], {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<nvidia>], {%[[STAR_CONST]] : i64, %[[STAR2_CONST]] : i64, %[[STAR3_CONST]] : i64} [#acc.device_type<host>]) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + } diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index c3df064cf0ead..41b01a14a6498 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -2206,6 +2206,16 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", void addIndependent(MLIRContext *, llvm::ArrayRef<DeviceType>); // Add an entry to the 'auto' attribute for each additional device types. void addAuto(MLIRContext *, llvm::ArrayRef<DeviceType>); + + // Sets the collapse value for this 'loop' for a set of DeviceTypes. Note + // that this may only be set once per DeviceType, and will fail the verifier + // if this is set multiple times. + void setCollapseForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>, + llvm::APInt); + // Sets the tile values for this 'loop' for a set of DeviceTypes. All of the + // values should be integral constants, with the '*' represented as a '-1'. + void setTileForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>, + mlir::ValueRange); }]; let hasCustomAssemblyFormat = 1; diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index 39dbb0c92a309..f26b3a5143c0b 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -2669,6 +2669,57 @@ void acc::LoopOp::addAuto(MLIRContext *context, effectiveDeviceTypes)); } +void acc::LoopOp::setCollapseForDeviceTypes( + MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes, + llvm::APInt value) { + llvm::SmallVector<mlir::Attribute> newValues; + llvm::SmallVector<mlir::Attribute> newDeviceTypes; + + assert((getCollapseAttr() == nullptr) == + (getCollapseDeviceTypeAttr() == nullptr)); + assert(value.getBitWidth() == 64); + + if (getCollapseAttr()) { + for (const auto &existing : + llvm::zip_equal(getCollapseAttr(), getCollapseDeviceTypeAttr())) { + newValues.push_back(std::get<0>(existing)); + newDeviceTypes.push_back(std::get<1>(existing)); + } + } + + if (effectiveDeviceTypes.empty()) { + // If the effective device-types list is empty, this is before there are any + // being applied by device_type, so this should be added as a 'none'. + newValues.push_back( + mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value)); + newDeviceTypes.push_back( + acc::DeviceTypeAttr::get(context, DeviceType::None)); + } else { + for (DeviceType DT : effectiveDeviceTypes) { + newValues.push_back( + mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value)); + newDeviceTypes.push_back(acc::DeviceTypeAttr::get(context, DT)); + } + } + + setCollapseAttr(ArrayAttr::get(context, newValues)); + setCollapseDeviceTypeAttr(ArrayAttr::get(context, newDeviceTypes)); +} + +void acc::LoopOp::setTileForDeviceTypes( + MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes, + ValueRange values) { + llvm::SmallVector<int32_t> segments; + if (getTileOperandsSegments()) + llvm::copy(*getTileOperandsSegments(), std::back_inserter(segments)); + + setTileOperandsDeviceTypeAttr(addDeviceTypeAffectedOperandHelper( + context, getTileOperandsDeviceTypeAttr(), effectiveDeviceTypes, values, + getTileOperandsMutable(), segments)); + + setTileOperandsSegments(segments); +} + //===----------------------------------------------------------------------===// // DataOp //===----------------------------------------------------------------------===// _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits