https://github.com/erichkeane updated https://github.com/llvm/llvm-project/pull/138576
>From 57c9faf4a0bc4a589f56fee528df8b06bdec7e54 Mon Sep 17 00:00:00 2001 From: erichkeane <eke...@nvidia.com> Date: Mon, 5 May 2025 10:16:35 -0700 Subject: [PATCH 1/3] [OpenACC] Implement tile/collapse lowering These two ended up being pretty similar in frontend implementation, and fairly trivial when doing lowering. The collapse clause jsut results in a normal device_type style attribute with some mild additional complexity, and 'tile' just uses the current infrastructure for 'with segments'. --- clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h | 58 +++++++++++++ clang/test/CIR/CodeGenOpenACC/loop.cpp | 84 +++++++++++++++++++ .../mlir/Dialect/OpenACC/OpenACCOps.td | 10 +++ mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 51 +++++++++++ 4 files changed, 203 insertions(+) diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h index ff0bf6e7f55dd..7223a8ed15bd5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h @@ -12,6 +12,7 @@ #include <type_traits> +#include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/OpenACC/OpenACC.h" namespace clang { // Simple type-trait to see if the first template arg is one of the list, so we @@ -82,6 +83,17 @@ class OpenACCClauseCIREmitter final return conversionOp.getResult(0); } + mlir::Value createConstantInt(mlir::Location loc, unsigned width, + int64_t value) { + mlir::IntegerType ty = mlir::IntegerType::get( + &cgf.getMLIRContext(), width, + mlir::IntegerType::SignednessSemantics::Signless); + auto constOp = builder.create<mlir::arith::ConstantOp>( + loc, ty, builder.getIntegerAttr(ty, value)); + + return constOp.getResult(); + } + mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) { // '*' case leaves no identifier-info, just a nullptr. if (!ii) @@ -336,6 +348,52 @@ class OpenACCClauseCIREmitter final return clauseNotImplemented(clause); } } + + void VisitCollapseClause(const OpenACCCollapseClause &clause) { + if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) { + llvm::APInt value = + clause.getIntExpr()->EvaluateKnownConstInt(cgf.cgm.getASTContext()); + + if (value.getBitWidth() != 64) + value = value.sext(64); + + operation.setCollapseForDeviceTypes(builder.getContext(), + lastDeviceTypeValues, value); + } else { + // TODO: When we've implemented this for everything, switch this to an + // unreachable. Combined constructs remain. + return clauseNotImplemented(clause); + } + } + + void VisitTileClause(const OpenACCTileClause &clause) { + if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) { + llvm::SmallVector<mlir::Value> values; + + for (const Expr *e : clause.getSizeExprs()) { + mlir::Location exprLoc = cgf.cgm.getLoc(e->getBeginLoc()); + + // We represent the * as -1. Additionally, this is a constant, so we + // can always just emit it as 64 bits to avoid having to do any more + // work to determine signedness or size. + if (isa<OpenACCAsteriskSizeExpr>(e)) { + values.push_back(createConstantInt(exprLoc, 64, -1)); + } else { + llvm::APInt curValue = + e->EvaluateKnownConstInt(cgf.cgm.getASTContext()); + values.push_back( + createConstantInt(exprLoc, 64, curValue.getSExtValue())); + } + } + + operation.setTileForDeviceTypes(builder.getContext(), + lastDeviceTypeValues, values); + } else { + // TODO: When we've implemented this for everything, switch this to an + // unreachable. Combined constructs remain. + return clauseNotImplemented(clause); + } + } }; template <typename OpTy> diff --git a/clang/test/CIR/CodeGenOpenACC/loop.cpp b/clang/test/CIR/CodeGenOpenACC/loop.cpp index 2757d935e1f76..b255a01adda0e 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop.cpp @@ -109,4 +109,88 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) { // CHECK: acc.loop { // CHECK: acc.yield // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc + + #pragma acc loop collapse(1) device_type(radeon) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK: acc.loop { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]} + + #pragma acc loop collapse(1) device_type(radeon) collapse (2) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK: acc.loop { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]} + + #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse (2) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK: acc.loop { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]} + #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK: acc.loop { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]} + + #pragma acc loop tile(1, 2, 3) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 + // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64 + // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64}) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + #pragma acc loop tile(2) device_type(radeon) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 + // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + #pragma acc loop tile(2) device_type(radeon) tile (1, *) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64 + // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[STAR_CONST]] : i64} [#acc.device_type<radeon>]) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + #pragma acc loop tile(*) device_type(radeon, nvidia) tile (1, 2) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 + // CHECK-NEXT: acc.loop tile({%[[STAR_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<radeon>], {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + #pragma acc loop tile(1) device_type(radeon, nvidia) tile(2, 3) device_type(host) tile(*, *, *) + for(unsigned I = 0; I < N; ++I) + for(unsigned J = 0; J < N; ++J) + for(unsigned K = 0; K < N; ++K); + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 + // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64 + // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64 + // CHECK-NEXT: %[[STAR2_CONST:.*]] = arith.constant -1 : i64 + // CHECK-NEXT: %[[STAR3_CONST:.*]] = arith.constant -1 : i64 + // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64}, {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<radeon>], {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<nvidia>], {%[[STAR_CONST]] : i64, %[[STAR2_CONST]] : i64, %[[STAR3_CONST]] : i64} [#acc.device_type<host>]) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + } diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index c3df064cf0ead..41b01a14a6498 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -2206,6 +2206,16 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", void addIndependent(MLIRContext *, llvm::ArrayRef<DeviceType>); // Add an entry to the 'auto' attribute for each additional device types. void addAuto(MLIRContext *, llvm::ArrayRef<DeviceType>); + + // Sets the collapse value for this 'loop' for a set of DeviceTypes. Note + // that this may only be set once per DeviceType, and will fail the verifier + // if this is set multiple times. + void setCollapseForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>, + llvm::APInt); + // Sets the tile values for this 'loop' for a set of DeviceTypes. All of the + // values should be integral constants, with the '*' represented as a '-1'. + void setTileForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>, + mlir::ValueRange); }]; let hasCustomAssemblyFormat = 1; diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index 39dbb0c92a309..f26b3a5143c0b 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -2669,6 +2669,57 @@ void acc::LoopOp::addAuto(MLIRContext *context, effectiveDeviceTypes)); } +void acc::LoopOp::setCollapseForDeviceTypes( + MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes, + llvm::APInt value) { + llvm::SmallVector<mlir::Attribute> newValues; + llvm::SmallVector<mlir::Attribute> newDeviceTypes; + + assert((getCollapseAttr() == nullptr) == + (getCollapseDeviceTypeAttr() == nullptr)); + assert(value.getBitWidth() == 64); + + if (getCollapseAttr()) { + for (const auto &existing : + llvm::zip_equal(getCollapseAttr(), getCollapseDeviceTypeAttr())) { + newValues.push_back(std::get<0>(existing)); + newDeviceTypes.push_back(std::get<1>(existing)); + } + } + + if (effectiveDeviceTypes.empty()) { + // If the effective device-types list is empty, this is before there are any + // being applied by device_type, so this should be added as a 'none'. + newValues.push_back( + mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value)); + newDeviceTypes.push_back( + acc::DeviceTypeAttr::get(context, DeviceType::None)); + } else { + for (DeviceType DT : effectiveDeviceTypes) { + newValues.push_back( + mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value)); + newDeviceTypes.push_back(acc::DeviceTypeAttr::get(context, DT)); + } + } + + setCollapseAttr(ArrayAttr::get(context, newValues)); + setCollapseDeviceTypeAttr(ArrayAttr::get(context, newDeviceTypes)); +} + +void acc::LoopOp::setTileForDeviceTypes( + MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes, + ValueRange values) { + llvm::SmallVector<int32_t> segments; + if (getTileOperandsSegments()) + llvm::copy(*getTileOperandsSegments(), std::back_inserter(segments)); + + setTileOperandsDeviceTypeAttr(addDeviceTypeAffectedOperandHelper( + context, getTileOperandsDeviceTypeAttr(), effectiveDeviceTypes, values, + getTileOperandsMutable(), segments)); + + setTileOperandsSegments(segments); +} + //===----------------------------------------------------------------------===// // DataOp //===----------------------------------------------------------------------===// >From a105da110a4c9179d5e99f8919290213d4b0dd66 Mon Sep 17 00:00:00 2001 From: erichkeane <eke...@nvidia.com> Date: Tue, 6 May 2025 06:25:47 -0700 Subject: [PATCH 2/3] Be more tolerant of constant integral sizes --- clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h index 7223a8ed15bd5..69025d038d02c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h @@ -354,9 +354,7 @@ class OpenACCClauseCIREmitter final llvm::APInt value = clause.getIntExpr()->EvaluateKnownConstInt(cgf.cgm.getASTContext()); - if (value.getBitWidth() != 64) - value = value.sext(64); - + value = value.sextOrTrunc(64); operation.setCollapseForDeviceTypes(builder.getContext(), lastDeviceTypeValues, value); } else { @@ -381,8 +379,8 @@ class OpenACCClauseCIREmitter final } else { llvm::APInt curValue = e->EvaluateKnownConstInt(cgf.cgm.getASTContext()); - values.push_back( - createConstantInt(exprLoc, 64, curValue.getSExtValue())); + values.push_back(createConstantInt( + exprLoc, 64, curValue.sextOrTrunc(64).getSExtValue())); } } >From e7646cc7b8d01ba2ed4c5a8d6209c76d9f382d47 Mon Sep 17 00:00:00 2001 From: erichkeane <eke...@nvidia.com> Date: Tue, 6 May 2025 07:14:51 -0700 Subject: [PATCH 3/3] Remove unneeded type specification --- clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h index 69025d038d02c..fa4ce5efc39ad 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h @@ -89,7 +89,7 @@ class OpenACCClauseCIREmitter final &cgf.getMLIRContext(), width, mlir::IntegerType::SignednessSemantics::Signless); auto constOp = builder.create<mlir::arith::ConstantOp>( - loc, ty, builder.getIntegerAttr(ty, value)); + loc, builder.getIntegerAttr(ty, value)); return constOp.getResult(); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits