Author: erichkeane Date: 2025-04-21T12:47:47-07:00 New Revision: b7c521b922f8b81544ecb0ccff2847644cac3107
URL: https://github.com/llvm/llvm-project/commit/b7c521b922f8b81544ecb0ccff2847644cac3107 DIFF: https://github.com/llvm/llvm-project/commit/b7c521b922f8b81544ecb0ccff2847644cac3107.diff LOG: [OpenACC][CIR] Lowering for 'vector_length' on compute constructs This is the same as the 'num_workers', with slightly different names in places, so we just do the same exact implementation. This extracts the implementation as well, which should make it easier to reuse. Added: Modified: clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp clang/test/CIR/CodeGenOpenACC/kernels.c clang/test/CIR/CodeGenOpenACC/parallel.c Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp index b79baa96a3fc3..e7dd2e74b0864 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp @@ -82,6 +82,56 @@ class OpenACCClauseCIREmitter final return conversionOp.getResult(0); } + mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) { + // '*' case leaves no identifier-info, just a nullptr. + if (!ii) + return mlir::acc::DeviceType::Star; + return llvm::StringSwitch<mlir::acc::DeviceType>(ii->getName()) + .CaseLower("default", mlir::acc::DeviceType::Default) + .CaseLower("host", mlir::acc::DeviceType::Host) + .CaseLower("multicore", mlir::acc::DeviceType::Multicore) + .CasesLower("nvidia", "acc_device_nvidia", + mlir::acc::DeviceType::Nvidia) + .CaseLower("radeon", mlir::acc::DeviceType::Radeon); + } + + // Handle a clause affected by the 'device-type' to the point that they need + // to have the attributes added in the correct/corresponding order, such as + // 'num_workers' or 'vector_length' on a compute construct. + mlir::ArrayAttr + handleDeviceTypeAffectedClause(mlir::ArrayAttr existingDeviceTypes, + mlir::Value argument, + mlir::MutableOperandRange &argCollection) { + llvm::SmallVector<mlir::Attribute> deviceTypes; + + // Collect the 'existing' device-type attributes so we can re-create them + // and insert them. + if (existingDeviceTypes) { + for (const mlir::Attribute &Attr : existingDeviceTypes) + deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get( + builder.getContext(), + cast<mlir::acc::DeviceTypeAttr>(Attr).getValue())); + } + + // Insert 1 version of the 'expr' to the NumWorkers list per-current + // device type. + if (lastDeviceTypeClause) { + for (const DeviceTypeArgument &arch : + lastDeviceTypeClause->getArchitectures()) { + deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get( + builder.getContext(), decodeDeviceType(arch.getIdentifierInfo()))); + argCollection.append(argument); + } + } else { + // Else, we just add a single for 'none'. + deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get( + builder.getContext(), mlir::acc::DeviceType::None)); + argCollection.append(argument); + } + + return mlir::ArrayAttr::get(builder.getContext(), deviceTypes); + } + public: OpenACCClauseCIREmitter(OpTy &operation, CIRGenFunction &cgf, CIRGenBuilderTy &builder, @@ -112,19 +162,6 @@ class OpenACCClauseCIREmitter final } } - mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) { - // '*' case leaves no identifier-info, just a nullptr. - if (!ii) - return mlir::acc::DeviceType::Star; - return llvm::StringSwitch<mlir::acc::DeviceType>(ii->getName()) - .CaseLower("default", mlir::acc::DeviceType::Default) - .CaseLower("host", mlir::acc::DeviceType::Host) - .CaseLower("multicore", mlir::acc::DeviceType::Multicore) - .CasesLower("nvidia", "acc_device_nvidia", - mlir::acc::DeviceType::Nvidia) - .CaseLower("radeon", mlir::acc::DeviceType::Radeon); - } - void VisitDeviceTypeClause(const OpenACCDeviceTypeClause &clause) { lastDeviceTypeClause = &clause; if constexpr (isOneOfTypes<OpTy, InitOp, ShutdownOp>) { @@ -165,38 +202,10 @@ class OpenACCClauseCIREmitter final void VisitNumWorkersClause(const OpenACCNumWorkersClause &clause) { if constexpr (isOneOfTypes<OpTy, ParallelOp, KernelsOp>) { - // Collect the 'existing' device-type attributes so we can re-create them - // and insert them. - llvm::SmallVector<mlir::Attribute> deviceTypes; - mlir::ArrayAttr existingDeviceTypes = - operation.getNumWorkersDeviceTypeAttr(); - - if (existingDeviceTypes) { - for (mlir::Attribute Attr : existingDeviceTypes) - deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get( - builder.getContext(), - cast<mlir::acc::DeviceTypeAttr>(Attr).getValue())); - } - - // Insert 1 version of the 'int-expr' to the NumWorkers list per-current - // device type. - mlir::Value intExpr = createIntExpr(clause.getIntExpr()); - if (lastDeviceTypeClause) { - for (const DeviceTypeArgument &arg : - lastDeviceTypeClause->getArchitectures()) { - deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get( - builder.getContext(), decodeDeviceType(arg.getIdentifierInfo()))); - operation.getNumWorkersMutable().append(intExpr); - } - } else { - // Else, we just add a single for 'none'. - deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get( - builder.getContext(), mlir::acc::DeviceType::None)); - operation.getNumWorkersMutable().append(intExpr); - } - - operation.setNumWorkersDeviceTypeAttr( - mlir::ArrayAttr::get(builder.getContext(), deviceTypes)); + mlir::MutableOperandRange range = operation.getNumWorkersMutable(); + operation.setNumWorkersDeviceTypeAttr(handleDeviceTypeAffectedClause( + operation.getNumWorkersDeviceTypeAttr(), + createIntExpr(clause.getIntExpr()), range)); } else if constexpr (isOneOfTypes<OpTy, SerialOp>) { llvm_unreachable("num_workers not valid on serial"); } else { @@ -204,6 +213,19 @@ class OpenACCClauseCIREmitter final } } + void VisitVectorLengthClause(const OpenACCVectorLengthClause &clause) { + if constexpr (isOneOfTypes<OpTy, ParallelOp, KernelsOp>) { + mlir::MutableOperandRange range = operation.getVectorLengthMutable(); + operation.setVectorLengthDeviceTypeAttr(handleDeviceTypeAffectedClause( + operation.getVectorLengthDeviceTypeAttr(), + createIntExpr(clause.getIntExpr()), range)); + } else if constexpr (isOneOfTypes<OpTy, SerialOp>) { + llvm_unreachable("vector_length not valid on serial"); + } else { + return clauseNotImplemented(clause); + } + } + void VisitSelfClause(const OpenACCSelfClause &clause) { if constexpr (isOneOfTypes<OpTy, ParallelOp, SerialOp, KernelsOp>) { if (clause.isEmptySelfClause()) { diff --git a/clang/test/CIR/CodeGenOpenACC/kernels.c b/clang/test/CIR/CodeGenOpenACC/kernels.c index 6459b310546cd..d2da1d18f1534 100644 --- a/clang/test/CIR/CodeGenOpenACC/kernels.c +++ b/clang/test/CIR/CodeGenOpenACC/kernels.c @@ -158,5 +158,57 @@ void acc_kernels(int cond) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc +#pragma acc kernels vector_length(cond) + {} + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + +#pragma acc kernels vector_length(cond) device_type(nvidia) vector_length(2u) + {} + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32 + // CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type<nvidia>]) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + +#pragma acc kernels vector_length(cond) device_type(nvidia, host) vector_length(2) device_type(radeon) vector_length(3) + {} + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[TWO_CAST]] : si32 [#acc.device_type<host>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + +#pragma acc kernels vector_length(cond) device_type(nvidia) vector_length(2) device_type(radeon, multicore) vector_length(3) + {} + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>], %[[THREE_CAST]] : si32 [#acc.device_type<multicore>]) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + +#pragma acc kernels device_type(nvidia) vector_length(2) device_type(radeon) vector_length(3) + {} + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.kernels vector_length(%[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: cir.return } diff --git a/clang/test/CIR/CodeGenOpenACC/parallel.c b/clang/test/CIR/CodeGenOpenACC/parallel.c index bdb506ee7e1d2..61dccc591c252 100644 --- a/clang/test/CIR/CodeGenOpenACC/parallel.c +++ b/clang/test/CIR/CodeGenOpenACC/parallel.c @@ -157,5 +157,57 @@ void acc_parallel(int cond) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc +#pragma acc parallel vector_length(cond) + {} + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: acc.parallel vector_length(%[[CONV_CAST]] : si32) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc parallel vector_length(cond) device_type(nvidia) vector_length(2u) + {} + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32 + // CHECK-NEXT: acc.parallel vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type<nvidia>]) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc parallel vector_length(cond) device_type(nvidia, host) vector_length(2) device_type(radeon) vector_length(3) + {} + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.parallel vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[TWO_CAST]] : si32 [#acc.device_type<host>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc parallel vector_length(cond) device_type(nvidia) vector_length(2) device_type(radeon, multicore) vector_length(4) + {} + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i + // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.parallel vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[FOUR_CAST]] : si32 [#acc.device_type<radeon>], %[[FOUR_CAST]] : si32 [#acc.device_type<multicore>]) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc parallel device_type(nvidia) vector_length(2) device_type(radeon) vector_length(3) + {} + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.parallel vector_length(%[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: cir.return } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits