Author: erichkeane Date: 2025-05-16T08:17:29-07:00 New Revision: 8696d16242d220373460ab17f9fc10b2dd5d38dc
URL: https://github.com/llvm/llvm-project/commit/8696d16242d220373460ab17f9fc10b2dd5d38dc DIFF: https://github.com/llvm/llvm-project/commit/8696d16242d220373460ab17f9fc10b2dd5d38dc.diff LOG: [OpenACC][CIR] Implement 'async' lowering for combined constructs Implementation is 'trivial', as were the rest of the non-data clauses, so this implements it, finishing the last non-data/var-list clause for combined constructs. Also ensures this is properly tested. Added: Modified: clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h clang/test/CIR/CodeGenOpenACC/combined.cpp clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h index 5b3fb5527334a..9adbe6a497214 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h @@ -266,10 +266,12 @@ class OpenACCClauseCIREmitter final else operation.getAsyncOperandMutable().append( createIntExpr(clause.getIntExpr())); + } else if constexpr (isCombinedType<OpTy>) { + applyToComputeOp(clause); } else { // TODO: When we've implemented this for everything, switch this to an // unreachable. Combined constructs remain. Data, enter data, exit data, - // update, combined constructs remain. + // update constructs remain. 
return clauseNotImplemented(clause); } } diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp index 350e5f8efc2bd..d55ce762ce6f1 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp @@ -947,4 +947,67 @@ extern "C" void acc_combined(int N, int cond) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc +#pragma acc parallel loop async + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: acc.parallel combined(loop) async { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc serial loop async(cond) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: acc.serial combined(loop) async(%[[CONV_CAST]] : si32) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc kernels loop async device_type(nvidia, radeon) async + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: acc.kernels combined(loop) async([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + +#pragma acc parallel loop async(3) device_type(nvidia, radeon) async(cond) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // 
CHECK-NEXT: acc.parallel combined(loop) async(%[[THREE_CAST]] : si32, %[[CONV_CAST]] : si32 [#acc.device_type<nvidia>], %[[CONV_CAST]] : si32 [#acc.device_type<radeon>]) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc serial loop async device_type(nvidia, radeon) async(cond) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: acc.serial combined(loop) async([#acc.device_type<none>], %[[CONV_CAST]] : si32 [#acc.device_type<nvidia>], %[[CONV_CAST]] : si32 [#acc.device_type<radeon>]) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc kernels loop async(3) device_type(nvidia, radeon) async + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i + // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.kernels combined(loop) async([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[THREE_CAST]] : si32) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc } diff --git a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp b/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp index b3299c0b4c137..95b04a314ad8e 100644 --- a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp +++ b/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp @@ -13,7 +13,7 @@ void HelloWorld(int *A, int *B, int *C, int N) { // expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Clause: private}} #pragma acc parallel loop private(A) for(int i = 0; i <5; 
++i); - // expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Clause: async}} -#pragma acc parallel loop async + // expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Clause: reduction}} +#pragma acc parallel loop reduction(+:A) for(int i = 0; i <5; ++i); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits