https://github.com/RossBrunton updated https://github.com/llvm/llvm-project/pull/150036
>From d28c1a4958738e3ada876a831d9179cf134a4e66 Mon Sep 17 00:00:00 2001 From: Ross Brunton <r...@codeplay.com> Date: Tue, 22 Jul 2025 15:50:18 +0100 Subject: [PATCH 1/3] [Offload] Add (a new version of) olWaitQueue Not to be confused with olSyncQueue, which used to be called olWaitQueue until #150023. This function causes a queue to wait until all the provided events have completed before running any future scheduled work. --- offload/liboffload/API/Queue.td | 17 ++ offload/liboffload/src/OffloadImpl.cpp | 22 +++ offload/unittests/OffloadAPI/CMakeLists.txt | 3 +- .../OffloadAPI/device_code/CMakeLists.txt | 2 + .../OffloadAPI/device_code/sequence.c | 11 ++ .../OffloadAPI/queue/olWaitQueue.cpp | 148 ++++++++++++++++++ 6 files changed, 202 insertions(+), 1 deletion(-) create mode 100644 offload/unittests/OffloadAPI/device_code/sequence.c create mode 100644 offload/unittests/OffloadAPI/queue/olWaitQueue.cpp diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td index 19327cdab4254..43c723de54510 100644 --- a/offload/liboffload/API/Queue.td +++ b/offload/liboffload/API/Queue.td @@ -41,6 +41,23 @@ def : Function { let returns = []; } +def : Function { + let name = "olWaitQueue"; + let desc = "Make any future work submitted to this queue wait until the provided events are complete."; + let details = [ + "All events in `Events` must complete beforet he queue is unblocked.", + "The input events can be from any queue on any device provided by the same platform as `Queue`.", + ]; + let params = [ + Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>, + Param<"ol_event_handle_t *", "Events", "list of `NumEvents` events to wait for", PARAM_IN>, + Param<"size_t", "NumEvents", "size of `Events`", PARAM_IN>, + ]; + let returns = [ + Return<"OL_ERRC_INVALID_NULL_HANDLE", ["Any event handle in the list is NULL"]>, + ]; +} + def : Enum { let name = "ol_queue_info_t"; let desc = "Supported queue info."; diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index d93e4f1db58a7..c155a6b85387c 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -500,6 +500,28 @@ Error olSyncQueue_impl(ol_queue_handle_t Queue) { return Error::success(); } +Error olWaitQueue_impl(ol_queue_handle_t Queue, ol_event_handle_t *Events, + size_t NumEvents) { + auto *Device = Queue->Device->Device; + + for (size_t I = 0; I < NumEvents; I++) { + auto *Event = Events[I]; + + if (!Event) + return Plugin::error(ErrorCode::INVALID_NULL_HANDLE, + "olWaitQueue asked to wait on a NULL event"); + + // Do nothing if the event is for this queue + if (Event->Queue == Queue) + continue; + + if (auto Err = Device->waitEvent(Event->EventInfo, Queue->AsyncInfo)) + return Err; + } + + return Error::success(); +} + Error olGetQueueInfoImplDetail(ol_queue_handle_t Queue, ol_queue_info_t PropName, size_t PropSize, void *PropValue, size_t *PropSizeRet) { diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt index f09cfc6bb0876..2621eaeb64e82 100644 --- a/offload/unittests/OffloadAPI/CMakeLists.txt +++ b/offload/unittests/OffloadAPI/CMakeLists.txt @@ -39,7 +39,8 @@ add_offload_unittest("queue" queue/olSyncQueue.cpp queue/olDestroyQueue.cpp queue/olGetQueueInfo.cpp - queue/olGetQueueInfoSize.cpp) + queue/olGetQueueInfoSize.cpp + queue/olWaitQueue.cpp) add_offload_unittest("symbol" symbol/olGetSymbol.cpp diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt index 11c8ccbd6c7c5..0e4695ee9969f 100644 --- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt +++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt @@ -8,6 +8,7 @@ add_offload_test_device_code(localmem_static.c localmem_static) add_offload_test_device_code(global.c global) add_offload_test_device_code(global_ctor.c global_ctor) add_offload_test_device_code(global_dtor.c global_dtor) +add_offload_test_device_code(sequence.c sequence) add_custom_target(offload_device_binaries DEPENDS foo.bin @@ -19,5 +20,6 @@ add_custom_target(offload_device_binaries DEPENDS global.bin global_ctor.bin global_dtor.bin + sequence.bin ) set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) diff --git a/offload/unittests/OffloadAPI/device_code/sequence.c b/offload/unittests/OffloadAPI/device_code/sequence.c new file mode 100644 index 0000000000000..22504086ffa38 --- /dev/null +++ b/offload/unittests/OffloadAPI/device_code/sequence.c @@ -0,0 +1,11 @@ +#include <gpuintrin.h> +#include <stdint.h> + +__gpu_kernel void sequence(uint32_t idx, uint32_t *inout) { + if (idx == 0) + inout[idx] = 0; + else if (idx == 1) + inout[idx] = 1; + else + inout[idx] = inout[idx-1] + inout[idx-2]; +} diff --git a/offload/unittests/OffloadAPI/queue/olWaitQueue.cpp b/offload/unittests/OffloadAPI/queue/olWaitQueue.cpp new file mode 100644 index 0000000000000..fdf272dafa911 --- /dev/null +++ b/offload/unittests/OffloadAPI/queue/olWaitQueue.cpp @@ -0,0 +1,148 @@ +//===------- Offload API tests - olWaitQueue ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../common/Fixtures.hpp" +#include <OffloadAPI.h> +#include <gtest/gtest.h> + +struct olWaitQueueTest : OffloadProgramTest { + void SetUp() override { + RETURN_ON_FATAL_FAILURE(OffloadProgramTest::SetUpWith("sequence")); + ASSERT_SUCCESS( + olGetSymbol(Program, "sequence", OL_SYMBOL_KIND_KERNEL, &Kernel)); + LaunchArgs.Dimensions = 1; + LaunchArgs.GroupSize = {1, 1, 1}; + LaunchArgs.NumGroups = {1, 1, 1}; + LaunchArgs.DynSharedMemory = 0; + } + + void TearDown() override { + RETURN_ON_FATAL_FAILURE(OffloadProgramTest::TearDown()); + } + + ol_symbol_handle_t Kernel = nullptr; + ol_kernel_launch_size_args_t LaunchArgs{}; +}; +OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olWaitQueueTest); + +TEST_P(olWaitQueueTest, Success) { + constexpr size_t NUM_KERNELS = 16; + ol_queue_handle_t Queues[NUM_KERNELS]; + ol_event_handle_t Events[NUM_KERNELS]; + + void *Mem; + ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, + NUM_KERNELS * sizeof(uint32_t), &Mem)); + struct { + uint32_t Idx; + void *Mem; + } Args{0, Mem}; + + for (size_t I = 0; I < NUM_KERNELS; I++) { + Args.Idx = I; + + ASSERT_SUCCESS(olCreateQueue(Device, &Queues[I])); + + if (I > 0) + ASSERT_SUCCESS(olWaitQueue(Queues[I], &Events[I - 1], 1)); + + ASSERT_SUCCESS(olLaunchKernel(Queues[I], Device, Kernel, &Args, + sizeof(Args), &LaunchArgs, &Events[I])); + } + + ASSERT_SUCCESS(olSyncEvent(Events[NUM_KERNELS - 1])); + + uint32_t *Data = (uint32_t *)Mem; + for (uint32_t i = 2; i < NUM_KERNELS; i++) { + ASSERT_EQ(Data[i], Data[i - 1] + Data[i - 2]); + } +} + +TEST_P(olWaitQueueTest, SuccessSingleQueue) { + constexpr size_t NUM_KERNELS = 16; + ol_queue_handle_t Queue; + ol_event_handle_t Events[NUM_KERNELS]; + + ASSERT_SUCCESS(olCreateQueue(Device, &Queue)); + + void *Mem; + ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, + NUM_KERNELS * sizeof(uint32_t), &Mem)); + struct { + uint32_t Idx; + void *Mem; + } Args{0, Mem}; + + for (size_t I = 0; I < NUM_KERNELS; I++) { + Args.Idx = I; + + if (I > 0) + ASSERT_SUCCESS(olWaitQueue(Queue, &Events[I - 1], 1)); + + ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), + &LaunchArgs, &Events[I])); + } + + ASSERT_SUCCESS(olSyncEvent(Events[NUM_KERNELS - 1])); + + uint32_t *Data = (uint32_t *)Mem; + for (uint32_t i = 2; i < NUM_KERNELS; i++) { + ASSERT_EQ(Data[i], Data[i - 1] + Data[i - 2]); + } +} + +TEST_P(olWaitQueueTest, SuccessMultipleEvents) { + constexpr size_t NUM_KERNELS = 16; + ol_queue_handle_t Queues[NUM_KERNELS]; + ol_event_handle_t Events[NUM_KERNELS]; + + void *Mem; + ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, + NUM_KERNELS * sizeof(uint32_t), &Mem)); + struct { + uint32_t Idx; + void *Mem; + } Args{0, Mem}; + + for (size_t I = 0; I < NUM_KERNELS; I++) { + Args.Idx = I; + + ASSERT_SUCCESS(olCreateQueue(Device, &Queues[I])); + + if (I > 0) + ASSERT_SUCCESS(olWaitQueue(Queues[I], Events, I)); + + ASSERT_SUCCESS(olLaunchKernel(Queues[I], Device, Kernel, &Args, + sizeof(Args), &LaunchArgs, &Events[I])); + } + + ASSERT_SUCCESS(olSyncEvent(Events[NUM_KERNELS - 1])); + + uint32_t *Data = (uint32_t *)Mem; + for (uint32_t i = 2; i < NUM_KERNELS; i++) { + ASSERT_EQ(Data[i], Data[i - 1] + Data[i - 2]); + } +} + +TEST_P(olWaitQueueTest, InvalidNullQueue) { + ol_event_handle_t Event; + ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olWaitQueue(nullptr, &Event, 1)); +} + +TEST_P(olWaitQueueTest, InvalidNullEvent) { + ol_queue_handle_t Queue; + ASSERT_SUCCESS(olCreateQueue(Device, &Queue)); + ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, olWaitQueue(Queue, nullptr, 1)); +} + +TEST_P(olWaitQueueTest, InvalidNullInnerEvent) { + ol_queue_handle_t Queue; + ASSERT_SUCCESS(olCreateQueue(Device, &Queue)); + ol_event_handle_t Event = nullptr; + ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olWaitQueue(Queue, &Event, 1)); +} >From ceacd7f70f6dcfd30e879c4c2e4a7326eeaef247 Mon Sep 17 00:00:00 2001 From: Ross Brunton <r...@codeplay.com> Date: Tue, 22 Jul 2025 15:59:15 +0100 Subject: [PATCH 2/3] Clang-format --- offload/unittests/OffloadAPI/device_code/sequence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/unittests/OffloadAPI/device_code/sequence.c b/offload/unittests/OffloadAPI/device_code/sequence.c index 22504086ffa38..7662f2d817496 100644 --- a/offload/unittests/OffloadAPI/device_code/sequence.c +++ b/offload/unittests/OffloadAPI/device_code/sequence.c @@ -7,5 +7,5 @@ __gpu_kernel void sequence(uint32_t idx, uint32_t *inout) { else if (idx == 1) inout[idx] = 1; else - inout[idx] = inout[idx-1] + inout[idx-2]; + inout[idx] = inout[idx - 1] + inout[idx - 2]; } >From ef58e6a7cee20c9263a1fdaa4589c7c08831933c Mon Sep 17 00:00:00 2001 From: Ross Brunton <r...@codeplay.com> Date: Tue, 22 Jul 2025 16:01:05 +0100 Subject: [PATCH 3/3] Typo --- offload/liboffload/API/Queue.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td index 43c723de54510..1193f40fc96ab 100644 --- a/offload/liboffload/API/Queue.td +++ b/offload/liboffload/API/Queue.td @@ -45,7 +45,7 @@ def : Function { let name = "olWaitQueue"; let desc = "Make any future work submitted to this queue wait until the provided events are complete."; let details = [ - "All events in `Events` must complete beforet he queue is unblocked.", + "All events in `Events` must complete before the queue is unblocked.", "The input events can be from any queue on any device provided by the same platform as `Queue`.", ]; let params = [ _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits