opencl/opencltest/main.cxx |  110 ++++++++++++++++++++++++++++++---------------
 1 file changed, 75 insertions(+), 35 deletions(-)

New commits:
commit fde5f8afc316ffb53f672c04107d92356c072d1c
Author:     Julien Nabet <[email protected]>
AuthorDate: Wed Jul 9 23:21:45 2025 +0200
Commit:     Julien Nabet <[email protected]>
CommitDate: Tue Sep 9 09:24:29 2025 +0200

    Related tdf#167365: some improvements in opencltest made by ChatGPT
    
    Summary of changes (also by ChatGPT)
    1) Robust platform / device discovery
        Checks for zero platforms or zero devices and aborts with a clear 
SAL_WARN.
        Fetches platform and device names with a two‑step clGet*Info (query 
length → retrieve string) to avoid 64‑byte truncation.
        Uses std::string and guarantees NUL‑termination before comparison.
        Log message now shows both platform and device if either is missing.
    
    2) Safer buffers for names
        Replaced fixed 64‑byte buffers with dynamically‑sized strings (full len 
from OpenCL).
    
    3) Improved error diagnostics
        Adds explicit warnings when no platforms or no devices are detected.
        Enhances “device not found” warning to include the platform name.
    
    4) Correct work‑group sizing
        Introduces localSize = 64 and rounds globalWorkSize up to the nearest 
multiple of 64 to prevent CL_INVALID_WORK_GROUP_SIZE (-54).
    
    5) Cleaner buffer handling
        Switches input buffers to CL_MEM_COPY_HOST_PTR (safer than USE_HOST_PTR 
here).
        Creates output buffer with the rounded‑up size, avoiding host‑ptr 
misuse.
    
    6) Kernel launch & read‑back tweaks
        Sets kernel arguments after the buffers are created.
        Uses the new globalWorkSize/localSize pair in a single, clearer 
clEnqueueNDRangeKernel call.
        Reads back only the required dataSize elements.
    
    7) Code style & comments
        Adds explanatory comments (rounding logic, 1‑D kernel launch, etc.).
        Tidies long lines and wraps for readability.
    
    Change-Id: I7cefc788fa6638f31f8a47040a8e90fbc6e02aff
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/187603
    Reviewed-by: Julien Nabet <[email protected]>
    Tested-by: Jenkins

diff --git a/opencl/opencltest/main.cxx b/opencl/opencltest/main.cxx
index 0db18b0042d1..010539fcf69e 100644
--- a/opencl/opencltest/main.cxx
+++ b/opencl/opencltest/main.cxx
@@ -39,23 +39,40 @@ static void runTest(const char* deviceName, const char* 
devicePlatform)
     int status = clewInit(OPENCL_DLL_NAME);
     check(status, CLEW_SUCCESS);
 
-    // Find the given OpenCL device (in order to use the same one as LO core).
-    cl_uint numPlatforms;
+    // Find the given OpenCL device (to match LO core config)
+    cl_uint numPlatforms = 0;
     openclcheck(clGetPlatformIDs(0, nullptr, &numPlatforms));
+    if (numPlatforms == 0)
+    {
+        SAL_WARN("opencl", "No OpenCL platforms found");
+        return;
+    }
+
     std::vector<cl_platform_id> platforms(numPlatforms);
     openclcheck(clGetPlatformIDs(numPlatforms, platforms.data(), nullptr));
+
     cl_platform_id platformId = nullptr;
     for (cl_uint i = 0; i < numPlatforms; ++i)
     {
-        char platformName[64];
-        if (clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 64, 
platformName, nullptr)
-                == CL_SUCCESS
-            && strcmp(devicePlatform, platformName) == 0)
+        size_t len = 0;
+        if (clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 0, nullptr, 
&len) != CL_SUCCESS)
+            continue;
+
+        std::string platformName(len, '
+        if (clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, len, 
&platformName[0], nullptr)
+            != CL_SUCCESS)
+            continue;
+
+        // We want the string terminated with null
+        platformName.back() = '
+
+        if (strcmp(devicePlatform, platformName.c_str()) == 0)
         {
             platformId = platforms[i];
             break;
         }
     }
+
     if (platformId == nullptr)
     {
         SAL_WARN("opencl", "Device platform not found: " << devicePlatform);
@@ -63,25 +80,41 @@ static void runTest(const char* deviceName, const char* 
devicePlatform)
         return;
     }
 
-    cl_uint numDevices;
+    cl_uint numDevices = 0;
     openclcheck(clGetDeviceIDs(platformId, CL_DEVICE_TYPE_ALL, 0, nullptr, 
&numDevices));
+    if (numDevices == 0)
+    {
+        SAL_WARN("opencl", "No OpenCL devices found on platform " << 
devicePlatform);
+        return;
+    }
+
     std::vector<cl_device_id> devices(numDevices);
     openclcheck(
         clGetDeviceIDs(platformId, CL_DEVICE_TYPE_ALL, numDevices, 
devices.data(), nullptr));
+
     cl_device_id deviceId = nullptr;
     for (cl_uint i = 0; i < numDevices; ++i)
     {
-        char name[1024];
-        if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 64, name, nullptr) == 
CL_SUCCESS
-            && strcmp(deviceName, name) == 0)
+        size_t len = 0;
+        if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 0, nullptr, &len) != 
CL_SUCCESS)
+            continue;
+
+        std::string name(len, '
+        if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, len, &name[0], 
nullptr) != CL_SUCCESS)
+            continue;
+
+        name.back() = '
+
+        if (strcmp(deviceName, name.c_str()) == 0)
         {
             deviceId = devices[i];
             break;
         }
     }
+
     if (deviceId == nullptr)
     {
-        SAL_WARN("opencl", "Device not found: " << deviceName);
+        SAL_WARN("opencl", "Device not found: " << deviceName << " on platform 
" << devicePlatform);
         assert(false);
         return;
     }
@@ -126,40 +159,47 @@ static void runTest(const char* deviceName, const char* 
devicePlatform)
     cl_kernel kernel = clCreateKernel(program, "testFunction", &state);
     openclcheck(state);
 
-    // Some random data for the program.
-    constexpr int dataSize = 1000;
+    // data and buffers
+    constexpr size_t localSize = 64;
+    constexpr size_t dataSize = 1000;
+
+    // globalSize rounding to the multiple of localSize
+    const size_t globalWorkSize[] = { ((dataSize + localSize - 1) / localSize) 
* localSize };
+
     cl_float inputData1[dataSize];
     cl_float inputData2[dataSize];
-    cl_float outputData[dataSize];
-    for (int i = 0; i < dataSize; ++i)
+    for (size_t i = 0; i < dataSize; ++i)
     {
-        inputData1[i] = i * 2;
+        inputData1[i] = i * 2.0f;
         inputData2[i] = i % 100;
     }
-    cl_mem input1 = clCreateBuffer(context, CL_MEM_READ_ONLY | 
CL_MEM_USE_HOST_PTR,
-                                   sizeof(cl_float) * dataSize, inputData1, 
&state);
-    openclcheck(state);
-    cl_mem input2 = clCreateBuffer(context, CL_MEM_READ_ONLY | 
CL_MEM_USE_HOST_PTR,
-                                   sizeof(cl_float) * dataSize, inputData2, 
&state);
-    openclcheck(state);
-    cl_mem output = clCreateBuffer(context, CL_MEM_WRITE_ONLY | 
CL_MEM_USE_HOST_PTR,
-                                   sizeof(cl_float) * dataSize, outputData, 
&state);
-    openclcheck(state);
-    state = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input1);
+
+    cl_mem input1 = clCreateBuffer(context, CL_MEM_READ_ONLY | 
CL_MEM_COPY_HOST_PTR,
+                                   sizeof(inputData1), inputData1, &state);
     openclcheck(state);
-    state = clSetKernelArg(kernel, 1, sizeof(cl_mem), &input2);
+    cl_mem input2 = clCreateBuffer(context, CL_MEM_READ_ONLY | 
CL_MEM_COPY_HOST_PTR,
+                                   sizeof(inputData2), inputData2, &state);
     openclcheck(state);
-    state = clSetKernelArg(kernel, 2, sizeof(cl_mem), &output);
+    cl_mem output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
sizeof(cl_float) * globalWorkSize[0],
+                                   nullptr, &state);
     openclcheck(state);
 
-    const size_t globalWorkSize[] = { dataSize };
-    const size_t localSize[1] = { 64 };
-    state = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, globalWorkSize, 
localSize, 0, nullptr,
-                                   nullptr);
-    openclcheck(state);
+    // kernel launch
+    openclcheck(clSetKernelArg(kernel, 0, sizeof(cl_mem), &input1));
+    openclcheck(clSetKernelArg(kernel, 1, sizeof(cl_mem), &input2));
+    openclcheck(clSetKernelArg(kernel, 2, sizeof(cl_mem), &output));
+
+    openclcheck(clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, // 1 
dimension, no offset
+                                       globalWorkSize, // 64-multiple
+                                       &localSize, 0, nullptr, nullptr));
+
     openclcheck(clFinish(queue));
-    openclcheck(clEnqueueReadBuffer(queue, output, CL_TRUE, 0, 
sizeof(cl_float) * dataSize,
-                                    outputData, 0, nullptr, nullptr));
+
+    // reading and cleaning
+    cl_float outputData[dataSize]; // we only read what we need
+    openclcheck(clEnqueueReadBuffer(queue, output, CL_TRUE, 0, 
sizeof(outputData), outputData, 0,
+                                    nullptr, nullptr));
+
     clReleaseMemObject(input1);
     clReleaseMemObject(input2);
     clReleaseMemObject(output);

Reply via email to