On 11/19/20 3:39 PM, Andreas Beckmann wrote:
> POCL built against LLVM 10 (sid) or LLVM 11 (experimental) causes a 
> autopkgtest regression on armhf in libgpuarray while it succeeded with 
> LLVM 9.

I finally managed to create a plain c reproducer (based on some pocl
test) which dies with this backtrace on abel.d.o:

#0  getEmissionKind () at 
/build/llvm-toolchain-10-hVI0Qp/llvm-toolchain-10-10.0.1/llvm/include/llvm/IR/DebugInfoMetadata.h:1244
#1  initialize () at 
/build/llvm-toolchain-10-hVI0Qp/llvm-toolchain-10-10.0.1/llvm/lib/CodeGen/LexicalScopes.cpp:53
#2  0xb13a82f0 in computeIntervals () at 
/build/llvm-toolchain-10-hVI0Qp/llvm-toolchain-10-10.0.1/llvm/lib/CodeGen/LiveDebugVariables.cpp:979
#3  runOnMachineFunction () at 
/build/llvm-toolchain-10-hVI0Qp/llvm-toolchain-10-10.0.1/llvm/lib/CodeGen/LiveDebugVariables.cpp:996
#4  runOnMachineFunction () at 
/build/llvm-toolchain-10-hVI0Qp/llvm-toolchain-10-10.0.1/llvm/lib/CodeGen/LiveDebugVariables.cpp:1023
#5  0xb141d6c8 in runOnFunction () at 
/build/llvm-toolchain-10-hVI0Qp/llvm-toolchain-10-10.0.1/llvm/lib/CodeGen/MachineFunctionPass.cpp:73
#6  0xb1297494 in runOnFunction () at 
/build/llvm-toolchain-10-hVI0Qp/llvm-toolchain-10-10.0.1/llvm/lib/IR/LegacyPassManager.cpp:1481
#7  0xb1297750 in runOnModule () at 
/build/llvm-toolchain-10-hVI0Qp/llvm-toolchain-10-10.0.1/llvm/lib/IR/LegacyPassManager.cpp:1517
#8  0xb1297ba8 in runOnModule () at 
/build/llvm-toolchain-10-hVI0Qp/llvm-toolchain-10-10.0.1/llvm/lib/IR/LegacyPassManager.cpp:1582
#9  run () at 
/build/llvm-toolchain-10-hVI0Qp/llvm-toolchain-10-10.0.1/llvm/lib/IR/LegacyPassManager.cpp:1694
#10 0xb6dfac82 in pocl_llvm_codegen (Device=Device@entry=0x1a3dfc8, 
Modp=0x20102d8, Output=Output@entry=0xbe9be8bc, 
OutputSize=OutputSize@entry=0xbe9be8d0) at ./lib/CL/pocl_llvm_wg.cc:624
#11 0xb6dbf1de in llvm_codegen (output=output@entry=0x1be75e0 
"/home/anbe/.cache/pocl/kcache/AP/PNFEAPBKBFEAKGGNMALGHGJEEKGMJFBFBMDHA/Sdot_kernel/0-0-0/Sdot_kernel.so",
 device_i=device_i@entry=0, kernel=kernel@entry=0xbe9c0290, device=0x1a3dfc8, 
    command=command@entry=0xbe9c02c8, specialize=specialize@entry=0) at 
./lib/CL/devices/common.c:158
#12 0xb6dc0e44 in pocl_check_kernel_disk_cache 
(command=command@entry=0xbe9c02c8, specialized=specialized@entry=0) at 
./lib/CL/devices/common.c:958
#13 0xb6dc1262 in pocl_check_kernel_dlhandle_cache (command=0xbe9c02c8, 
initial_refcount=0, specialize=0) at ./lib/CL/devices/common.c:1081
#14 0xb6d993d4 in program_compile_dynamic_wg_binaries 
(program=program@entry=0x1a18350) at ./lib/CL/pocl_build.c:179
#15 0xb6da9f20 in get_binary_sizes (sizes=0xbe9c03d4, program=0x1a18350) at 
./lib/CL/clGetProgramInfo.c:36
#16 POclGetProgramInfo (program=0x1a18350, param_name=4453, 
param_value_size=128, param_value=0xbe9c03d4, param_value_size_ret=0xbe9c03d0) 
at ./lib/CL/clGetProgramInfo.c:115
#17 0x0045a070 in main () at 975931.c:238

I expect pocl built against llvm 11 (experimental) to fail similarily.
pocl built against llvm 9 (testing) passes.

Sylvestre, could you check whether this is an error on the LLVM side
or is POCL using LLVM incorrectly?


Andreas
#define CL_TARGET_OPENCL_VERSION 220

#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

const char source[] =
"#ifdef DOUBLE_PRECISION\n"
"    #ifdef cl_khr_fp64\n"
"    #pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"    #else\n"
"    #pragma OPENCL EXTENSION cl_amd_fp64 : enable\n"
"    #endif\n"
"#endif\n"
"\n"
"__kernel void Sdot_kernel( __global float *_X, __global float *_Y, __global float *scratchBuff,\n"
"                                        uint N, uint offx, int incx, uint offy, int incy, int doConj )\n"
"{\n"
"    __global float *X = _X + offx;\n"
"    __global float *Y = _Y + offy;\n"
"    float dotP = (float) 0.0;\n"
"\n"
"    if ( incx < 0 ) {\n"
"        X = X + (N - 1) * abs(incx);\n"
"    }\n"
"    if ( incy < 0 ) {\n"
"        Y = Y + (N - 1) * abs(incy);\n"
"    }\n"
"\n"
"    int gOffset;\n"
"    for( gOffset=(get_global_id(0) * 4); (gOffset + 4 - 1)<N; gOffset+=( get_global_size(0) * 4 ) )\n"
"    {\n"
"        float4 vReg1, vReg2, res;\n"
"\n"
"        #ifdef INCX_NONUNITY\n"
"             vReg1 = (float4)(  (X + (gOffset*incx))[0 + ( incx * 0)],  (X + (gOffset*incx))[0 + ( incx * 1)],  (X + (gOffset*incx))[0 + ( incx * 2)],  (X + (gOffset*incx))[0 + ( incx * 3)]);\n"
"        #else\n"
"            vReg1 = vload4(  0, (__global float *) (X + gOffset) );\n"
"        #endif\n"
"\n"
"        #ifdef INCY_NONUNITY\n"
"             vReg2 = (float4)(  (Y + (gOffset*incy))[0 + ( incy * 0)],  (Y + (gOffset*incy))[0 + ( incy * 1)],  (Y + (gOffset*incy))[0 + ( incy * 2)],  (Y + (gOffset*incy))[0 + ( incy * 3)]);\n"
"        #else\n"
"            vReg2 = vload4(  0, (__global float *) (Y + gOffset) );\n"
"        #endif\n"
"\n"
"        ;\n"
"         res =  vReg1 *  vReg2 ;\n"
"        dotP +=  res .S0 +  res .S1 +  res .S2 +  res .S3;\n"
";          // Add-up elements in the vector to give a scalar\n"
"    }\n"
"\n"
"    // Loop for the last thread to handle the tail part of the vector\n"
"    // Using the same gOffset used above\n"
"    for( ; gOffset<N; gOffset++ )\n"
"    {\n"
"        float sReg1, sReg2, res;\n"
"        sReg1 = X[gOffset * incx];\n"
"        sReg2 = Y[gOffset * incy];\n"
"\n"
"        ;\n"
"             res =  sReg1 *  sReg2 ;\n"
"             dotP =  dotP +  res ;\n"
"        }\n"
"\n"
"    // Note: this has to be called outside any if-conditions- because REDUCTION uses barrier\n"
"    // dotP of work-item 0 will have the final reduced item of the work-group\n"
"    __local float p1753 [ 64 ];\n"
"	uint QKiD0 = get_local_id(0);\n"
"	 p1753 [ QKiD0 ] =  dotP ;\n"
"	barrier(CLK_LOCAL_MEM_FENCE);\n"
"\n"
"	if( QKiD0 < 32 ) {\n"
"		 p1753 [ QKiD0 ] = p1753 [ QKiD0 ] + p1753 [ QKiD0 + 32 ];\n"
"	}\n"
"	barrier(CLK_LOCAL_MEM_FENCE);\n"
"\n"
"	if( QKiD0 < 16 ) {\n"
"		 p1753 [ QKiD0 ] = p1753 [ QKiD0 ] + p1753 [ QKiD0 + 16 ];\n"
"	}\n"
"	barrier(CLK_LOCAL_MEM_FENCE);\n"
"\n"
"	if( QKiD0 < 8 ) {\n"
"		 p1753 [ QKiD0 ] = p1753 [ QKiD0 ] + p1753 [ QKiD0 + 8 ];\n"
"	}\n"
"	barrier(CLK_LOCAL_MEM_FENCE);\n"
"\n"
"	if( QKiD0 < 4 ) {\n"
"		 p1753 [ QKiD0 ] = p1753 [ QKiD0 ] + p1753 [ QKiD0 + 4 ];\n"
"	}\n"
"	barrier(CLK_LOCAL_MEM_FENCE);\n"
"\n"
"	if( QKiD0 < 2 ) {\n"
"		 p1753 [ QKiD0 ] = p1753 [ QKiD0 ] + p1753 [ QKiD0 + 2 ];\n"
"	}\n"
"	barrier(CLK_LOCAL_MEM_FENCE);\n"
"\n"
"	if( QKiD0 == 0 ) {\n"
"	 dotP  = p1753 [0] + p1753 [1];\n"
"	}\n"
"\n"
"    if( (get_local_id(0)) == 0 ) {\n"
"        scratchBuff[ get_group_id(0) ] = dotP;\n"
"    }\n"
"}\n"
"\n"
;

#define MAX_PLATFORMS 32
#define MAX_DEVICES   32
#define MAX_BINARIES  32

#define OPENCL_ERROR_CASE(ERR) \
  case ERR:                                                             \
  { fprintf (stderr, "" #ERR " in %s on line %i\n", func_name, line);   \
    return 1; }

int
check_cl_error (cl_int cl_err, int line, const char* func_name) {

  switch (cl_err)
    {
    case CL_SUCCESS: return 0;

      OPENCL_ERROR_CASE (CL_DEVICE_NOT_FOUND)
        OPENCL_ERROR_CASE (CL_DEVICE_NOT_AVAILABLE)
        OPENCL_ERROR_CASE (CL_COMPILER_NOT_AVAILABLE)
        OPENCL_ERROR_CASE (CL_MEM_OBJECT_ALLOCATION_FAILURE)
        OPENCL_ERROR_CASE (CL_OUT_OF_RESOURCES)
        OPENCL_ERROR_CASE (CL_OUT_OF_HOST_MEMORY)
        OPENCL_ERROR_CASE (CL_PROFILING_INFO_NOT_AVAILABLE)
        OPENCL_ERROR_CASE (CL_MEM_COPY_OVERLAP)
        OPENCL_ERROR_CASE (CL_IMAGE_FORMAT_MISMATCH)
        OPENCL_ERROR_CASE (CL_IMAGE_FORMAT_NOT_SUPPORTED)
        OPENCL_ERROR_CASE (CL_BUILD_PROGRAM_FAILURE)
        OPENCL_ERROR_CASE (CL_MAP_FAILURE)
        OPENCL_ERROR_CASE (CL_MISALIGNED_SUB_BUFFER_OFFSET)
        OPENCL_ERROR_CASE (CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
        OPENCL_ERROR_CASE (CL_COMPILE_PROGRAM_FAILURE)
        OPENCL_ERROR_CASE (CL_LINKER_NOT_AVAILABLE)
        OPENCL_ERROR_CASE (CL_LINK_PROGRAM_FAILURE)
        OPENCL_ERROR_CASE (CL_DEVICE_PARTITION_FAILED)
        OPENCL_ERROR_CASE (CL_KERNEL_ARG_INFO_NOT_AVAILABLE)
        OPENCL_ERROR_CASE (CL_INVALID_VALUE)
        OPENCL_ERROR_CASE (CL_INVALID_DEVICE_TYPE)
        OPENCL_ERROR_CASE (CL_INVALID_PLATFORM)
        OPENCL_ERROR_CASE (CL_INVALID_DEVICE)
        OPENCL_ERROR_CASE (CL_INVALID_CONTEXT)
        OPENCL_ERROR_CASE (CL_INVALID_QUEUE_PROPERTIES)
        OPENCL_ERROR_CASE (CL_INVALID_COMMAND_QUEUE)
        OPENCL_ERROR_CASE (CL_INVALID_HOST_PTR)
        OPENCL_ERROR_CASE (CL_INVALID_MEM_OBJECT)
        OPENCL_ERROR_CASE (CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
        OPENCL_ERROR_CASE (CL_INVALID_IMAGE_SIZE)
        OPENCL_ERROR_CASE (CL_INVALID_SAMPLER)
        OPENCL_ERROR_CASE (CL_INVALID_BINARY)
        OPENCL_ERROR_CASE (CL_INVALID_BUILD_OPTIONS)
        OPENCL_ERROR_CASE (CL_INVALID_PROGRAM)
        OPENCL_ERROR_CASE (CL_INVALID_PROGRAM_EXECUTABLE)
        OPENCL_ERROR_CASE (CL_INVALID_KERNEL_NAME)
        OPENCL_ERROR_CASE (CL_INVALID_KERNEL_DEFINITION)
        OPENCL_ERROR_CASE (CL_INVALID_KERNEL)
        OPENCL_ERROR_CASE (CL_INVALID_ARG_INDEX)
        OPENCL_ERROR_CASE (CL_INVALID_ARG_VALUE)
        OPENCL_ERROR_CASE (CL_INVALID_ARG_SIZE)
        OPENCL_ERROR_CASE (CL_INVALID_KERNEL_ARGS)
        OPENCL_ERROR_CASE (CL_INVALID_WORK_DIMENSION)
        OPENCL_ERROR_CASE (CL_INVALID_WORK_GROUP_SIZE)
        OPENCL_ERROR_CASE (CL_INVALID_WORK_ITEM_SIZE)
        OPENCL_ERROR_CASE (CL_INVALID_GLOBAL_OFFSET)
        OPENCL_ERROR_CASE (CL_INVALID_EVENT_WAIT_LIST)
        OPENCL_ERROR_CASE (CL_INVALID_EVENT)
        OPENCL_ERROR_CASE (CL_INVALID_OPERATION)
        OPENCL_ERROR_CASE (CL_INVALID_GL_OBJECT)
        OPENCL_ERROR_CASE (CL_INVALID_BUFFER_SIZE)
        OPENCL_ERROR_CASE (CL_INVALID_MIP_LEVEL)
        OPENCL_ERROR_CASE (CL_INVALID_GLOBAL_WORK_SIZE)
        OPENCL_ERROR_CASE (CL_INVALID_PROPERTY)
        OPENCL_ERROR_CASE (CL_INVALID_IMAGE_DESCRIPTOR)
        OPENCL_ERROR_CASE (CL_INVALID_COMPILER_OPTIONS)
        OPENCL_ERROR_CASE (CL_INVALID_LINKER_OPTIONS)
        OPENCL_ERROR_CASE (CL_INVALID_DEVICE_PARTITION_COUNT)

    default:
      printf ("Unknown OpenCL error %i in %s on line %i\n", cl_err, func_name,
              line);
      return 1;
    }
}

#define _POCLU_CHECK_CL_ERROR_INNER(cond, func, line)                         \
  do                                                                          \
    {                                                                         \
      if (check_cl_error (cond, line, func))                                  \
        return (EXIT_FAILURE);                                                \
    }                                                                         \
  while (0)

#define CHECK_CL_ERROR(cond) _POCLU_CHECK_CL_ERROR_INNER(cond, __PRETTY_FUNCTION__, __LINE__)

#define CHECK_OPENCL_ERROR_IN(message) _POCLU_CHECK_CL_ERROR_INNER(err, message, __LINE__)

#define TEST_ASSERT(EXP)                                                \
do {                                                                    \
  if (!(EXP)) {                                                         \
    fprintf(stderr, "Assertion: \n" #EXP "\nfailed on %s:%i\n",         \
        __FILE__, __LINE__);                                            \
    return EXIT_FAILURE;                                                \
  }                                                                     \
} while (0)

int main(){
  cl_int err;
  cl_platform_id platforms[MAX_PLATFORMS];
  cl_uint nplatforms;
  cl_device_id devices[MAX_DEVICES];
  cl_uint ndevices;
  cl_program program = NULL;
  size_t binsizes[MAX_BINARIES];
  size_t nbinaries;

  CHECK_CL_ERROR(clGetPlatformIDs(MAX_PLATFORMS, platforms, &nplatforms));
  TEST_ASSERT(nplatforms > 0);

  CHECK_CL_ERROR(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &ndevices));
  TEST_ASSERT(ndevices > 0);

  cl_context context = clCreateContext(NULL, 1, devices, NULL, NULL, &err);
  CHECK_OPENCL_ERROR_IN("clCreateContext");

  const char * src[] = {source};
  program = clCreateProgramWithSource(context, 1, src, NULL, &err);
  CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource");

  CHECK_CL_ERROR(clBuildProgram(program, 1, devices, "-g -DINCX_NONUNITY -DINCY_NONUNITY", NULL, NULL));

  CHECK_CL_ERROR(clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(binsizes), binsizes, &nbinaries));
  printf("binary size: %zd\n", binsizes[0]);

  CHECK_CL_ERROR(clReleaseProgram(program));

  CHECK_CL_ERROR (clReleaseContext (context));

  printf ("OK\n");

  return EXIT_SUCCESS;
}

Reply via email to