I am able to reproduce this just by compiling (without running) the
affected kernel on its own, using the program below with the kernel
source in bug913141kernel.cl. Hence, no further action is needed from you.
Switching to LLVM 7 (after fixing the other issue you noted) didn't
change anything. Removing -cl-fast-relaxed-math avoids the crash, but
probably reduces performance.
(This "kernel" is actually several almost-entirely-separate kernels, of
which one is selected by the -D KERNEL_* option, so it isn't really
relevant that the first set of build flags, which selects KERNEL_BASIC,
doesn't crash.)
//g++ -o bug913141 bug913141.cpp -lOpenCL
//Depends: ocl-icd-opencl-dev, beignet-opencl-icd
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <CL/opencl.h>
int main(){
const char* buildflags[3]={"-D TYPE=1 -D Dtype=float -D Dtype2=float2 -D
Dtype4=float4 -D Dtype8=float8 -D Dtype16=float16 -D as_Dtype=as_float
-D as_Dtype2=as_float2 -D as_Dtype4=as_float4 -D as_Dtype8=as_float8 -D
KERNEL_WIDTH=3 -D KERNEL_HEIGHT=3 -D STRIDE_X=1 -D STRIDE_Y=1 -D
DILATION_X=1 -D DILATION_Y=1 -D KERNEL_BASIC -cl-fast-relaxed-math -D
ConvolveBasic=BASIC_k3x3_cn576_g1_s1x1_d1x1_b1_in64x48_p1x1_num1_M512_activ0_eltwise0_FP32_4_1_1_1
-D CHANNELS=576 -D APPLY_BIAS=1 -D OUTPUT_Z=512 -D ZPAR=1 -D
INTEL_DEVICE",//known OK
"-D TYPE=1 -D Dtype=float -D Dtype2=float2 -D Dtype4=float4 -D
Dtype8=float8 -D Dtype16=float16 -D as_Dtype=as_float -D
as_Dtype2=as_float2 -D as_Dtype4=as_float4 -D as_Dtype8=as_float8 -D
KERNEL_WIDTH=3 -D KERNEL_HEIGHT=3 -D STRIDE_X=1 -D STRIDE_Y=1 -D
DILATION_X=1 -D DILATION_Y=1 -D INPUT_PAD_W=1 -D INPUT_PAD_H=1 -D
INPUT_PAD_RIGHT=1 -D INPUT_PAD_BOTTOM=1 -D GEMM_LIKE_CONV_32_1 -D
Conv_Interleaved=U_GEMM_LIKE_CONV_k3x3_cn576_g1_s1x1_d1x1_b1_in64x48_p1x1_num1_M512_activ0_eltwise0_FP32_5_1_8_32_SIMD8
-cl-mad-enable -D KERNEL_GEMM_LIKE -D INPUT_DEPTH=576 -D WIDTH1=512 -D
OUT_PADDING_LEFT=0 -D OUT_PADDING_HEIGHT=0 -D OUT_DEPTH=512 -D
NUM_BATCHES=1 -D DY=1 -D DX=32 -D KERNEL_WIDTH_DIV2=1 -D
KERNEL_SLICE_DIV2=4 -D TILE_N_LAST=0 -D TILE_N_LAST_DIV8=0 -D
APPLY_BIAS=1 -D INTEL_DEVICE",//no -cl-fast-relaxed-math appears OK
"-D TYPE=1 -D Dtype=float -D Dtype2=float2 -D Dtype4=float4 -D
Dtype8=float8 -D Dtype16=float16 -D as_Dtype=as_float -D
as_Dtype2=as_float2 -D as_Dtype4=as_float4 -D as_Dtype8=as_float8 -D
KERNEL_WIDTH=3 -D KERNEL_HEIGHT=3 -D STRIDE_X=1 -D STRIDE_Y=1 -D
DILATION_X=1 -D DILATION_Y=1 -D INPUT_PAD_W=1 -D INPUT_PAD_H=1 -D
INPUT_PAD_RIGHT=1 -D INPUT_PAD_BOTTOM=1 -cl-fast-relaxed-math -D
GEMM_LIKE_CONV_32_1 -D
Conv_Interleaved=U_GEMM_LIKE_CONV_k3x3_cn576_g1_s1x1_d1x1_b1_in64x48_p1x1_num1_M512_activ0_eltwise0_FP32_5_1_8_32_SIMD8
-cl-mad-enable -D KERNEL_GEMM_LIKE -D INPUT_DEPTH=576 -D WIDTH1=512 -D
OUT_PADDING_LEFT=0 -D OUT_PADDING_HEIGHT=0 -D OUT_DEPTH=512 -D
NUM_BATCHES=1 -D DY=1 -D DX=32 -D KERNEL_WIDTH_DIV2=1 -D
KERNEL_SLICE_DIV2=4 -D TILE_N_LAST=0 -D TILE_N_LAST_DIV8=0 -D
APPLY_BIAS=1 -D INTEL_DEVICE"};//known crashing
char kernel_source[200000];
std::ifstream kernel_file("bug913141kernel.cl");
kernel_file.read(kernel_source,190000);
size_t kernel_length=kernel_file.gcount()+1;
kernel_file.close();
kernel_source[kernel_length]=0;
kernel_source[kernel_length-1]=0;
const char* kernel_ptr=kernel_source;
cl_device_id device;
clGetDeviceIDs(NULL,CL_DEVICE_TYPE_GPU,1,&device,NULL);
cl_context ctx=clCreateContext(NULL,1,&device,NULL,NULL,NULL);
for(int n=0;n<3;n++){
std::cout << "n=" << n << std::endl;
cl_program
program=clCreateProgramWithSource(ctx,1,&kernel_ptr,&kernel_length,NULL);
cl_int status=clBuildProgram(program,1,&device,buildflags[n],NULL,NULL);
std::cout << "status=" << status << std::endl;
}
}