LGTM, will push it later, thanks.
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > Xiuli Pan > Sent: Thursday, August 18, 2016 12:57 > To: [email protected] > Cc: Pan, Xiuli <[email protected]> > Subject: [Beignet] [PATCH] Utest: Add test for half type subgroup functions > > From: Pan Xiuli <[email protected]> > > Check if device support subgroup and half first, use build options to hide > code for unsported device. > V2: Fix half part test case for utest multithread. > > Signed-off-by: Pan Xiuli <[email protected]> > --- > kernels/compiler_subgroup_broadcast.cl | 16 ++++- > kernels/compiler_subgroup_reduce.cl | 19 +++++ > kernels/compiler_subgroup_scan_exclusive.cl | 19 +++++ > kernels/compiler_subgroup_scan_inclusive.cl | 19 +++++ > utests/compiler_subgroup_broadcast.cpp | 27 +++++-- > utests/compiler_subgroup_reduce.cpp | 104 > +++++++++++++++++++++++---- > utests/compiler_subgroup_scan_exclusive.cpp | 107 > ++++++++++++++++++++++++---- > utests/compiler_subgroup_scan_inclusive.cpp | 100 > ++++++++++++++++++++++---- > 8 files changed, 367 insertions(+), 44 deletions(-) > > diff --git a/kernels/compiler_subgroup_broadcast.cl > b/kernels/compiler_subgroup_broadcast.cl > index 4f21cf5..8c155ee 100644 > --- a/kernels/compiler_subgroup_broadcast.cl > +++ b/kernels/compiler_subgroup_broadcast.cl > @@ -1,7 +1,7 @@ > /* > * Subgroup broadcast 1D functions > */ > - > +#ifndef HALF > kernel void compiler_subgroup_broadcast_imm_int(global int *src, > global int *dst, > uint simd_id) @@ -32,3 > +32,17 @@ kernel void > compiler_subgroup_broadcast_long(global long *src, > long broadcast_val = sub_group_broadcast(val, simd_id); > dst[index] = broadcast_val; > } > +#else > +#pragma OPENCL EXTENSION cl_khr_fp16 : enable kernel void > +compiler_subgroup_broadcast_half(global half *src, > + global half *dst, > + uint simd_id) { > + uint index = get_global_id(0); > + > + half val = src[index]; > + half broadcast_val = sub_group_broadcast(val, simd_id); > + printf("%d val %d is %d\n",index,as_ushort(val), > +as_ushort(broadcast_val)); > + dst[index] = broadcast_val; > +} > +#endif > diff --git a/kernels/compiler_subgroup_reduce.cl > b/kernels/compiler_subgroup_reduce.cl > index 77ffb07..6d7ecfd 100644 > --- a/kernels/compiler_subgroup_reduce.cl > +++ b/kernels/compiler_subgroup_reduce.cl > @@ -1,6 +1,7 @@ > /* > * Subgroup any all functions > */ > +#ifndef HALF > kernel void compiler_subgroup_any(global int *src, global int *dst) { > int val = src[get_global_id(0)]; > int predicate = sub_group_any(val); > @@ -134,3 +135,21 @@ kernel void > compiler_subgroup_reduce_min_float(global float *src, global float * > float sum = sub_group_reduce_min(val); > dst[get_global_id(0)] = sum; > } > +#else > +#pragma OPENCL EXTENSION cl_khr_fp16 : enable kernel void > +compiler_subgroup_reduce_add_half(global half *src, global half *dst) { > + half val = src[get_global_id(0)]; > + half sum = sub_group_reduce_add(val); > + dst[get_global_id(0)] = sum; > +} > +kernel void compiler_subgroup_reduce_max_half(global half *src, global > +half *dst) { > + half val = src[get_global_id(0)]; > + half sum = sub_group_reduce_max(val); > + dst[get_global_id(0)] = sum; > +} > +kernel void compiler_subgroup_reduce_min_half(global half *src, global > +half *dst) { > + half val = src[get_global_id(0)]; > + half sum = sub_group_reduce_min(val); > + dst[get_global_id(0)] = sum; > +} > +#endif > diff --git a/kernels/compiler_subgroup_scan_exclusive.cl > b/kernels/compiler_subgroup_scan_exclusive.cl > index afc00d0..ca0ada2 100644 > --- a/kernels/compiler_subgroup_scan_exclusive.cl > +++ b/kernels/compiler_subgroup_scan_exclusive.cl > @@ -1,6 +1,7 @@ > /* > * Subgroup scan exclusive add functions > */ > +#ifndef HALF > kernel void compiler_subgroup_scan_exclusive_add_int(global int *src, > global int *dst) { > int val = src[get_global_id(0)]; > int sum = sub_group_scan_exclusive_add(val); > @@ -96,3 +97,21 @@ kernel void > compiler_subgroup_scan_exclusive_min_float(global float *src, global > float sum = sub_group_scan_exclusive_min(val); > dst[get_global_id(0)] = sum; > } > +#else > +#pragma OPENCL EXTENSION cl_khr_fp16 : enable kernel void > +compiler_subgroup_scan_exclusive_add_half(global half *src, global half > +*dst) { > + half val = src[get_global_id(0)]; > + half sum = sub_group_scan_exclusive_add(val); > + dst[get_global_id(0)] = sum; > +} > +kernel void compiler_subgroup_scan_exclusive_max_half(global half *src, > +global half *dst) { > + half val = src[get_global_id(0)]; > + half sum = sub_group_scan_exclusive_max(val); > + dst[get_global_id(0)] = sum; > +} > +kernel void compiler_subgroup_scan_exclusive_min_half(global half *src, > +global half *dst) { > + half val = src[get_global_id(0)]; > + half sum = sub_group_scan_exclusive_min(val); > + dst[get_global_id(0)] = sum; > +} > +#endif > diff --git a/kernels/compiler_subgroup_scan_inclusive.cl > b/kernels/compiler_subgroup_scan_inclusive.cl > index da1a6e6..e97521c 100644 > --- a/kernels/compiler_subgroup_scan_inclusive.cl > +++ b/kernels/compiler_subgroup_scan_inclusive.cl > @@ -1,6 +1,7 @@ > /* > * Subgroup scan inclusive add functions > */ > +#ifndef HALF > kernel void compiler_subgroup_scan_inclusive_add_int(global int *src, > global int *dst) { > int val = src[get_global_id(0)]; > int sum = sub_group_scan_inclusive_add(val); > @@ -96,3 +97,21 @@ kernel void > compiler_subgroup_scan_inclusive_min_float(global float *src, global > float sum = sub_group_scan_inclusive_min(val); > dst[get_global_id(0)] = sum; > } > +#else > +#pragma OPENCL EXTENSION cl_khr_fp16 : enable kernel void > +compiler_subgroup_scan_inclusive_add_half(global half *src, global half > +*dst) { > + half val = src[get_global_id(0)]; > + half sum = sub_group_scan_inclusive_add(val); > + dst[get_global_id(0)] = sum; > +} > +kernel void compiler_subgroup_scan_inclusive_max_half(global half *src, > +global half *dst) { > + half val = src[get_global_id(0)]; > + half sum = sub_group_scan_inclusive_max(val); > + dst[get_global_id(0)] = sum; > +} > +kernel void compiler_subgroup_scan_inclusive_min_half(global half *src, > +global half *dst) { > + half val = src[get_global_id(0)]; > + half sum = sub_group_scan_inclusive_min(val); > + dst[get_global_id(0)] = sum; > +} > +#endif > diff --git a/utests/compiler_subgroup_broadcast.cpp > b/utests/compiler_subgroup_broadcast.cpp > index 2835161..9a7979c 100644 > --- a/utests/compiler_subgroup_broadcast.cpp > +++ b/utests/compiler_subgroup_broadcast.cpp > @@ -59,10 +59,15 @@ static void generate_data(T* &input, > /* initially 0, augment after */ > input[gid + lid] = 0; > > - /* check all data types, test ideal for QWORD types */ > - input[gid + lid] += ((rand() % 2 - 1) * base_val); > - /* add trailing random bits, tests GENERAL cases */ > - input[gid + lid] += (rand() % 112); > + if(sizeof(T) == 2) { > + input[gid + lid] = __float_to_half(as_uint((float)(gid + lid))); > + } > + else { > + /* check all data types, test ideal for QWORD types */ > + input[gid + lid] += ((rand() % 2 - 1) * base_val); > + /* add trailing random bits, tests GENERAL cases */ > + input[gid + lid] += (rand() % 112); > + } > > #if DEBUG_STDOUT > /* output generated input */ > @@ -185,3 +190,17 @@ void compiler_subgroup_broadcast_long(void) > subgroup_generic(input, expected); > } > > MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(compiler_subgroup_broadca > st_long); > +void compiler_subgroup_broadcast_half(void) > +{ > + if(!cl_check_subgroups()) > + return; > + if(!cl_check_half()) > + return; > + cl_half *input = NULL; > + cl_half *expected = NULL; > + OCL_CALL(cl_kernel_init, "compiler_subgroup_broadcast.cl", > + "compiler_subgroup_broadcast_half", > + SOURCE, "-DHALF"); > + subgroup_generic(input, expected); > +} > +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_broadcast_half); > diff --git a/utests/compiler_subgroup_reduce.cpp > b/utests/compiler_subgroup_reduce.cpp > index 3c3df06..ff545c6 100644 > --- a/utests/compiler_subgroup_reduce.cpp > +++ b/utests/compiler_subgroup_reduce.cpp > @@ -33,7 +33,8 @@ template<class T> > static void compute_expected(WG_FUNCTION wg_func, > T* input, > T* expected, > - size_t SIMD_SIZE) > + size_t SIMD_SIZE, > + bool IS_HALF) > { > if(wg_func == WG_ANY) > { > @@ -54,24 +55,43 @@ static void compute_expected(WG_FUNCTION > wg_func, > else if(wg_func == WG_REDUCE_ADD) > { > T wg_sum = input[0]; > - for(uint32_t i = 1; i < SIMD_SIZE; i++) > - wg_sum += input[i]; > + if(IS_HALF) { > + float wg_sum_tmp = 0.0f; > + for(uint32_t i = 0; i < SIMD_SIZE; i++) { > + wg_sum_tmp += as_float(__half_to_float(input[i])); > + } > + wg_sum = __float_to_half(as_uint(wg_sum_tmp)); > + } > + else { > + for(uint32_t i = 1; i < SIMD_SIZE; i++) > + wg_sum += input[i]; > + } > for(uint32_t i = 0; i < SIMD_SIZE; i++) > expected[i] = wg_sum; > } > else if(wg_func == WG_REDUCE_MAX) > { > T wg_max = input[0]; > - for(uint32_t i = 1; i < SIMD_SIZE; i++) > - wg_max = max(input[i], wg_max); > + for(uint32_t i = 1; i < SIMD_SIZE; i++) { > + if (IS_HALF) { > + wg_max = (as_float(__half_to_float(input[i])) > > as_float(__half_to_float(wg_max))) ? input[i] : wg_max; > + } > + else > + wg_max = max(input[i], wg_max); > + } > for(uint32_t i = 0; i < SIMD_SIZE; i++) > expected[i] = wg_max; > } > else if(wg_func == WG_REDUCE_MIN) > { > T wg_min = input[0]; > - for(uint32_t i = 1; i < SIMD_SIZE; i++) > - wg_min = min(input[i], wg_min); > + for(uint32_t i = 1; i < SIMD_SIZE; i++) { > + if (IS_HALF) { > + wg_min= (as_float(__half_to_float(input[i])) < > as_float(__half_to_float(wg_min))) ? input[i] : wg_min; > + } > + else > + wg_min = min(input[i], wg_min); > + } > for(uint32_t i = 0; i < SIMD_SIZE; i++) > expected[i] = wg_min; > } > @@ -85,7 +105,8 @@ template<class T> > static void generate_data(WG_FUNCTION wg_func, > T* &input, > T* &expected, > - size_t SIMD_SIZE) > + size_t SIMD_SIZE, > + bool IS_HALF) > { > input = new T[WG_GLOBAL_SIZE]; > expected = new T[WG_GLOBAL_SIZE]; > @@ -115,6 +136,8 @@ static void generate_data(WG_FUNCTION wg_func, > /* add trailing random bits, tests GENERAL cases */ > input[gid + lid] += (rand() % 112); > /* always last bit is 1, ideal test ALL/ANY */ > + if (IS_HALF) > + input[gid + lid] = __float_to_half(as_uint((float)input[gid + > + lid]/2)); > } else { > input[gid + lid] += rand(); > input[gid + lid] += rand() / ((float)RAND_MAX + 1); @@ -129,7 +152,7 > @@ static void generate_data(WG_FUNCTION wg_func, > } > > /* expected values */ > - compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE); > + compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE, > + IS_HALF); > > #if DEBUG_STDOUT > /* output expected input */ > @@ -152,7 +175,8 @@ static void generate_data(WG_FUNCTION wg_func, > template<class T> static void subgroup_generic(WG_FUNCTION wg_func, > T* input, > - T* expected) > + T* expected, > + bool IS_HALF = false) > { > /* get simd size */ > globals[0] = WG_GLOBAL_SIZE; > @@ -161,7 +185,7 @@ static void subgroup_generic(WG_FUNCTION > wg_func, > > OCL_CALL(utestclGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_M > AX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*1,locals,sizeof(si > ze_t),&SIMD_SIZE,NULL); > > /* input and expected data */ > - generate_data(wg_func, input, expected, SIMD_SIZE); > + generate_data(wg_func, input, expected, SIMD_SIZE, IS_HALF); > > /* prepare input for data type */ > OCL_CREATE_BUFFER(buf[0], 0, WG_GLOBAL_SIZE * sizeof(T), NULL); @@ > -185,8 +209,22 @@ static void subgroup_generic(WG_FUNCTION wg_func, > for (uint32_t i = 0; i < WG_GLOBAL_SIZE; i++) > if(((T *)buf_data[1])[i] != *(expected + i)) > { > + if (IS_HALF) { > + float num_computed = as_float(__half_to_float(((T > *)buf_data[1])[i])); > + float num_expected = as_float(__half_to_float(*(expected + i))); > + float num_diff = abs(num_computed - num_expected) / > abs(num_expected); > + if (num_diff > 0.03f) { > + mismatches++; > + } > +#if DEBUG_STDOUT > + /* output mismatch */ > + cout << "Err at " << i << ", " << num_computed > + << " != " << num_expected << " diff: " <<num_diff > +<<endl; #endif > + //} > + } > /* found mismatch on integer, increment */ > - if (numeric_limits<T>::is_integer) { > + else if (numeric_limits<T>::is_integer) { > mismatches++; > > #if DEBUG_STDOUT > @@ -305,6 +343,20 @@ void compiler_subgroup_reduce_add_float(void) > subgroup_generic(WG_REDUCE_ADD, input, expected); } > MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_add_float); > +void compiler_subgroup_reduce_add_half(void) > +{ > + if(!cl_check_subgroups()) > + return; > + if(!cl_check_half()) > + return; > + cl_half *input = NULL; > + cl_half *expected = NULL; > + OCL_CALL(cl_kernel_init, "compiler_subgroup_reduce.cl", > + "compiler_subgroup_reduce_add_half", > + SOURCE, "-DHALF"); > + subgroup_generic(WG_REDUCE_ADD, input, expected, true); } > +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_add_half); > > /* > * Workgroup reduce max utest functions @@ -364,6 +416,20 @@ void > compiler_subgroup_reduce_max_float(void) > subgroup_generic(WG_REDUCE_MAX, input, expected); } > MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_max_float); > +void compiler_subgroup_reduce_max_half(void) > +{ > + if(!cl_check_subgroups()) > + return; > + if(!cl_check_half()) > + return; > + cl_half *input = NULL; > + cl_half *expected = NULL; > + OCL_CALL(cl_kernel_init, "compiler_subgroup_reduce.cl", > + "compiler_subgroup_reduce_max_half", > + SOURCE, "-DHALF"); > + subgroup_generic(WG_REDUCE_MAX, input, expected, true); } > +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_max_half); > > /* > * Workgroup reduce min utest functions @@ -423,3 +489,17 @@ void > compiler_subgroup_reduce_min_float(void) > subgroup_generic(WG_REDUCE_MIN, input, expected); } > MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_min_float); > +void compiler_subgroup_reduce_min_half(void) > +{ > + if(!cl_check_subgroups()) > + return; > + if(!cl_check_half()) > + return; > + cl_half *input = NULL; > + cl_half *expected = NULL; > + OCL_CALL(cl_kernel_init, "compiler_subgroup_reduce.cl", > + "compiler_subgroup_reduce_min_half", > + SOURCE, "-DHALF"); > + subgroup_generic(WG_REDUCE_MIN, input, expected, true); } > +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_min_half); > diff --git a/utests/compiler_subgroup_scan_exclusive.cpp > b/utests/compiler_subgroup_scan_exclusive.cpp > index 1a21b59..e51b78d 100644 > --- a/utests/compiler_subgroup_scan_exclusive.cpp > +++ b/utests/compiler_subgroup_scan_exclusive.cpp > @@ -32,36 +32,56 @@ template<class T> > static void compute_expected(WG_FUNCTION wg_func, > T* input, > T* expected, > - size_t SIMD_SIZE) > + size_t SIMD_SIZE, > + bool IS_HALF) > { > if(wg_func == WG_SCAN_EXCLUSIVE_ADD) > { > expected[0] = 0; > expected[1] = input[0]; > - for(uint32_t i = 2; i < SIMD_SIZE; i++) > - expected[i] = input[i - 1] + expected[i - 1]; > + for(uint32_t i = 2; i < SIMD_SIZE; i++) { > + if (IS_HALF) > + expected[i] = > __float_to_half(as_uint(as_float(__half_to_float(input[i > - 1])) + > + > as_float(__half_to_float(expected[i - 1])))); > + else > + expected[i] = input[i - 1] + expected[i - 1]; > + } > } > else if(wg_func == WG_SCAN_EXCLUSIVE_MAX) > { > - if(numeric_limits<T>::is_integer) > + if(IS_HALF) > + expected[0] = 0xFC00; > + else if(numeric_limits<T>::is_integer) > expected[0] = numeric_limits<T>::min(); > else > expected[0] = - numeric_limits<T>::infinity(); > > expected[1] = input[0]; > - for(uint32_t i = 2; i < SIMD_SIZE; i++) > - expected[i] = max(input[i - 1], expected[i - 1]); > + for(uint32_t i = 2; i < SIMD_SIZE; i++) { > + if (IS_HALF) > + expected[i] = (as_float(__half_to_float(input[i - 1])) > > as_float(__half_to_float(expected[i - 1]))) ? > + input[i - 1] : expected[i - 1]; > + else > + expected[i] = max(input[i - 1], expected[i - 1]); > + } > } > else if(wg_func == WG_SCAN_EXCLUSIVE_MIN) > { > - if(numeric_limits<T>::is_integer) > + if(IS_HALF) > + expected[0] = 0x7C00; > + else if(numeric_limits<T>::is_integer) > expected[0] = numeric_limits<T>::max(); > else > expected[0] = numeric_limits<T>::infinity(); > > expected[1] = input[0]; > - for(uint32_t i = 2; i < SIMD_SIZE; i++) > - expected[i] = min(input[i - 1], expected[i - 1]); > + for(uint32_t i = 2; i < SIMD_SIZE; i++) { > + if (IS_HALF) > + expected[i] = (as_float(__half_to_float(input[i - 1])) < > as_float(__half_to_float(expected[i - 1]))) ? > + input[i - 1] : expected[i - 1]; > + else > + expected[i] = min(input[i - 1], expected[i - 1]); > + } > } > } > > @@ -73,7 +93,8 @@ template<class T> > static void generate_data(WG_FUNCTION wg_func, > T* &input, > T* &expected, > - size_t SIMD_SIZE) > + size_t SIMD_SIZE, > + bool IS_HALF) > { > input = new T[WG_GLOBAL_SIZE]; > expected = new T[WG_GLOBAL_SIZE]; > @@ -101,6 +122,8 @@ static void generate_data(WG_FUNCTION wg_func, > input[gid + lid] += ((rand() % 2 - 1) * base_val); > /* add trailing random bits, tests GENERAL cases */ > input[gid + lid] += (rand() % 112); > + if (IS_HALF) > + input[gid + lid] = __float_to_half(as_uint((float)input[gid + > + lid]/2)); > > #if DEBUG_STDOUT > /* output generated input */ > @@ -111,7 +134,7 @@ static void generate_data(WG_FUNCTION wg_func, > } > > /* expected values */ > - compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE); > + compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE, > + IS_HALF); > > #if DEBUG_STDOUT > /* output expected input */ > @@ -134,7 +157,8 @@ static void generate_data(WG_FUNCTION wg_func, > template<class T> static void subgroup_generic(WG_FUNCTION wg_func, > T* input, > - T* expected) > + T* expected, > + bool IS_HALF = false) > { > /* get simd size */ > globals[0] = WG_GLOBAL_SIZE; > @@ -143,7 +167,7 @@ static void subgroup_generic(WG_FUNCTION > wg_func, > > OCL_CALL(utestclGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_M > AX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*1,locals,sizeof(si > ze_t),&SIMD_SIZE,NULL); > > /* input and expected data */ > - generate_data(wg_func, input, expected, SIMD_SIZE); > + generate_data(wg_func, input, expected, SIMD_SIZE, IS_HALF); > > /* prepare input for data type */ > OCL_CREATE_BUFFER(buf[0], 0, WG_GLOBAL_SIZE * sizeof(T), NULL); @@ > -166,8 +190,21 @@ static void subgroup_generic(WG_FUNCTION wg_func, > for (uint32_t i = 0; i < WG_GLOBAL_SIZE; i++) > if(((T *)buf_data[1])[i] != *(expected + i)) > { > + if (IS_HALF) { > + float num_computed = as_float(__half_to_float(((T > *)buf_data[1])[i])); > + float num_expected = as_float(__half_to_float(*(expected + i))); > + float num_diff = abs(num_computed - num_expected) / > abs(num_expected); > + if (num_diff > 0.03f) { > + mismatches++; > +#if DEBUG_STDOUT > + /* output mismatch */ > + cout << "Err at " << i << ", " << num_computed > + << " != " << num_expected <<" diff: " <<num_diff <<endl; > +#endif > + } > + } > /* found mismatch on integer, increment */ > - if(numeric_limits<T>::is_integer){ > + else if (numeric_limits<T>::is_integer) { > mismatches++; > > #if DEBUG_STDOUT > @@ -261,6 +298,20 @@ void > compiler_subgroup_scan_exclusive_add_float(void) > subgroup_generic(WG_SCAN_EXCLUSIVE_ADD, input, expected); } > MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_add_f > loat); > +void compiler_subgroup_scan_exclusive_add_half(void) > +{ > + if(!cl_check_subgroups()) > + return; > + if(!cl_check_half()) > + return; > + cl_half *input = NULL; > + cl_half *expected = NULL; > + OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_exclusive.cl", > + "compiler_subgroup_scan_exclusive_add_half", > + SOURCE, "-DHALF"); > + subgroup_generic(WG_SCAN_EXCLUSIVE_ADD, input, expected, true); } > +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_add > _half); > > /* > * Workgroup scan_exclusive max utest functions @@ -320,6 +371,20 @@ > void compiler_subgroup_scan_exclusive_max_float(void) > subgroup_generic(WG_SCAN_EXCLUSIVE_MAX, input, expected); } > MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_max_ > float); > +void compiler_subgroup_scan_exclusive_max_half(void) > +{ > + if(!cl_check_subgroups()) > + return; > + if(!cl_check_half()) > + return; > + cl_half *input = NULL; > + cl_half *expected = NULL; > + OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_exclusive.cl", > + "compiler_subgroup_scan_exclusive_max_half", > + SOURCE, "-DHALF"); > + subgroup_generic(WG_SCAN_EXCLUSIVE_MAX, input, expected, true); } > +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_max > _half); > > /* > * Workgroup scan_exclusive min utest functions @@ -379,3 +444,17 @@ > void compiler_subgroup_scan_exclusive_min_float(void) > subgroup_generic(WG_SCAN_EXCLUSIVE_MIN, input, expected); } > MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_min_f > loat); > +void compiler_subgroup_scan_exclusive_min_half(void) > +{ > + if(!cl_check_subgroups()) > + return; > + if(!cl_check_half()) > + return; > + cl_half *input = NULL; > + cl_half *expected = NULL; > + OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_exclusive.cl", > + "compiler_subgroup_scan_exclusive_min_half", > + SOURCE, "-DHALF"); > + subgroup_generic(WG_SCAN_EXCLUSIVE_MIN, input, expected, true); } > +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_min > _half); > diff --git a/utests/compiler_subgroup_scan_inclusive.cpp > b/utests/compiler_subgroup_scan_inclusive.cpp > index fa32855..0f0df1c 100644 > --- a/utests/compiler_subgroup_scan_inclusive.cpp > +++ b/utests/compiler_subgroup_scan_inclusive.cpp > @@ -32,25 +32,41 @@ template<class T> > static void compute_expected(WG_FUNCTION wg_func, > T* input, > T* expected, > - size_t SIMD_SIZE) > + size_t SIMD_SIZE, > + bool IS_HALF) > { > if(wg_func == WG_SCAN_INCLUSIVE_ADD) > { > expected[0] = input[0]; > - for(uint32_t i = 1; i < SIMD_SIZE; i++) > - expected[i] = input[i] + expected[i - 1]; > + for(uint32_t i = 1; i < SIMD_SIZE; i++) { > + if (IS_HALF) > + expected[i] = > __float_to_half(as_uint(as_float(__half_to_float(input[i])) + > + > as_float(__half_to_float(expected[i - 1])))); > + else > + expected[i] = input[i] + expected[i - 1]; > + } > } > else if(wg_func == WG_SCAN_INCLUSIVE_MAX) > { > expected[0] = input[0]; > - for(uint32_t i = 1; i < SIMD_SIZE; i++) > - expected[i] = max(input[i], expected[i - 1]); > + for(uint32_t i = 1; i < SIMD_SIZE; i++) { > + if (IS_HALF) > + expected[i] = (as_float(__half_to_float(input[i])) > > as_float(__half_to_float(expected[i - 1]))) ? > + input[i] : expected[i - 1]; > + else > + expected[i] = max(input[i], expected[i - 1]); > + } > } > else if(wg_func == WG_SCAN_INCLUSIVE_MIN) > { > expected[0] = input[0]; > - for(uint32_t i = 1; i < SIMD_SIZE; i++) > - expected[i] = min(input[i], expected[i - 1]); > + for(uint32_t i = 1; i < SIMD_SIZE; i++) { > + if (IS_HALF) > + expected[i] = (as_float(__half_to_float(input[i])) < > as_float(__half_to_float(expected[i - 1]))) ? > + input[i] : expected[i - 1]; > + else > + expected[i] = min(input[i], expected[i - 1]); > + } > } > } > > @@ -62,7 +78,8 @@ template<class T> > static void generate_data(WG_FUNCTION wg_func, > T* &input, > T* &expected, > - size_t SIMD_SIZE) > + size_t SIMD_SIZE, > + bool IS_HALF) > { > input = new T[WG_GLOBAL_SIZE]; > expected = new T[WG_GLOBAL_SIZE]; > @@ -91,6 +108,8 @@ static void generate_data(WG_FUNCTION wg_func, > input[gid + lid] += ((rand() % 2 - 1) * base_val); > /* add trailing random bits, tests GENERAL cases */ > input[gid + lid] += (rand() % 112); > + if (IS_HALF) > + input[gid + lid] = __float_to_half(as_uint((float)input[gid + > + lid]/2)); > > #if DEBUG_STDOUT > /* output generated input */ > @@ -101,7 +120,7 @@ static void generate_data(WG_FUNCTION wg_func, > } > > /* expected values */ > - compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE); > + compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE, > + IS_HALF); > > #if DEBUG_STDOUT > /* output expected input */ > @@ -124,7 +143,8 @@ static void generate_data(WG_FUNCTION wg_func, > template<class T> static void subgroup_generic(WG_FUNCTION wg_func, > T* input, > - T* expected) > + T* expected, > + bool IS_HALF = false) > { > /* get simd size */ > globals[0] = WG_GLOBAL_SIZE; > @@ -133,7 +153,7 @@ static void subgroup_generic(WG_FUNCTION > wg_func, > > OCL_CALL(utestclGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_M > AX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*1,locals,sizeof(si > ze_t),&SIMD_SIZE,NULL); > > /* input and expected data */ > - generate_data(wg_func, input, expected, SIMD_SIZE); > + generate_data(wg_func, input, expected, SIMD_SIZE, IS_HALF); > > /* prepare input for data type */ > OCL_CREATE_BUFFER(buf[0], 0, WG_GLOBAL_SIZE * sizeof(T), NULL); @@ > -156,8 +176,21 @@ static void subgroup_generic(WG_FUNCTION wg_func, > for (uint32_t i = 0; i < WG_GLOBAL_SIZE; i++) > if(((T *)buf_data[1])[i] != *(expected + i)) > { > + if (IS_HALF) { > + float num_computed = as_float(__half_to_float(((T > *)buf_data[1])[i])); > + float num_expected = as_float(__half_to_float(*(expected + i))); > + float num_diff = abs(num_computed - num_expected) / > abs(num_expected); > + if (num_diff > 0.03f) { > + mismatches++; > +#if DEBUG_STDOUT > + /* output mismatch */ > + cout << "Err at " << i << ", " << num_computed > + << " != " << num_expected <<" diff: " <<num_diff <<endl; > +#endif > + } > + } > /* found mismatch on integer, increment */ > - if(numeric_limits<T>::is_integer){ > + else if (numeric_limits<T>::is_integer) { > mismatches++; > > #if DEBUG_STDOUT > @@ -251,6 +284,20 @@ void > compiler_subgroup_scan_inclusive_add_float(void) > subgroup_generic(WG_SCAN_INCLUSIVE_ADD, input, expected); } > MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_add_fl > oat); > +void compiler_subgroup_scan_inclusive_add_half(void) > +{ > + if(!cl_check_subgroups()) > + return; > + if(!cl_check_half()) > + return; > + cl_half *input = NULL; > + cl_half *expected = NULL; > + OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_inclusive.cl", > + "compiler_subgroup_scan_inclusive_add_half", > + SOURCE, "-DHALF"); > + subgroup_generic(WG_SCAN_INCLUSIVE_ADD, input, expected, true); } > +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_add_ > half); > > /* > * Workgroup scan_inclusive max utest functions @@ -310,6 +357,20 @@ > void compiler_subgroup_scan_inclusive_max_float(void) > subgroup_generic(WG_SCAN_INCLUSIVE_MAX, input, expected); } > MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_max_f > loat); > +void compiler_subgroup_scan_inclusive_max_half(void) > +{ > + if(!cl_check_subgroups()) > + return; > + if(!cl_check_half()) > + return; > + cl_half *input = NULL; > + cl_half *expected = NULL; > + OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_inclusive.cl", > + "compiler_subgroup_scan_inclusive_max_half", > + SOURCE, "-DHALF"); > + subgroup_generic(WG_SCAN_INCLUSIVE_MAX, input, expected, true); } > +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_max > _half); > > /* > * Workgroup scan_inclusive min utest functions @@ -369,4 +430,17 @@ > void compiler_subgroup_scan_inclusive_min_float(void) > subgroup_generic(WG_SCAN_INCLUSIVE_MIN, input, expected); } > MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_min_fl > oat); > - > +void compiler_subgroup_scan_inclusive_min_half(void) > +{ > + if(!cl_check_subgroups()) > + return; > + if(!cl_check_half()) > + return; > + cl_half *input = NULL; > + cl_half *expected = NULL; > + OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_inclusive.cl", > + "compiler_subgroup_scan_inclusive_min_half", > + SOURCE, "-DHALF"); > + subgroup_generic(WG_SCAN_INCLUSIVE_MIN, input, expected, true); } > +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_min_ > half); > -- > 2.7.4 > > _______________________________________________ > Beignet mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
