From: Pan Xiuli <[email protected]> Check if device support subgroup and half first, use build options to hide code for unsported device. V2: Fix half part test case for utest multithread.
Signed-off-by: Pan Xiuli <[email protected]> --- kernels/compiler_subgroup_broadcast.cl | 16 ++++- kernels/compiler_subgroup_reduce.cl | 19 +++++ kernels/compiler_subgroup_scan_exclusive.cl | 19 +++++ kernels/compiler_subgroup_scan_inclusive.cl | 19 +++++ utests/compiler_subgroup_broadcast.cpp | 27 +++++-- utests/compiler_subgroup_reduce.cpp | 104 +++++++++++++++++++++++---- utests/compiler_subgroup_scan_exclusive.cpp | 107 ++++++++++++++++++++++++---- utests/compiler_subgroup_scan_inclusive.cpp | 100 ++++++++++++++++++++++---- 8 files changed, 367 insertions(+), 44 deletions(-) diff --git a/kernels/compiler_subgroup_broadcast.cl b/kernels/compiler_subgroup_broadcast.cl index 4f21cf5..8c155ee 100644 --- a/kernels/compiler_subgroup_broadcast.cl +++ b/kernels/compiler_subgroup_broadcast.cl @@ -1,7 +1,7 @@ /* * Subgroup broadcast 1D functions */ - +#ifndef HALF kernel void compiler_subgroup_broadcast_imm_int(global int *src, global int *dst, uint simd_id) @@ -32,3 +32,17 @@ kernel void compiler_subgroup_broadcast_long(global long *src, long broadcast_val = sub_group_broadcast(val, simd_id); dst[index] = broadcast_val; } +#else +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +kernel void compiler_subgroup_broadcast_half(global half *src, + global half *dst, + uint simd_id) +{ + uint index = get_global_id(0); + + half val = src[index]; + half broadcast_val = sub_group_broadcast(val, simd_id); + printf("%d val %d is %d\n",index,as_ushort(val), as_ushort(broadcast_val)); + dst[index] = broadcast_val; +} +#endif diff --git a/kernels/compiler_subgroup_reduce.cl b/kernels/compiler_subgroup_reduce.cl index 77ffb07..6d7ecfd 100644 --- a/kernels/compiler_subgroup_reduce.cl +++ b/kernels/compiler_subgroup_reduce.cl @@ -1,6 +1,7 @@ /* * Subgroup any all functions */ +#ifndef HALF kernel void compiler_subgroup_any(global int *src, global int *dst) { int val = src[get_global_id(0)]; int predicate = sub_group_any(val); @@ -134,3 +135,21 @@ kernel void compiler_subgroup_reduce_min_float(global float *src, global float * float sum = sub_group_reduce_min(val); dst[get_global_id(0)] = sum; } +#else +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +kernel void compiler_subgroup_reduce_add_half(global half *src, global half *dst) { + half val = src[get_global_id(0)]; + half sum = sub_group_reduce_add(val); + dst[get_global_id(0)] = sum; +} +kernel void compiler_subgroup_reduce_max_half(global half *src, global half *dst) { + half val = src[get_global_id(0)]; + half sum = sub_group_reduce_max(val); + dst[get_global_id(0)] = sum; +} +kernel void compiler_subgroup_reduce_min_half(global half *src, global half *dst) { + half val = src[get_global_id(0)]; + half sum = sub_group_reduce_min(val); + dst[get_global_id(0)] = sum; +} +#endif diff --git a/kernels/compiler_subgroup_scan_exclusive.cl b/kernels/compiler_subgroup_scan_exclusive.cl index afc00d0..ca0ada2 100644 --- a/kernels/compiler_subgroup_scan_exclusive.cl +++ b/kernels/compiler_subgroup_scan_exclusive.cl @@ -1,6 +1,7 @@ /* * Subgroup scan exclusive add functions */ +#ifndef HALF kernel void compiler_subgroup_scan_exclusive_add_int(global int *src, global int *dst) { int val = src[get_global_id(0)]; int sum = sub_group_scan_exclusive_add(val); @@ -96,3 +97,21 @@ kernel void compiler_subgroup_scan_exclusive_min_float(global float *src, global float sum = sub_group_scan_exclusive_min(val); dst[get_global_id(0)] = sum; } +#else +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +kernel void compiler_subgroup_scan_exclusive_add_half(global half *src, global half *dst) { + half val = src[get_global_id(0)]; + half sum = sub_group_scan_exclusive_add(val); + dst[get_global_id(0)] = sum; +} +kernel void compiler_subgroup_scan_exclusive_max_half(global half *src, global half *dst) { + half val = src[get_global_id(0)]; + half sum = sub_group_scan_exclusive_max(val); + dst[get_global_id(0)] = sum; +} +kernel void compiler_subgroup_scan_exclusive_min_half(global half *src, global half *dst) { + half val = src[get_global_id(0)]; + half sum = sub_group_scan_exclusive_min(val); + dst[get_global_id(0)] = sum; +} +#endif diff --git a/kernels/compiler_subgroup_scan_inclusive.cl b/kernels/compiler_subgroup_scan_inclusive.cl index da1a6e6..e97521c 100644 --- a/kernels/compiler_subgroup_scan_inclusive.cl +++ b/kernels/compiler_subgroup_scan_inclusive.cl @@ -1,6 +1,7 @@ /* * Subgroup scan inclusive add functions */ +#ifndef HALF kernel void compiler_subgroup_scan_inclusive_add_int(global int *src, global int *dst) { int val = src[get_global_id(0)]; int sum = sub_group_scan_inclusive_add(val); @@ -96,3 +97,21 @@ kernel void compiler_subgroup_scan_inclusive_min_float(global float *src, global float sum = sub_group_scan_inclusive_min(val); dst[get_global_id(0)] = sum; } +#else +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +kernel void compiler_subgroup_scan_inclusive_add_half(global half *src, global half *dst) { + half val = src[get_global_id(0)]; + half sum = sub_group_scan_inclusive_add(val); + dst[get_global_id(0)] = sum; +} +kernel void compiler_subgroup_scan_inclusive_max_half(global half *src, global half *dst) { + half val = src[get_global_id(0)]; + half sum = sub_group_scan_inclusive_max(val); + dst[get_global_id(0)] = sum; +} +kernel void compiler_subgroup_scan_inclusive_min_half(global half *src, global half *dst) { + half val = src[get_global_id(0)]; + half sum = sub_group_scan_inclusive_min(val); + dst[get_global_id(0)] = sum; +} +#endif diff --git a/utests/compiler_subgroup_broadcast.cpp b/utests/compiler_subgroup_broadcast.cpp index 2835161..9a7979c 100644 --- a/utests/compiler_subgroup_broadcast.cpp +++ b/utests/compiler_subgroup_broadcast.cpp @@ -59,10 +59,15 @@ static void generate_data(T* &input, /* initially 0, augment after */ input[gid + lid] = 0; - /* check all data types, test ideal for QWORD types */ - input[gid + lid] += ((rand() % 2 - 1) * base_val); - /* add trailing random bits, tests GENERAL cases */ - input[gid + lid] += (rand() % 112); + if(sizeof(T) == 2) { + input[gid + lid] = __float_to_half(as_uint((float)(gid + lid))); + } + else { + /* check all data types, test ideal for QWORD types */ + input[gid + lid] += ((rand() % 2 - 1) * base_val); + /* add trailing random bits, tests GENERAL cases */ + input[gid + lid] += (rand() % 112); + } #if DEBUG_STDOUT /* output generated input */ @@ -185,3 +190,17 @@ void compiler_subgroup_broadcast_long(void) subgroup_generic(input, expected); } MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(compiler_subgroup_broadcast_long); +void compiler_subgroup_broadcast_half(void) +{ + if(!cl_check_subgroups()) + return; + if(!cl_check_half()) + return; + cl_half *input = NULL; + cl_half *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_broadcast.cl", + "compiler_subgroup_broadcast_half", + SOURCE, "-DHALF"); + subgroup_generic(input, expected); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_broadcast_half); diff --git a/utests/compiler_subgroup_reduce.cpp b/utests/compiler_subgroup_reduce.cpp index 3c3df06..ff545c6 100644 --- a/utests/compiler_subgroup_reduce.cpp +++ b/utests/compiler_subgroup_reduce.cpp @@ -33,7 +33,8 @@ template<class T> static void compute_expected(WG_FUNCTION wg_func, T* input, T* expected, - size_t SIMD_SIZE) + size_t SIMD_SIZE, + bool IS_HALF) { if(wg_func == WG_ANY) { @@ -54,24 +55,43 @@ static void compute_expected(WG_FUNCTION wg_func, else if(wg_func == WG_REDUCE_ADD) { T wg_sum = input[0]; - for(uint32_t i = 1; i < SIMD_SIZE; i++) - wg_sum += input[i]; + if(IS_HALF) { + float wg_sum_tmp = 0.0f; + for(uint32_t i = 0; i < SIMD_SIZE; i++) { + wg_sum_tmp += as_float(__half_to_float(input[i])); + } + wg_sum = __float_to_half(as_uint(wg_sum_tmp)); + } + else { + for(uint32_t i = 1; i < SIMD_SIZE; i++) + wg_sum += input[i]; + } for(uint32_t i = 0; i < SIMD_SIZE; i++) expected[i] = wg_sum; } else if(wg_func == WG_REDUCE_MAX) { T wg_max = input[0]; - for(uint32_t i = 1; i < SIMD_SIZE; i++) - wg_max = max(input[i], wg_max); + for(uint32_t i = 1; i < SIMD_SIZE; i++) { + if (IS_HALF) { + wg_max = (as_float(__half_to_float(input[i])) > as_float(__half_to_float(wg_max))) ? input[i] : wg_max; + } + else + wg_max = max(input[i], wg_max); + } for(uint32_t i = 0; i < SIMD_SIZE; i++) expected[i] = wg_max; } else if(wg_func == WG_REDUCE_MIN) { T wg_min = input[0]; - for(uint32_t i = 1; i < SIMD_SIZE; i++) - wg_min = min(input[i], wg_min); + for(uint32_t i = 1; i < SIMD_SIZE; i++) { + if (IS_HALF) { + wg_min= (as_float(__half_to_float(input[i])) < as_float(__half_to_float(wg_min))) ? input[i] : wg_min; + } + else + wg_min = min(input[i], wg_min); + } for(uint32_t i = 0; i < SIMD_SIZE; i++) expected[i] = wg_min; } @@ -85,7 +105,8 @@ template<class T> static void generate_data(WG_FUNCTION wg_func, T* &input, T* &expected, - size_t SIMD_SIZE) + size_t SIMD_SIZE, + bool IS_HALF) { input = new T[WG_GLOBAL_SIZE]; expected = new T[WG_GLOBAL_SIZE]; @@ -115,6 +136,8 @@ static void generate_data(WG_FUNCTION wg_func, /* add trailing random bits, tests GENERAL cases */ input[gid + lid] += (rand() % 112); /* always last bit is 1, ideal test ALL/ANY */ + if (IS_HALF) + input[gid + lid] = __float_to_half(as_uint((float)input[gid + lid]/2)); } else { input[gid + lid] += rand(); input[gid + lid] += rand() / ((float)RAND_MAX + 1); @@ -129,7 +152,7 @@ static void generate_data(WG_FUNCTION wg_func, } /* expected values */ - compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE); + compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE, IS_HALF); #if DEBUG_STDOUT /* output expected input */ @@ -152,7 +175,8 @@ static void generate_data(WG_FUNCTION wg_func, template<class T> static void subgroup_generic(WG_FUNCTION wg_func, T* input, - T* expected) + T* expected, + bool IS_HALF = false) { /* get simd size */ globals[0] = WG_GLOBAL_SIZE; @@ -161,7 +185,7 @@ static void subgroup_generic(WG_FUNCTION wg_func, OCL_CALL(utestclGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*1,locals,sizeof(size_t),&SIMD_SIZE,NULL); /* input and expected data */ - generate_data(wg_func, input, expected, SIMD_SIZE); + generate_data(wg_func, input, expected, SIMD_SIZE, IS_HALF); /* prepare input for data type */ OCL_CREATE_BUFFER(buf[0], 0, WG_GLOBAL_SIZE * sizeof(T), NULL); @@ -185,8 +209,22 @@ static void subgroup_generic(WG_FUNCTION wg_func, for (uint32_t i = 0; i < WG_GLOBAL_SIZE; i++) if(((T *)buf_data[1])[i] != *(expected + i)) { + if (IS_HALF) { + float num_computed = as_float(__half_to_float(((T *)buf_data[1])[i])); + float num_expected = as_float(__half_to_float(*(expected + i))); + float num_diff = abs(num_computed - num_expected) / abs(num_expected); + if (num_diff > 0.03f) { + mismatches++; + } +#if DEBUG_STDOUT + /* output mismatch */ + cout << "Err at " << i << ", " << num_computed + << " != " << num_expected << " diff: " <<num_diff <<endl; +#endif + //} + } /* found mismatch on integer, increment */ - if (numeric_limits<T>::is_integer) { + else if (numeric_limits<T>::is_integer) { mismatches++; #if DEBUG_STDOUT @@ -305,6 +343,20 @@ void compiler_subgroup_reduce_add_float(void) subgroup_generic(WG_REDUCE_ADD, input, expected); } MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_add_float); +void compiler_subgroup_reduce_add_half(void) +{ + if(!cl_check_subgroups()) + return; + if(!cl_check_half()) + return; + cl_half *input = NULL; + cl_half *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_reduce.cl", + "compiler_subgroup_reduce_add_half", + SOURCE, "-DHALF"); + subgroup_generic(WG_REDUCE_ADD, input, expected, true); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_add_half); /* * Workgroup reduce max utest functions @@ -364,6 +416,20 @@ void compiler_subgroup_reduce_max_float(void) subgroup_generic(WG_REDUCE_MAX, input, expected); } MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_max_float); +void compiler_subgroup_reduce_max_half(void) +{ + if(!cl_check_subgroups()) + return; + if(!cl_check_half()) + return; + cl_half *input = NULL; + cl_half *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_reduce.cl", + "compiler_subgroup_reduce_max_half", + SOURCE, "-DHALF"); + subgroup_generic(WG_REDUCE_MAX, input, expected, true); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_max_half); /* * Workgroup reduce min utest functions @@ -423,3 +489,17 @@ void compiler_subgroup_reduce_min_float(void) subgroup_generic(WG_REDUCE_MIN, input, expected); } MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_min_float); +void compiler_subgroup_reduce_min_half(void) +{ + if(!cl_check_subgroups()) + return; + if(!cl_check_half()) + return; + cl_half *input = NULL; + cl_half *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_reduce.cl", + "compiler_subgroup_reduce_min_half", + SOURCE, "-DHALF"); + subgroup_generic(WG_REDUCE_MIN, input, expected, true); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_min_half); diff --git a/utests/compiler_subgroup_scan_exclusive.cpp b/utests/compiler_subgroup_scan_exclusive.cpp index 1a21b59..e51b78d 100644 --- a/utests/compiler_subgroup_scan_exclusive.cpp +++ b/utests/compiler_subgroup_scan_exclusive.cpp @@ -32,36 +32,56 @@ template<class T> static void compute_expected(WG_FUNCTION wg_func, T* input, T* expected, - size_t SIMD_SIZE) + size_t SIMD_SIZE, + bool IS_HALF) { if(wg_func == WG_SCAN_EXCLUSIVE_ADD) { expected[0] = 0; expected[1] = input[0]; - for(uint32_t i = 2; i < SIMD_SIZE; i++) - expected[i] = input[i - 1] + expected[i - 1]; + for(uint32_t i = 2; i < SIMD_SIZE; i++) { + if (IS_HALF) + expected[i] = __float_to_half(as_uint(as_float(__half_to_float(input[i - 1])) + + as_float(__half_to_float(expected[i - 1])))); + else + expected[i] = input[i - 1] + expected[i - 1]; + } } else if(wg_func == WG_SCAN_EXCLUSIVE_MAX) { - if(numeric_limits<T>::is_integer) + if(IS_HALF) + expected[0] = 0xFC00; + else if(numeric_limits<T>::is_integer) expected[0] = numeric_limits<T>::min(); else expected[0] = - numeric_limits<T>::infinity(); expected[1] = input[0]; - for(uint32_t i = 2; i < SIMD_SIZE; i++) - expected[i] = max(input[i - 1], expected[i - 1]); + for(uint32_t i = 2; i < SIMD_SIZE; i++) { + if (IS_HALF) + expected[i] = (as_float(__half_to_float(input[i - 1])) > as_float(__half_to_float(expected[i - 1]))) ? + input[i - 1] : expected[i - 1]; + else + expected[i] = max(input[i - 1], expected[i - 1]); + } } else if(wg_func == WG_SCAN_EXCLUSIVE_MIN) { - if(numeric_limits<T>::is_integer) + if(IS_HALF) + expected[0] = 0x7C00; + else if(numeric_limits<T>::is_integer) expected[0] = numeric_limits<T>::max(); else expected[0] = numeric_limits<T>::infinity(); expected[1] = input[0]; - for(uint32_t i = 2; i < SIMD_SIZE; i++) - expected[i] = min(input[i - 1], expected[i - 1]); + for(uint32_t i = 2; i < SIMD_SIZE; i++) { + if (IS_HALF) + expected[i] = (as_float(__half_to_float(input[i - 1])) < as_float(__half_to_float(expected[i - 1]))) ? + input[i - 1] : expected[i - 1]; + else + expected[i] = min(input[i - 1], expected[i - 1]); + } } } @@ -73,7 +93,8 @@ template<class T> static void generate_data(WG_FUNCTION wg_func, T* &input, T* &expected, - size_t SIMD_SIZE) + size_t SIMD_SIZE, + bool IS_HALF) { input = new T[WG_GLOBAL_SIZE]; expected = new T[WG_GLOBAL_SIZE]; @@ -101,6 +122,8 @@ static void generate_data(WG_FUNCTION wg_func, input[gid + lid] += ((rand() % 2 - 1) * base_val); /* add trailing random bits, tests GENERAL cases */ input[gid + lid] += (rand() % 112); + if (IS_HALF) + input[gid + lid] = __float_to_half(as_uint((float)input[gid + lid]/2)); #if DEBUG_STDOUT /* output generated input */ @@ -111,7 +134,7 @@ static void generate_data(WG_FUNCTION wg_func, } /* expected values */ - compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE); + compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE, IS_HALF); #if DEBUG_STDOUT /* output expected input */ @@ -134,7 +157,8 @@ static void generate_data(WG_FUNCTION wg_func, template<class T> static void subgroup_generic(WG_FUNCTION wg_func, T* input, - T* expected) + T* expected, + bool IS_HALF = false) { /* get simd size */ globals[0] = WG_GLOBAL_SIZE; @@ -143,7 +167,7 @@ static void subgroup_generic(WG_FUNCTION wg_func, OCL_CALL(utestclGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*1,locals,sizeof(size_t),&SIMD_SIZE,NULL); /* input and expected data */ - generate_data(wg_func, input, expected, SIMD_SIZE); + generate_data(wg_func, input, expected, SIMD_SIZE, IS_HALF); /* prepare input for data type */ OCL_CREATE_BUFFER(buf[0], 0, WG_GLOBAL_SIZE * sizeof(T), NULL); @@ -166,8 +190,21 @@ static void subgroup_generic(WG_FUNCTION wg_func, for (uint32_t i = 0; i < WG_GLOBAL_SIZE; i++) if(((T *)buf_data[1])[i] != *(expected + i)) { + if (IS_HALF) { + float num_computed = as_float(__half_to_float(((T *)buf_data[1])[i])); + float num_expected = as_float(__half_to_float(*(expected + i))); + float num_diff = abs(num_computed - num_expected) / abs(num_expected); + if (num_diff > 0.03f) { + mismatches++; +#if DEBUG_STDOUT + /* output mismatch */ + cout << "Err at " << i << ", " << num_computed + << " != " << num_expected <<" diff: " <<num_diff <<endl; +#endif + } + } /* found mismatch on integer, increment */ - if(numeric_limits<T>::is_integer){ + else if (numeric_limits<T>::is_integer) { mismatches++; #if DEBUG_STDOUT @@ -261,6 +298,20 @@ void compiler_subgroup_scan_exclusive_add_float(void) subgroup_generic(WG_SCAN_EXCLUSIVE_ADD, input, expected); } MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_add_float); +void compiler_subgroup_scan_exclusive_add_half(void) +{ + if(!cl_check_subgroups()) + return; + if(!cl_check_half()) + return; + cl_half *input = NULL; + cl_half *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_exclusive.cl", + "compiler_subgroup_scan_exclusive_add_half", + SOURCE, "-DHALF"); + subgroup_generic(WG_SCAN_EXCLUSIVE_ADD, input, expected, true); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_add_half); /* * Workgroup scan_exclusive max utest functions @@ -320,6 +371,20 @@ void compiler_subgroup_scan_exclusive_max_float(void) subgroup_generic(WG_SCAN_EXCLUSIVE_MAX, input, expected); } MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_max_float); +void compiler_subgroup_scan_exclusive_max_half(void) +{ + if(!cl_check_subgroups()) + return; + if(!cl_check_half()) + return; + cl_half *input = NULL; + cl_half *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_exclusive.cl", + "compiler_subgroup_scan_exclusive_max_half", + SOURCE, "-DHALF"); + subgroup_generic(WG_SCAN_EXCLUSIVE_MAX, input, expected, true); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_max_half); /* * Workgroup scan_exclusive min utest functions @@ -379,3 +444,17 @@ void compiler_subgroup_scan_exclusive_min_float(void) subgroup_generic(WG_SCAN_EXCLUSIVE_MIN, input, expected); } MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_min_float); +void compiler_subgroup_scan_exclusive_min_half(void) +{ + if(!cl_check_subgroups()) + return; + if(!cl_check_half()) + return; + cl_half *input = NULL; + cl_half *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_exclusive.cl", + "compiler_subgroup_scan_exclusive_min_half", + SOURCE, "-DHALF"); + subgroup_generic(WG_SCAN_EXCLUSIVE_MIN, input, expected, true); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_min_half); diff --git a/utests/compiler_subgroup_scan_inclusive.cpp b/utests/compiler_subgroup_scan_inclusive.cpp index fa32855..0f0df1c 100644 --- a/utests/compiler_subgroup_scan_inclusive.cpp +++ b/utests/compiler_subgroup_scan_inclusive.cpp @@ -32,25 +32,41 @@ template<class T> static void compute_expected(WG_FUNCTION wg_func, T* input, T* expected, - size_t SIMD_SIZE) + size_t SIMD_SIZE, + bool IS_HALF) { if(wg_func == WG_SCAN_INCLUSIVE_ADD) { expected[0] = input[0]; - for(uint32_t i = 1; i < SIMD_SIZE; i++) - expected[i] = input[i] + expected[i - 1]; + for(uint32_t i = 1; i < SIMD_SIZE; i++) { + if (IS_HALF) + expected[i] = __float_to_half(as_uint(as_float(__half_to_float(input[i])) + + as_float(__half_to_float(expected[i - 1])))); + else + expected[i] = input[i] + expected[i - 1]; + } } else if(wg_func == WG_SCAN_INCLUSIVE_MAX) { expected[0] = input[0]; - for(uint32_t i = 1; i < SIMD_SIZE; i++) - expected[i] = max(input[i], expected[i - 1]); + for(uint32_t i = 1; i < SIMD_SIZE; i++) { + if (IS_HALF) + expected[i] = (as_float(__half_to_float(input[i])) > as_float(__half_to_float(expected[i - 1]))) ? + input[i] : expected[i - 1]; + else + expected[i] = max(input[i], expected[i - 1]); + } } else if(wg_func == WG_SCAN_INCLUSIVE_MIN) { expected[0] = input[0]; - for(uint32_t i = 1; i < SIMD_SIZE; i++) - expected[i] = min(input[i], expected[i - 1]); + for(uint32_t i = 1; i < SIMD_SIZE; i++) { + if (IS_HALF) + expected[i] = (as_float(__half_to_float(input[i])) < as_float(__half_to_float(expected[i - 1]))) ? + input[i] : expected[i - 1]; + else + expected[i] = min(input[i], expected[i - 1]); + } } } @@ -62,7 +78,8 @@ template<class T> static void generate_data(WG_FUNCTION wg_func, T* &input, T* &expected, - size_t SIMD_SIZE) + size_t SIMD_SIZE, + bool IS_HALF) { input = new T[WG_GLOBAL_SIZE]; expected = new T[WG_GLOBAL_SIZE]; @@ -91,6 +108,8 @@ static void generate_data(WG_FUNCTION wg_func, input[gid + lid] += ((rand() % 2 - 1) * base_val); /* add trailing random bits, tests GENERAL cases */ input[gid + lid] += (rand() % 112); + if (IS_HALF) + input[gid + lid] = __float_to_half(as_uint((float)input[gid + lid]/2)); #if DEBUG_STDOUT /* output generated input */ @@ -101,7 +120,7 @@ static void generate_data(WG_FUNCTION wg_func, } /* expected values */ - compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE); + compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE, IS_HALF); #if DEBUG_STDOUT /* output expected input */ @@ -124,7 +143,8 @@ static void generate_data(WG_FUNCTION wg_func, template<class T> static void subgroup_generic(WG_FUNCTION wg_func, T* input, - T* expected) + T* expected, + bool IS_HALF = false) { /* get simd size */ globals[0] = WG_GLOBAL_SIZE; @@ -133,7 +153,7 @@ static void subgroup_generic(WG_FUNCTION wg_func, OCL_CALL(utestclGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*1,locals,sizeof(size_t),&SIMD_SIZE,NULL); /* input and expected data */ - generate_data(wg_func, input, expected, SIMD_SIZE); + generate_data(wg_func, input, expected, SIMD_SIZE, IS_HALF); /* prepare input for data type */ OCL_CREATE_BUFFER(buf[0], 0, WG_GLOBAL_SIZE * sizeof(T), NULL); @@ -156,8 +176,21 @@ static void subgroup_generic(WG_FUNCTION wg_func, for (uint32_t i = 0; i < WG_GLOBAL_SIZE; i++) if(((T *)buf_data[1])[i] != *(expected + i)) { + if (IS_HALF) { + float num_computed = as_float(__half_to_float(((T *)buf_data[1])[i])); + float num_expected = as_float(__half_to_float(*(expected + i))); + float num_diff = abs(num_computed - num_expected) / abs(num_expected); + if (num_diff > 0.03f) { + mismatches++; +#if DEBUG_STDOUT + /* output mismatch */ + cout << "Err at " << i << ", " << num_computed + << " != " << num_expected <<" diff: " <<num_diff <<endl; +#endif + } + } /* found mismatch on integer, increment */ - if(numeric_limits<T>::is_integer){ + else if (numeric_limits<T>::is_integer) { mismatches++; #if DEBUG_STDOUT @@ -251,6 +284,20 @@ void compiler_subgroup_scan_inclusive_add_float(void) subgroup_generic(WG_SCAN_INCLUSIVE_ADD, input, expected); } MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_add_float); +void compiler_subgroup_scan_inclusive_add_half(void) +{ + if(!cl_check_subgroups()) + return; + if(!cl_check_half()) + return; + cl_half *input = NULL; + cl_half *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_inclusive.cl", + "compiler_subgroup_scan_inclusive_add_half", + SOURCE, "-DHALF"); + subgroup_generic(WG_SCAN_INCLUSIVE_ADD, input, expected, true); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_add_half); /* * Workgroup scan_inclusive max utest functions @@ -310,6 +357,20 @@ void compiler_subgroup_scan_inclusive_max_float(void) subgroup_generic(WG_SCAN_INCLUSIVE_MAX, input, expected); } MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_max_float); +void compiler_subgroup_scan_inclusive_max_half(void) +{ + if(!cl_check_subgroups()) + return; + if(!cl_check_half()) + return; + cl_half *input = NULL; + cl_half *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_inclusive.cl", + "compiler_subgroup_scan_inclusive_max_half", + SOURCE, "-DHALF"); + subgroup_generic(WG_SCAN_INCLUSIVE_MAX, input, expected, true); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_max_half); /* * Workgroup scan_inclusive min utest functions @@ -369,4 +430,17 @@ void compiler_subgroup_scan_inclusive_min_float(void) subgroup_generic(WG_SCAN_INCLUSIVE_MIN, input, expected); } MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_min_float); - +void compiler_subgroup_scan_inclusive_min_half(void) +{ + if(!cl_check_subgroups()) + return; + if(!cl_check_half()) + return; + cl_half *input = NULL; + cl_half *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_inclusive.cl", + "compiler_subgroup_scan_inclusive_min_half", + SOURCE, "-DHALF"); + subgroup_generic(WG_SCAN_INCLUSIVE_MIN, input, expected, true); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_min_half); -- 2.7.4 _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
