From: "Yang, Rong R" <[email protected]> Kabylake is almost the same as Skylake, so use the Skylake functions directly.
v4: KBL is also gen9. Signed-off-by: Yang Rong <[email protected]> --- backend/src/backend/gen_context.cpp | 2 +- src/cl_device_id.c | 224 ++++++++++++++++++++++++++++-------- src/intel/intel_gpgpu.c | 2 +- 3 files changed, 179 insertions(+), 49 deletions(-) diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 2db5ff9..b429ec3 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -3892,7 +3892,7 @@ namespace gbe uint32_t insn_version = 0; if (IS_GEN7(deviceID) || IS_GEN75(deviceID)) insn_version = 7; - else if (IS_GEN8(deviceID) || IS_GEN9(deviceID) || IS_GEN10(deviceID)) + else if (IS_GEN8(deviceID) || IS_GEN9(deviceID)) insn_version = 8; fprintf(file, "%s's disassemble begin:\n", genKernel->getName()); ir::LabelIndex curLabel = (ir::LabelIndex)0; diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 66666ea..a0b0474 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -210,6 +210,61 @@ static struct _cl_device_id intel_bxt_device = { #include "cl_gen9_device.h" }; +static struct _cl_device_id intel_kbl_gt1_device = { + INIT_ICD(dispatch) + .max_compute_unit = 12, + .max_thread_per_unit = 7, + .sub_slice_count = 2, + .max_work_item_sizes = {512, 512, 512}, + .max_work_group_size = 512, + .max_clock_frequency = 1000, +#include "cl_gen9_device.h" +}; + +static struct _cl_device_id intel_kbl_gt15_device = { + INIT_ICD(dispatch) + .max_compute_unit = 18, + .max_thread_per_unit = 7, + .sub_slice_count = 3, + .max_work_item_sizes = {512, 512, 512}, + .max_work_group_size = 512, + .max_clock_frequency = 1000, +#include "cl_gen9_device.h" +}; + +static struct _cl_device_id intel_kbl_gt2_device = { + INIT_ICD(dispatch) + .max_compute_unit = 24, + .max_thread_per_unit = 7, + .sub_slice_count = 3, + .max_work_item_sizes = {512, 512, 512}, + .max_work_group_size = 512, + .max_clock_frequency = 1000, +#include "cl_gen9_device.h" +}; + +static struct _cl_device_id intel_kbl_gt3_device = 
{ + INIT_ICD(dispatch) + .max_compute_unit = 48, + .max_thread_per_unit = 7, + .sub_slice_count = 6, + .max_work_item_sizes = {512, 512, 512}, + .max_work_group_size = 512, + .max_clock_frequency = 1000, +#include "cl_gen9_device.h" +}; + +static struct _cl_device_id intel_kbl_gt4_device = { + INIT_ICD(dispatch) + .max_compute_unit = 72, + .max_thread_per_unit = 7, + .sub_slice_count = 9, + .max_work_item_sizes = {512, 512, 512}, + .max_work_group_size = 512, + .max_clock_frequency = 1000, +#include "cl_gen9_device.h" +}; + LOCAL cl_device_id cl_get_gt_device(void) { @@ -580,6 +635,98 @@ bxt_break: cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); break; + case PCI_CHIP_KABYLAKE_ULT_GT1: + DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake ULT GT1"); + case PCI_CHIP_KABYLAKE_DT_GT1: + DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake Desktop GT1"); + case PCI_CHIP_KABYLAKE_HALO_GT1: + DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake Halo GT1"); + case PCI_CHIP_KABYLAKE_ULX_GT1: + DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake ULX GT1"); + case PCI_CHIP_KABYLAKE_SRV_GT1: + DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake Server GT1"); +kbl_gt1_break: + intel_kbl_gt1_device.device_id = device_id; + intel_kbl_gt1_device.platform = cl_get_platform_default(); + ret = &intel_kbl_gt1_device; +#ifdef ENABLE_FP64 + cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); +#endif + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); + break; + + case PCI_CHIP_KABYLAKE_ULT_GT15: + DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) HD Graphics Kabylake ULT GT1.5"); + case PCI_CHIP_KABYLAKE_DT_GT15: + DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) HD 
Graphics Kabylake Desktop GT1.5"); + case PCI_CHIP_KABYLAKE_HALO_GT15: + DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) HD Graphics Kabylake Halo GT1.5"); + case PCI_CHIP_KABYLAKE_ULX_GT15: + DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) HD Graphics Kabylake ULX GT1.5"); +kbl_gt15_break: + intel_kbl_gt15_device.device_id = device_id; + intel_kbl_gt15_device.platform = cl_get_platform_default(); + ret = &intel_kbl_gt15_device; +#ifdef ENABLE_FP64 + cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); +#endif + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); + break; + + case PCI_CHIP_KABYLAKE_ULT_GT2: + case PCI_CHIP_KABYLAKE_ULT_GT2_1: + DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake ULT GT2"); + case PCI_CHIP_KABYLAKE_DT_GT2: + DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake Desktop GT2"); + case PCI_CHIP_KABYLAKE_HALO_GT2: + DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake Halo GT2"); + case PCI_CHIP_KABYLAKE_ULX_GT2: + DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake ULX GT2"); + case PCI_CHIP_KABYLAKE_SRV_GT2: + DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake Server GT2"); + case PCI_CHIP_KABYLAKE_WKS_GT2: + DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake Workstation GT2"); +kbl_gt2_break: + intel_kbl_gt2_device.device_id = device_id; + intel_kbl_gt2_device.platform = cl_get_platform_default(); + ret = &intel_kbl_gt2_device; +#ifdef ENABLE_FP64 + cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); +#endif + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); + break; + + case PCI_CHIP_KABYLAKE_ULT_GT3: + case 
PCI_CHIP_KABYLAKE_ULT_GT3_1: + case PCI_CHIP_KABYLAKE_ULT_GT3_2: + DECL_INFO_STRING(kbl_gt3_break, intel_kbl_gt3_device, name, "Intel(R) HD Graphics Kabylake ULT GT3"); +kbl_gt3_break: + intel_kbl_gt3_device.device_id = device_id; + intel_kbl_gt3_device.platform = cl_get_platform_default(); + ret = &intel_kbl_gt3_device; +#ifdef ENABLE_FP64 + cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); +#endif + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); + break; + + case PCI_CHIP_KABYLAKE_HALO_GT4: + DECL_INFO_STRING(kbl_gt4_break, intel_kbl_gt4_device, name, "Intel(R) HD Graphics Kabylake ULT GT4"); +kbl_gt4_break: + intel_kbl_gt4_device.device_id = device_id; + intel_kbl_gt4_device.platform = cl_get_platform_default(); + ret = &intel_kbl_gt4_device; +#ifdef ENABLE_FP64 + cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); +#endif + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); + break; + case PCI_CHIP_SANDYBRIDGE_BRIDGE: case PCI_CHIP_SANDYBRIDGE_GT1: case PCI_CHIP_SANDYBRIDGE_GT2: @@ -787,6 +934,29 @@ cl_get_device_ids(cl_platform_id platform, memcpy(param_value, device->FIELD, device->JOIN(FIELD,_sz)); \ return CL_SUCCESS; +LOCAL cl_bool is_gen_device(cl_device_id device) { + return device == &intel_ivb_gt1_device || + device == &intel_ivb_gt2_device || + device == &intel_baytrail_t_device || + device == &intel_hsw_gt1_device || + device == &intel_hsw_gt2_device || + device == &intel_hsw_gt3_device || + device == &intel_brw_gt1_device || + device == &intel_brw_gt2_device || + device == &intel_brw_gt3_device || + device == &intel_chv_device || + device == &intel_skl_gt1_device || + device == &intel_skl_gt2_device || + device == &intel_skl_gt3_device || + device == &intel_skl_gt4_device || + device == &intel_bxt_device || + device == &intel_kbl_gt1_device || + device == &intel_kbl_gt15_device || + device == 
&intel_kbl_gt2_device || + device == &intel_kbl_gt3_device || + device == &intel_kbl_gt4_device; +} + LOCAL cl_int cl_get_device_info(cl_device_id device, cl_device_info param_name, @@ -794,22 +964,7 @@ cl_get_device_info(cl_device_id device, void * param_value, size_t * param_value_size_ret) { - if (UNLIKELY(device != &intel_ivb_gt1_device && - device != &intel_ivb_gt2_device && - device != &intel_baytrail_t_device && - device != &intel_hsw_gt1_device && - device != &intel_hsw_gt2_device && - device != &intel_hsw_gt3_device && - device != &intel_brw_gt1_device && - device != &intel_brw_gt2_device && - device != &intel_brw_gt3_device && - device != &intel_chv_device && - device != &intel_skl_gt1_device && - device != &intel_skl_gt2_device && - device != &intel_skl_gt3_device && - device != &intel_skl_gt4_device && - device != &intel_bxt_device - )) + if (UNLIKELY(is_gen_device(device) == CL_FALSE)) return CL_INVALID_DEVICE; /* Find the correct parameter */ @@ -909,22 +1064,7 @@ cl_get_device_info(cl_device_id device, LOCAL cl_int cl_device_get_version(cl_device_id device, cl_int *ver) { - if (UNLIKELY(device != &intel_ivb_gt1_device && - device != &intel_ivb_gt2_device && - device != &intel_baytrail_t_device && - device != &intel_hsw_gt1_device && - device != &intel_hsw_gt2_device && - device != &intel_hsw_gt3_device && - device != &intel_brw_gt1_device && - device != &intel_brw_gt2_device && - device != &intel_brw_gt3_device && - device != &intel_chv_device && - device != &intel_skl_gt1_device && - device != &intel_skl_gt2_device && - device != &intel_skl_gt3_device && - device != &intel_skl_gt4_device && - device != &intel_bxt_device - )) + if (UNLIKELY(is_gen_device(device) == CL_FALSE)) return CL_INVALID_DEVICE; if (ver == NULL) return CL_SUCCESS; @@ -942,6 +1082,10 @@ cl_device_get_version(cl_device_id device, cl_int *ver) || device == &intel_skl_gt3_device || device == &intel_skl_gt4_device || device == &intel_bxt_device) { *ver = 9; + } else if (device == 
&intel_kbl_gt1_device || device == &intel_kbl_gt2_device + || device == &intel_kbl_gt3_device || device == &intel_kbl_gt4_device + || device == &intel_kbl_gt15_device) { + *ver = 10; } else return CL_INVALID_VALUE; @@ -1015,21 +1159,7 @@ cl_get_kernel_workgroup_info(cl_kernel kernel, { int err = CL_SUCCESS; int dimension = 0; - if (UNLIKELY(device != &intel_ivb_gt1_device && - device != &intel_ivb_gt2_device && - device != &intel_baytrail_t_device && - device != &intel_hsw_gt1_device && - device != &intel_hsw_gt2_device && - device != &intel_hsw_gt3_device && - device != &intel_brw_gt1_device && - device != &intel_brw_gt2_device && - device != &intel_brw_gt3_device && - device != &intel_chv_device && - device != &intel_skl_gt1_device && - device != &intel_skl_gt2_device && - device != &intel_skl_gt3_device && - device != &intel_skl_gt4_device && - device != &intel_bxt_device)) + if (UNLIKELY(is_gen_device(device) == CL_FALSE)) return CL_INVALID_DEVICE; CHECK_KERNEL(kernel); diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index db967e8..1aa87a1 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -2467,7 +2467,7 @@ intel_set_gpgpu_callbacks(int device_id) intel_gpgpu_select_pipeline = intel_gpgpu_select_pipeline_gen7; return; } - if (IS_SKYLAKE(device_id) || IS_BROXTON(device_id)) { + if (IS_GEN9(device_id)) { cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen9; intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8; cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen9; -- 2.1.4 _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
