One minor comment: nothing uses tile_sz, so we can simply remove it and avoid the confusion of overloading dim 2 to mean the tile size. Then dim 0 is just the horizontal alignment and dim 1 is the vertical alignment.
On Tue, Oct 21, 2014 at 09:02:27PM +0800, [email protected] wrote: > From: Junyan He <[email protected]> > > For BDW, the vertical align is 4 at least. > This cause the slice pitch twice as big as > the Gen7 for 1D buffer array. > Because the buffer tiling alignment may change > for different GENs, we move it from run time to > intel driver. > > Signed-off-by: Junyan He <[email protected]> > --- > src/cl_driver.h | 3 +++ > src/cl_driver_defs.c | 1 + > src/cl_mem.c | 19 ++++++------------- > src/intel/intel_driver.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ > src/intel/intel_gpgpu.c | 2 +- > 5 files changed, 56 insertions(+), 14 deletions(-) > > diff --git a/src/cl_driver.h b/src/cl_driver.h > index e973ba5..0603089 100644 > --- a/src/cl_driver.h > +++ b/src/cl_driver.h > @@ -360,6 +360,9 @@ extern cl_buffer_wait_rendering_cb > *cl_buffer_wait_rendering; > typedef int (cl_buffer_get_fd_cb)(cl_buffer, int *fd); > extern cl_buffer_get_fd_cb *cl_buffer_get_fd; > > +typedef int (cl_buffer_get_tiling_align_cb)(cl_context ctx, uint32_t > tiling_mode, uint32_t dim); > +extern cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align; > + > /* Get the device id */ > typedef int (cl_driver_get_device_id_cb)(void); > extern cl_driver_get_device_id_cb *cl_driver_get_device_id; > diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c > index 72f25d9..665dad2 100644 > --- a/src/cl_driver_defs.c > +++ b/src/cl_driver_defs.c > @@ -48,6 +48,7 @@ LOCAL cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering > = NULL; > LOCAL cl_buffer_get_buffer_from_libva_cb *cl_buffer_get_buffer_from_libva = > NULL; > LOCAL cl_buffer_get_image_from_libva_cb *cl_buffer_get_image_from_libva = > NULL; > LOCAL cl_buffer_get_fd_cb *cl_buffer_get_fd = NULL; > +LOCAL cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align = NULL; > > /* cl_khr_gl_sharing */ > LOCAL cl_gl_acquire_texture_cb *cl_gl_acquire_texture = NULL; > diff --git a/src/cl_mem.c b/src/cl_mem.c > index 077f1d7..59265a3 100644 
> --- a/src/cl_mem.c > +++ b/src/cl_mem.c > @@ -610,13 +610,6 @@ cl_mem_copy_image(struct _cl_mem_image *image, > cl_mem_unmap_auto((cl_mem)image); > } > > -static const uint32_t tile_sz = 4096; /* 4KB per tile */ > -static const uint32_t tilex_w = 512; /* tileX width in bytes */ > -static const uint32_t tilex_h = 8; /* tileX height in number of rows */ > -static const uint32_t tiley_w = 128; /* tileY width in bytes */ > -static const uint32_t tiley_h = 32; /* tileY height in number of rows */ > -static const uint32_t valign = 2; /* vertical alignment is 2. */ > - > cl_image_tiling_t cl_get_default_tiling(void) > { > static int initialized = 0; > @@ -749,13 +742,13 @@ _cl_mem_new_image(cl_context ctx, > /* Tiling requires to align both pitch and height */ > if (tiling == CL_NO_TILE) { > aligned_pitch = w * bpp; > - aligned_h = ALIGN(h, valign); > + aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1)); > } else if (tiling == CL_TILE_X) { > - aligned_pitch = ALIGN(w * bpp, tilex_w); > - aligned_h = ALIGN(h, tilex_h); > + aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, > CL_TILE_X, 0)); > + aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 1)); > } else if (tiling == CL_TILE_Y) { > - aligned_pitch = ALIGN(w * bpp, tiley_w); > - aligned_h = ALIGN(h, tiley_h); > + aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, > CL_TILE_Y, 0)); > + aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_Y, 1)); > } > > sz = aligned_pitch * aligned_h * depth; > @@ -779,7 +772,7 @@ _cl_mem_new_image(cl_context ctx, > image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) > aligned_slice_pitch = 0; > else > - aligned_slice_pitch = aligned_pitch * ALIGN(h, 2); > + aligned_slice_pitch = aligned_pitch * ALIGN(h, > cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1)); > > cl_mem_image_init(cl_mem_image(mem), w, h, image_type, depth, *fmt, > intel_fmt, bpp, aligned_pitch, aligned_slice_pitch, > tiling, > diff --git a/src/intel/intel_driver.c 
b/src/intel/intel_driver.c > index 2c2ed5f..cb466ab 100644 > --- a/src/intel/intel_driver.c > +++ b/src/intel/intel_driver.c > @@ -476,6 +476,50 @@ static int get_cl_tiling(uint32_t drm_tiling) > return CL_NO_TILE; > } > > +static uint32_t intel_buffer_get_tiling_align(cl_context ctx, uint32_t > tiling_mode, uint32_t dim) > +{ > + uint32_t gen_ver = ((intel_driver_t *)ctx->drv)->gen_ver; > + uint32_t ret = 0; > + > + switch (tiling_mode) { > + case CL_TILE_X: > + if (dim == 0) { //tileX width in bytes > + ret = 512; > + } else if (dim == 1) { //tileX height in number of rows > + ret = 8; > + } else if (dim == 2) { //tile SZ > + ret = 4096; > + } else > + assert(0); > + break; > + > + case CL_TILE_Y: > + if (dim == 0) { //tileY width in bytes > + ret = 128; > + } else if (dim == 1) { //tileY height in number of rows > + ret = 32; > + } else if (dim == 2) { //tile SZ > + ret = 4096; > + } else > + assert(0); > + break; > + > + case CL_NO_TILE: > + if (dim == 1) { //vertical alignment > + if (gen_ver == 8) > + ret = 4; > + else > + ret = 2; > + } else if (dim == 2) { //tile SZ > + ret = 4096; > + } else > + assert(0); > + break; > + } > + > + return ret; > +} > + > #if defined(HAS_EGL) > #include "intel_dri_resource_sharing.h" > #include "cl_image.h" > @@ -741,5 +785,6 @@ intel_setup_callbacks(void) > cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata; > cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) > drm_intel_bo_wait_rendering; > cl_buffer_get_fd = (cl_buffer_get_fd_cb *) > drm_intel_bo_gem_export_to_prime; > + cl_buffer_get_tiling_align = (cl_buffer_get_tiling_align_cb > *)intel_buffer_get_tiling_align; > intel_set_gpgpu_callbacks(intel_get_device_id()); > } > diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c > index 167d8d9..d379768 100644 > --- a/src/intel/intel_gpgpu.c > +++ b/src/intel/intel_gpgpu.c > @@ -1094,6 +1094,7 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu, > ss->ss0.surface_format = format; > if 
(intel_is_surface_array(type)) { > ss->ss0.surface_array = 1; > + ss->ss1.surface_qpitch = 1; > } > ss->ss0.horizontal_alignment = 1; > ss->ss0.vertical_alignment = 1; > @@ -1117,7 +1118,6 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu, > ss->ss3.surface_pitch = pitch - 1; > > ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl(); > - ss->ss7.red_clear_color = 1; > ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED; > ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN; > ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE; > -- > 1.7.9.5 > > > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
