Could you add an entry for the media kernel to name_to_type_mapping, or just use a common name for all tracked kernels?
Thanks Haihao > This adds AUB file dump support to generate execution > trace for internal GPU simulator. > > Signed-off-by: Zhenyu Wang <[email protected]> > --- > intel/Makefile.am | 3 +- > intel/intel_bufmgr.h | 38 +++++ > intel/intel_bufmgr_gem.c | 402 > ++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 442 insertions(+), 1 deletions(-) > > diff --git a/intel/Makefile.am b/intel/Makefile.am > index 1ae92f8..398cd2f 100644 > --- a/intel/Makefile.am > +++ b/intel/Makefile.am > @@ -41,7 +41,8 @@ libdrm_intel_la_SOURCES = \ > intel_bufmgr_gem.c \ > intel_chipset.h \ > mm.c \ > - mm.h > + mm.h \ > + intel_aub.h > > libdrm_intelincludedir = ${includedir}/libdrm > libdrm_intelinclude_HEADERS = intel_bufmgr.h > diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h > index daa18b4..bb4158a 100644 > --- a/intel/intel_bufmgr.h > +++ b/intel/intel_bufmgr.h > @@ -35,6 +35,7 @@ > #define INTEL_BUFMGR_H > > #include <stdint.h> > +#include <stdio.h> > > struct drm_clip_rect; > > @@ -83,6 +84,39 @@ struct _drm_intel_bo { > int handle; > }; > > +enum drm_intel_aub_bmp_format { > + AUB_DUMP_BMP_LEGACY, > + AUB_DUMP_BMP_8BIT, > + AUB_DUMP_BMP_ARGB_0555, > + AUB_DUMP_BMP_ARGB_0565, > + AUB_DUMP_BMP_ARGB_4444, > + AUB_DUMP_BMP_ARGB_1555, > + AUB_DUMP_BMP_ARGB_0888, > + AUB_DUMP_BMP_ARGB_8888, > + AUB_DUMP_BMP_YCRCB_SWAPY, > + AUB_DUMP_BMP_YCRCB_NORMAL, > + AUB_DUMP_BMP_YCRCB_SWAPUV, > + AUB_DUMP_BMP_YCRCB_SWAPUVY, > + AUB_DUMP_BMP_ABGR_8888, > +}; > + > +/* > + * surface info needed by aub DUMP_BMP block > + */ > +struct drm_intel_aub_surface_bmp { > + uint16_t x_offset; > + uint16_t y_offset; > + uint16_t pitch; > + uint8_t bits_per_pixel; > + uint8_t format; > + uint16_t width; > + uint16_t height; > + uint32_t tiling_walk_y:1; > + uint32_t tiling:1; > + uint32_t pad:30; > +}; > + > + > #define BO_ALLOC_FOR_RENDER (1<<0) > > drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name, > @@ -150,6 +184,10 @@ int 
drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo); > void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable); > > int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id); > +void drm_intel_bufmgr_gem_set_aubfile(drm_intel_bufmgr *bufmgr, FILE *file); > +void drm_intel_bufmgr_gem_stop_aubfile(drm_intel_bufmgr *bufmgr); > +int drm_intel_gem_aub_dump_bmp(drm_intel_bufmgr *bufmgr, drm_intel_bo *bo, > + unsigned int offset, struct > drm_intel_aub_surface_bmp *bmp); > > /* drm_intel_bufmgr_fake.c */ > drm_intel_bufmgr *drm_intel_bufmgr_fake_init(int fd, > diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c > index 3cdffce..654bc31 100644 > --- a/intel/intel_bufmgr_gem.c > +++ b/intel/intel_bufmgr_gem.c > @@ -57,6 +57,7 @@ > #include "intel_bufmgr.h" > #include "intel_bufmgr_priv.h" > #include "intel_chipset.h" > +#include "intel_aub.h" > #include "string.h" > > #include "i915_drm.h" > @@ -75,6 +76,13 @@ struct drm_intel_gem_bo_bucket { > unsigned long size; > }; > > +struct drm_intel_aub_bmp { > + drm_intel_bo *bo; /* surface bo */ > + unsigned int offset; > + struct drm_intel_aub_surface_bmp bmp; > + struct drm_intel_aub_bmp *next; > +}; > + > typedef struct _drm_intel_bufmgr_gem { > drm_intel_bufmgr bufmgr; > > @@ -106,6 +114,10 @@ typedef struct _drm_intel_bufmgr_gem { > unsigned int has_relaxed_fencing : 1; > unsigned int bo_reuse : 1; > char fenced_relocs; > + > + FILE *aub_file; > + uint32_t aub_offset; > + struct drm_intel_aub_bmp *aub_bmp; > } drm_intel_bufmgr_gem; > > #define DRM_INTEL_RELOC_FENCE (1<<0) > @@ -195,8 +207,396 @@ struct _drm_intel_bo_gem { > * relocations. 
> */ > int reloc_tree_fences; > + > + uint32_t aub_offset; > }; > > +/* AUB trace dump support */ > + > +static void > +aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data) > +{ > + fwrite(&data, 1, 4, bufmgr_gem->aub_file); > +} > + > +static void > +aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size) > +{ > + fwrite(data, 1, size, bufmgr_gem->aub_file); > +} > + > +static void > +aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size) > +{ > + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) > bo->bufmgr; > + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; > + uint32_t *data; > + unsigned int i; > + > + data = malloc(bo->size); > + drm_intel_bo_get_subdata(bo, offset, size, data); > + > + /* Easy mode: write out bo with no relocations */ > + if (!bo_gem->reloc_count) { > + aub_out_data(bufmgr_gem, data, size); > + free(data); > + return; > + } > + > + /* Otherwise, handle the relocations while writing. */ > + for (i = 0; i < size / 4; i++) { > + int r; > + for (r = 0; r < bo_gem->reloc_count; r++) { > + struct drm_i915_gem_relocation_entry *reloc; > + drm_intel_reloc_target *info; > + > + reloc = &bo_gem->relocs[r]; > + info = &bo_gem->reloc_target_info[r]; > + > + if (reloc->offset == offset + i * 4) { > + drm_intel_bo_gem *target_gem; > + uint32_t val; > + > + target_gem = (drm_intel_bo_gem *)info->bo; > + > + val = reloc->delta; > + val += target_gem->aub_offset; > + > + aub_out(bufmgr_gem, val); > + data[i] = val; > + break; > + } > + } > + if (r == bo_gem->reloc_count) { > + /* no relocation, just the data */ > + aub_out(bufmgr_gem, data[i]); > + } > + } > +} > + > +static void > +aub_bo_get_address(drm_intel_bo *bo) > +{ > + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) > bo->bufmgr; > + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; > + > + /* Give the object a graphics address in the AUB file. 
We > + * don't just use the GEM object address because we do AUB > + * dumping before execution -- we want to successfully log > + * when the hardware might hang, and we might even want to aub > + * capture for a driver trying to execute on a different > + * generation of hardware by disabling the actual kernel exec > + * call. > + */ > + bo_gem->aub_offset = bufmgr_gem->aub_offset; > + bufmgr_gem->aub_offset += bo->size; > + /* XXX: Handle aperture overflow. */ > + assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024); > +} > + > +static const struct { > + const char *name; > + uint32_t type; > + uint32_t subtype; > +} name_to_type_mapping[] = { > + { "VS_UNIT", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_VS_STATE}, > + { "GS_UNIT", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_GS_STATE}, > + { "CLIP_UNIT", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CL_STATE}, > + { "SF_UNIT", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SF_STATE}, > + { "WM_UNIT", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_WM_STATE}, > + { "CC_UNIT", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CC_STATE}, > + { "CLIP_VP", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CL_VP}, > + { "SF_VP", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SF_VP}, > + { "SF_SCISSOR_UNIT", > + AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SF_SCISSOR_RECT}, > + { "CC_VP", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CC_VP}, > + { "SAMPLER", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SAMPLER_STATE}, > + { "SAMPLER_DEFAULT_COLOR", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SDC}, > + { "VS_PROG", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL}, > + { "GS_PROG", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL}, > + { "CLIP_PROG", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL}, > + { "SF_PROG", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL}, > + { "WM_PROG", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL}, > + { "BLEND_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_BLEND_STATE}, > + { "DEPTH_STENCIL_STATE", > + AUB_TRACE_TYPE_GENERAL, AUB_TRACE_DEPTH_STENCIL_STATE}, > + { "COLOR_CALC_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CC_STATE}, > + { "SS_SURF_BIND", AUB_TRACE_TYPE_SURFACE, 
AUB_TRACE_BINDING_TABLE}, > + { "SS_SURFACE", AUB_TRACE_TYPE_SURFACE, AUB_TRACE_SURFACE_STATE}, > + { "temporary VBO", AUB_TRACE_TYPE_VERTEX_BUFFER, 0}, > + { "CURBE", AUB_TRACE_TYPE_CONSTANT_URB, 0}, > + { "VS constant_bo", AUB_TRACE_TYPE_CONSTANT_BUFFER, 0}, > + { "WM constant_bo", AUB_TRACE_TYPE_CONSTANT_BUFFER, 0}, > + { "INTERFACE_DESC", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_INTERFACE_DESC}, > + { "VLD_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_VLD_STATE}, > + { "VFE_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_VFE_STATE}, > + { "IT_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_IT_STATE}, > + { "DI_SAMPLE_STATE", AUB_TRACE_TYPE_GENERAL, > AUB_TRACE_DI_SAMPLE_STATE}, > + { "IEF_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_IEF_STATE}, > + { "AVS_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_AVS_STATE}, > +}; > + > +static void > +aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, > + uint32_t offset, uint32_t size) > +{ > + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) > bo->bufmgr; > + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; > + > + aub_out(bufmgr_gem, > + CMD_AUB_TRACE_HEADER_BLOCK | > + (5 - 2)); > + aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_GTT | type | > AUB_TRACE_OP_DATA_WRITE); > + aub_out(bufmgr_gem, subtype); > + aub_out(bufmgr_gem, bo_gem->aub_offset + offset); > + aub_out(bufmgr_gem, size); > + aub_write_bo_data(bo, offset, size); > +} > + > +static void > +aub_write_bo(drm_intel_bo *bo) > +{ > + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) > bo->bufmgr; > + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; > + uint32_t type = AUB_TRACE_TYPE_NOTYPE; > + uint32_t subtype = 0; > + uint32_t block_size; > + uint32_t offset; > + unsigned int i; > + > + aub_bo_get_address(bo); > + > + for (i = 0; i < ARRAY_SIZE(name_to_type_mapping); i++) { > + if (strcmp(bo_gem->name, > + name_to_type_mapping[i].name) == 0) { > + type = name_to_type_mapping[i].type; > + subtype = name_to_type_mapping[i].subtype; > + break; > + } > 
+ } > + > + if (type == 0) { > + DBG("Failed to find type for object %s(size: 0x%lx, > aub_offset: 0x%08x)\n", > + bo_gem->name, bo->size, bo_gem->aub_offset); > + } > + > + > + /* Break up large objects into multiple writes. Otherwise a > + * 128kb VBO would overflow the 16 bits of size field in the > + * packet header and everything goes badly after that. > + */ > + for (offset = 0; offset < bo->size; offset += block_size) { > + block_size = bo->size - offset; > + > + if (block_size > 2 * 4096) > + block_size = 2 * 4096; > + > + aub_write_trace_block(bo, type, subtype, > + offset, block_size); > + } > +} > + > +/* > + * Make a ringbuffer on fly and dump it > + */ > +static void > +aub_generate_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem, > + uint32_t batch_buffer, unsigned int flags) > +{ > + uint32_t ringbuffer[1024]; > + int ring = 0; > + > + switch (flags) { > + case I915_EXEC_RENDER: > + case I915_EXEC_DEFAULT: > + ring = AUB_TRACE_TYPE_RING_PRB0; > + break; > + case I915_EXEC_BSD: > + ring = AUB_TRACE_TYPE_RING_PRB1; > + break; > + case I915_EXEC_BLT: > + ring = AUB_TRACE_TYPE_RING_PRB2; > + break; > + } > + > + aub_out(bufmgr_gem, > + CMD_AUB_TRACE_HEADER_BLOCK | > + (5 - 2)); > + aub_out(bufmgr_gem, > + AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); > + aub_out(bufmgr_gem, 0); /* general/surface subtype */ > + aub_out(bufmgr_gem, bufmgr_gem->aub_offset); > + aub_out(bufmgr_gem, 4096); > + > + /* Do make a ring buffer here */ > + memset(ringbuffer, AUB_MI_NOOP, sizeof(ringbuffer)); > + ringbuffer[0] = AUB_MI_BATCH_BUFFER_START; > + ringbuffer[1] = batch_buffer; > + > + /* FIXME: Need some flush operations here? 
*/ > + > + aub_out_data(bufmgr_gem, ringbuffer, 4096); > + > + /* Update offset pointer */ > + bufmgr_gem->aub_offset += 4096; > +} > + > +static void > +aub_dump_bmp(drm_intel_bufmgr_gem *bufmgr_gem) > +{ > + struct drm_intel_aub_bmp *p = bufmgr_gem->aub_bmp; > + > + while(p) { > + aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4); > + aub_out(bufmgr_gem, (p->bmp.y_offset << 16) | > p->bmp.x_offset); > + aub_out(bufmgr_gem, (p->bmp.format << 24) | > + (p->bmp.bits_per_pixel << 16) | > p->bmp.pitch); > + aub_out(bufmgr_gem, (p->bmp.height << 16) | p->bmp.width); > + /* surface bo should already be written out */ > + assert(((drm_intel_bo_gem *)p->bo)->aub_offset != 0); > + aub_out(bufmgr_gem, ((drm_intel_bo_gem *)p->bo)->aub_offset + > p->offset); > + aub_out(bufmgr_gem, (p->bmp.tiling << 2) | > (p->bmp.tiling_walk_y << 3)); > + > + bufmgr_gem->aub_bmp = p->next; > + free(p); > + p = bufmgr_gem->aub_bmp; > + } > +} > + > +static void > +aub_exec(drm_intel_bo *bo, unsigned int flags) > +{ > + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) > bo->bufmgr; > + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; > + int i; > + > + if (!bufmgr_gem->aub_file) > + return; > + > + /* Write out all but the batchbuffer to AUB memory */ > + for (i = 0; i < bufmgr_gem->exec_count - 1; i++) { > + if (bufmgr_gem->exec_bos[i] != bo) > + aub_write_bo(bufmgr_gem->exec_bos[i]); > + } > + > + aub_bo_get_address(bo); > + > + /* Dump the batchbuffer. 
*/ > + aub_out(bufmgr_gem, > + CMD_AUB_TRACE_HEADER_BLOCK | > + (5 - 2)); > + aub_out(bufmgr_gem, > + AUB_TRACE_MEMTYPE_GTT | AUB_TRACE_TYPE_BATCH | > AUB_TRACE_OP_DATA_WRITE); > + aub_out(bufmgr_gem, 0); /* general/surface subtype */ > + aub_out(bufmgr_gem, bo_gem->aub_offset); > + aub_out(bufmgr_gem, bo_gem->bo.size); > + aub_write_bo_data(bo, 0, bo_gem->bo.size); > + > + /* Dump ring buffer */ > + aub_generate_ringbuffer(bufmgr_gem, bo_gem->aub_offset, flags); > + > + /* Dump BMP file for any requested surface */ > + aub_dump_bmp(bufmgr_gem); > + > + fflush(bufmgr_gem->aub_file); > + > + /* > + * One frame has been dumped. So reset the aub_offset for the next > frame. > + * > + * FIXME: Can we do this? > + */ > + bufmgr_gem->aub_offset = 0x10000; > +} > + > +/* > + * Stop dumping data to aub file > + */ > +void drm_intel_bufmgr_gem_stop_aubfile(drm_intel_bufmgr *bufmgr) > +{ > + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; > + > + pthread_mutex_lock(&bufmgr_gem->lock); > + bufmgr_gem->aub_file = NULL; > + pthread_mutex_unlock(&bufmgr_gem->lock); > +} > + > +void drm_intel_bufmgr_gem_set_aubfile(drm_intel_bufmgr *bufmgr, FILE *file) > +{ > + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; > + int entry = 0x3; /* uc/valid GTT */ > + int i; > + > + if (!file) > + return; > + > + pthread_mutex_lock(&bufmgr_gem->lock); > + > + bufmgr_gem->aub_file = file; > + > + /* Start from 0x10000, since the address below is used for GTT entry > building */ > + bufmgr_gem->aub_offset = 0x10000; > + > + /* Start with a (required) version packet. */ > + aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2)); > + aub_out(bufmgr_gem, > + (4 << AUB_HEADER_MAJOR_SHIFT) | > + (0 << AUB_HEADER_MINOR_SHIFT)); > + for (i = 0; i < 8; i++) { > + aub_out(bufmgr_gem, 0); /* app name */ > + } > + aub_out(bufmgr_gem, 0); /* timestamp */ > + aub_out(bufmgr_gem, 0); /* timestamp */ > + aub_out(bufmgr_gem, 0); /* comment len */ > + > + /* Set up the GTT. 
The max we can handle is 256M. > + * Need improvement, dynamicly alloc/write GTT entry > + * block for each bo, so AubList output won't contain > + * whole GTT entry block in the first, easier for parse. > + */ > + for (i = 0x000; i < 0x10000; i += 4, entry += 0x1000) { > + aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | (5 - 2)); > + aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | > AUB_TRACE_OP_DATA_WRITE); > + aub_out(bufmgr_gem, 0); > + aub_out(bufmgr_gem, i); > + aub_out(bufmgr_gem, 4); > + aub_out(bufmgr_gem, entry); > + } > + > + pthread_mutex_unlock(&bufmgr_gem->lock); > +} > + > +int drm_intel_gem_aub_dump_bmp(drm_intel_bufmgr *bufmgr, > + drm_intel_bo *bo, unsigned int offset, > + struct drm_intel_aub_surface_bmp *bmp) > +{ > + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; > + struct drm_intel_aub_bmp *aub_bmp, *p, *last; > + > + aub_bmp = malloc(sizeof(*aub_bmp)); > + > + aub_bmp->bo = bo; > + aub_bmp->offset = offset; > + memcpy(&aub_bmp->bmp, bmp, sizeof(*bmp)); > + aub_bmp->next = NULL; > + > + pthread_mutex_lock(&bufmgr_gem->lock); > + > + /* Insert last */ > + p = last = bufmgr_gem->aub_bmp; > + while (p) { > + last = p; > + p = p->next; > + } > + if (last == bufmgr_gem->aub_bmp) > + bufmgr_gem->aub_bmp = aub_bmp; > + else > + last->next = aub_bmp; > + > + pthread_mutex_unlock(&bufmgr_gem->lock); > + > + return 0; > +} > + > static unsigned int > drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count); > > @@ -1624,6 +2024,8 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, > execbuf.rsvd1 = 0; > execbuf.rsvd2 = 0; > > + aub_exec(bo, flags); > + > ret = drmIoctl(bufmgr_gem->fd, > DRM_IOCTL_I915_GEM_EXECBUFFER2, > &execbuf); > -- > 1.7.2.3 > _______________________________________________ Intel-gfx mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/intel-gfx
