Reviewed-by: Bas Nieuwenhuizen <[email protected]>
On Fri, Apr 20, 2018 at 2:21 PM, Samuel Pitoiset <[email protected]> wrote: > The SI family doesn't support chaining which means the maximum > size in dwords per CS is limited. When that limit was reached > we failed to submit the CS and the application crashed. > > This patch allows to submit up to 4 IBs which is currently the > limit, but recent amdgpu supports more than that. > > Please note that we can reach the limit of 4 IBs per submit > but currently we can't improve that. The only solution is to > upgrade libdrm. That will be improved later but for now this > should fix crashes on SI or when using RADV_DEBUG=noibs. > > Fixes: 36cb5508e89 ("radv/winsys: Fail early on overgrown cs.") > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105775 > Signed-off-by: Samuel Pitoiset <[email protected]> > --- > src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 218 ++++++++++++++---- > 1 file changed, 168 insertions(+), 50 deletions(-) > > diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > index c4b2232ce9e..17e6d8ba2b6 100644 > --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > @@ -68,6 +68,10 @@ struct radv_amdgpu_cs { > struct radeon_winsys_bo **virtual_buffers; > uint8_t *virtual_buffer_priorities; > int *virtual_buffer_hash_table; > + > + /* For chips that don't support chaining. */ > + struct radeon_winsys_cs *old_cs_buffers; > + unsigned num_old_cs_buffers; > }; > > static inline struct radv_amdgpu_cs * > @@ -201,6 +205,12 @@ static void radv_amdgpu_cs_destroy(struct > radeon_winsys_cs *rcs) > for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i) > cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]); > > + for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) { > + struct radeon_winsys_cs *rcs = &cs->old_cs_buffers[i]; > + free(rcs->buf); > + } > + > + free(cs->old_cs_buffers); > free(cs->old_ib_buffers); > free(cs->virtual_buffers); > free(cs->virtual_buffer_priorities); > @@ -286,9 +296,46 @@ static void radv_amdgpu_cs_grow(struct radeon_winsys_cs > *_cs, size_t min_size) > /* The total ib size cannot exceed limit_dws dwords. */ > if (ib_dws > limit_dws) > { > - cs->failed = true; > + /* The maximum size in dwords has been reached, > + * try to allocate a new one. > + */ > + if (cs->num_old_cs_buffers + 1 >= > AMDGPU_CS_MAX_IBS_PER_SUBMIT) { > + /* TODO: Allow to submit more than 4 IBs. */ > + fprintf(stderr, "amdgpu: Maximum number of > IBs " > + "per submit reached.\n"); > + cs->failed = true; > + cs->base.cdw = 0; > + return; > + } > + > + cs->old_cs_buffers = > + realloc(cs->old_cs_buffers, > + (cs->num_old_cs_buffers + 1) * > sizeof(*cs->old_cs_buffers)); > + if (!cs->old_cs_buffers) { > + cs->failed = true; > + cs->base.cdw = 0; > + return; > + } > + > + /* Store the current one for submitting it later. */ > + cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = > cs->base.cdw; > + cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = > cs->base.max_dw; > + cs->old_cs_buffers[cs->num_old_cs_buffers].buf = > cs->base.buf; > + cs->num_old_cs_buffers++; > + > + /* Reset the cs, it will be re-allocated below. */ > cs->base.cdw = 0; > - return; > + cs->base.buf = NULL; > + > + /* Re-compute the number of dwords to allocate. */ > + ib_dws = MAX2(cs->base.cdw + min_size, > + MIN2(cs->base.max_dw * 2, limit_dws)); > + if (ib_dws > limit_dws) { > + fprintf(stderr, "amdgpu: Too high number of " > + "dwords to allocate\n"); > + cs->failed = true; > + return; > + } > } > > uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4); > @@ -400,6 +447,15 @@ static void radv_amdgpu_cs_reset(struct radeon_winsys_cs > *_cs) > cs->ib.ib_mc_address = > radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va; > cs->ib_size_ptr = &cs->ib.size; > cs->ib.size = 0; > + } else { > + for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) { > + struct radeon_winsys_cs *rcs = &cs->old_cs_buffers[i]; > + free(rcs->buf); > + } > + > + free(cs->old_cs_buffers); > + cs->old_cs_buffers = NULL; > + cs->num_old_cs_buffers = 0; > } > } > > @@ -550,7 +606,8 @@ static void radv_amdgpu_cs_execute_secondary(struct > radeon_winsys_cs *_parent, > static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, > struct radeon_winsys_cs **cs_array, > unsigned count, > - struct radv_amdgpu_winsys_bo *extra_bo, > + struct radv_amdgpu_winsys_bo > **extra_bo_array, > + unsigned num_extra_bo, > struct radeon_winsys_cs *extra_cs, > const struct radv_winsys_bo_list > *radv_bo_list, > amdgpu_bo_list_handle *bo_list) > @@ -580,7 +637,7 @@ static int radv_amdgpu_create_bo_list(struct > radv_amdgpu_winsys *ws, > bo_list); > free(handles); > pthread_mutex_unlock(&ws->global_bo_list_lock); > - } else if (count == 1 && !extra_bo && !extra_cs && !radv_bo_list && > + } else if (count == 1 && !num_extra_bo && !extra_cs && !radv_bo_list > && > !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) { > struct radv_amdgpu_cs *cs = (struct > radv_amdgpu_cs*)cs_array[0]; > if (cs->num_buffers == 0) { > @@ -590,8 +647,8 @@ static int radv_amdgpu_create_bo_list(struct > radv_amdgpu_winsys *ws, > r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, > cs->handles, > cs->priorities, bo_list); > } else { > - unsigned total_buffer_count = !!extra_bo; > - unsigned unique_bo_count = !!extra_bo; > + unsigned total_buffer_count = num_extra_bo; > + unsigned unique_bo_count = num_extra_bo; > for (unsigned i = 0; i < count; ++i) { > struct radv_amdgpu_cs *cs = (struct > radv_amdgpu_cs*)cs_array[i]; > total_buffer_count += cs->num_buffers; > @@ -619,9 +676,9 @@ static int radv_amdgpu_create_bo_list(struct > radv_amdgpu_winsys *ws, > return -ENOMEM; > } > > - if (extra_bo) { > - handles[0] = extra_bo->bo; > - priorities[0] = 8; > + for (unsigned i = 0; i < num_extra_bo; i++) { > + handles[i] = extra_bo_array[i]->bo; > + priorities[i] = 8; > } > > for (unsigned i = 0; i < count + !!extra_cs; ++i) { > @@ -773,7 +830,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct > radeon_winsys_ctx *_ctx, > } > } > > - r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, > initial_preamble_cs, > + r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, 0, > initial_preamble_cs, > radv_bo_list, &bo_list); > if (r) { > fprintf(stderr, "amdgpu: buffer list creation failed for the " > @@ -842,7 +899,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct > radeon_winsys_ctx *_ctx, > > memset(&request, 0, sizeof(request)); > > - r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, > NULL, > + r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, > NULL, 0, > preamble_cs, radv_bo_list, > &bo_list); > if (r) { > fprintf(stderr, "amdgpu: buffer list creation failed " > @@ -923,68 +980,127 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct > radeon_winsys_ctx *_ctx, > assert(cs_count); > > for (unsigned i = 0; i < cs_count;) { > - struct amdgpu_cs_ib_info ib = {0}; > - struct radeon_winsys_bo *bo = NULL; > + struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = > {0}; > + unsigned number_of_ibs = 1; > + struct radeon_winsys_bo *bos[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = > {0}; > struct radeon_winsys_cs *preamble_cs = i ? > continue_preamble_cs : initial_preamble_cs; > + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]); > uint32_t *ptr; > unsigned cnt = 0; > unsigned size = 0; > unsigned pad_words = 0; > - if (preamble_cs) > - size += preamble_cs->cdw; > > - while (i + cnt < cs_count && 0xffff8 - size >= > radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) { > - size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw; > - ++cnt; > - } > + if (cs->num_old_cs_buffers > 0) { > + /* Special path when the maximum size in dwords has > + * been reached because we need to handle more than > one > + * IB per submit. > + */ > + unsigned new_cs_count = cs->num_old_cs_buffers + 1; > + struct radeon_winsys_cs *new_cs_array[4]; > + unsigned idx = 0; > + > + for (unsigned j = 0; j < cs->num_old_cs_buffers; j++) > + new_cs_array[idx++] = &cs->old_cs_buffers[j]; > + new_cs_array[idx++] = cs_array[i]; > + > + for (unsigned j = 0; j < new_cs_count; j++) { > + struct radeon_winsys_cs *rcs = > new_cs_array[j]; > + bool needs_preamble = preamble_cs && j == 0; > + unsigned size = 0; > + > + if (needs_preamble) > + size += preamble_cs->cdw; > + size += rcs->cdw; > + > + assert(size < 0xffff8); > + > + while(!size || (size & 7)) { > + size++; > + pad_words++; > + } > > - while(!size || (size & 7)) { > - size++; > - pad_words++; > - } > - assert(cnt); > + bos[j] = ws->buffer_create(ws, 4 * size, 4096, > + RADEON_DOMAIN_GTT, > + > RADEON_FLAG_CPU_ACCESS | > + > RADEON_FLAG_NO_INTERPROCESS_SHARING | > + > RADEON_FLAG_READ_ONLY); > + ptr = ws->buffer_map(bos[j]); > > - bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, > - RADEON_FLAG_CPU_ACCESS | > - RADEON_FLAG_NO_INTERPROCESS_SHARING | > - RADEON_FLAG_READ_ONLY); > - ptr = ws->buffer_map(bo); > + if (needs_preamble) { > + memcpy(ptr, preamble_cs->buf, > preamble_cs->cdw * 4); > + ptr += preamble_cs->cdw; > + } > > - if (preamble_cs) { > - memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4); > - ptr += preamble_cs->cdw; > - } > + memcpy(ptr, rcs->buf, 4 * rcs->cdw); > + ptr += rcs->cdw; > > - for (unsigned j = 0; j < cnt; ++j) { > - struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i > + j]); > - memcpy(ptr, cs->base.buf, 4 * cs->base.cdw); > - ptr += cs->base.cdw; > + for (unsigned k = 0; k < pad_words; ++k) > + *ptr++ = pad_word; > > - } > + ibs[j].size = size; > + ibs[j].ib_mc_address = > radv_buffer_get_va(bos[j]); > + } > > - for (unsigned j = 0; j < pad_words; ++j) > - *ptr++ = pad_word; > + number_of_ibs = new_cs_count; > + cnt++; > + } else { > + if (preamble_cs) > + size += preamble_cs->cdw; > > - memset(&request, 0, sizeof(request)); > + while (i + cnt < cs_count && 0xffff8 - size >= > radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) { > + size += radv_amdgpu_cs(cs_array[i + > cnt])->base.cdw; > + ++cnt; > + } > + > + while (!size || (size & 7)) { > + size++; > + pad_words++; > + } > + assert(cnt); > > + bos[0] = ws->buffer_create(ws, 4 * size, 4096, > + RADEON_DOMAIN_GTT, > + RADEON_FLAG_CPU_ACCESS | > + > RADEON_FLAG_NO_INTERPROCESS_SHARING | > + RADEON_FLAG_READ_ONLY); > + ptr = ws->buffer_map(bos[0]); > + > + if (preamble_cs) { > + memcpy(ptr, preamble_cs->buf, > preamble_cs->cdw * 4); > + ptr += preamble_cs->cdw; > + } > + > + for (unsigned j = 0; j < cnt; ++j) { > + struct radv_amdgpu_cs *cs = > radv_amdgpu_cs(cs_array[i + j]); > + memcpy(ptr, cs->base.buf, 4 * cs->base.cdw); > + ptr += cs->base.cdw; > + > + } > + > + for (unsigned j = 0; j < pad_words; ++j) > + *ptr++ = pad_word; > + > + ibs[0].size = size; > + ibs[0].ib_mc_address = radv_buffer_get_va(bos[0]); > + } > > r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, > - (struct > radv_amdgpu_winsys_bo*)bo, > - preamble_cs, radv_bo_list, > &bo_list); > + (struct radv_amdgpu_winsys_bo > **)bos, > + number_of_ibs, preamble_cs, > + radv_bo_list, &bo_list); > if (r) { > fprintf(stderr, "amdgpu: buffer list creation failed " > "for the sysmem submission (%d)\n", > r); > return r; > } > > - ib.size = size; > - ib.ib_mc_address = radv_buffer_get_va(bo); > + memset(&request, 0, sizeof(request)); > > request.ip_type = cs0->hw_ip; > request.ring = queue_idx; > request.resources = bo_list; > - request.number_of_ibs = 1; > - request.ibs = &ib; > + request.number_of_ibs = number_of_ibs; > + request.ibs = ibs; > request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, > queue_idx); > > sem_info->cs_emit_signal = (i == cs_count - cnt) ? > emit_signal_sem : false; > @@ -1000,9 +1116,11 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct > radeon_winsys_ctx *_ctx, > if (bo_list) > amdgpu_bo_list_destroy(bo_list); > > - ws->buffer_destroy(bo); > - if (r) > - return r; > + for (unsigned j = 0; j < number_of_ibs; j++) { > + ws->buffer_destroy(bos[j]); > + if (r) > + return r; > + } > > i += cnt; > } > -- > 2.17.0 > > _______________________________________________ > mesa-dev mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
