Re: [Mesa-dev] [PATCH v2] radv: implement VK_EXT_sample_locations
Gfx10 remembers sample positions in compressed Z/S memory, so the hw doesn't need the decompress pass for shader loads. Marek On Wed, May 22, 2019 at 4:20 PM Marek Olšák wrote: > The depth decompress pass needs to know the sample locations. > > If shader loads read from compressed depth, the texture hardware will > always use the standard locations for decompression. > > Marek > > On Tue, May 21, 2019 at 8:17 PM Bas Nieuwenhuizen > wrote: > >> So this does not seem to use the sample locations during layout >> transitions? >> >> AFAIK those are needed for e.g. HTILE decompression as it is based on >> equations somehow. >> >> On Thu, May 16, 2019 at 11:51 AM Samuel Pitoiset >> wrote: >> > >> > Basically, this extension allows applications to use custom >> > sample locations. It doesn't support variable sample locations >> > during subpass. Note that we don't have to upload the user >> > sample locations because the spec doesn't allow this. >> > >> > Only enabled on VI+ because it's untested on older chips. 
>> > >> > v2: - change sampleLocationCoordinateRange[1] to 0.9375 >> > - compute and emit PA_SC_CENTROID_PRIORITY_{0,1} >> > - rebased on top of master >> > - some cleanups >> > >> > Signed-off-by: Samuel Pitoiset >> > --- >> > src/amd/vulkan/radv_cmd_buffer.c | 223 ++ >> > src/amd/vulkan/radv_device.c | 27 >> > src/amd/vulkan/radv_extensions.py | 1 + >> > src/amd/vulkan/radv_pipeline.c| 30 >> > src/amd/vulkan/radv_private.h | 26 +++- >> > 5 files changed, 300 insertions(+), 7 deletions(-) >> > >> > diff --git a/src/amd/vulkan/radv_cmd_buffer.c >> b/src/amd/vulkan/radv_cmd_buffer.c >> > index 4f592bc7f68..fb79c1c6713 100644 >> > --- a/src/amd/vulkan/radv_cmd_buffer.c >> > +++ b/src/amd/vulkan/radv_cmd_buffer.c >> > @@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer >> *cmd_buffer, >> > dest->viewport.count = src->viewport.count; >> > dest->scissor.count = src->scissor.count; >> > dest->discard_rectangle.count = src->discard_rectangle.count; >> > + dest->sample_location.count = src->sample_location.count; >> > >> > if (copy_mask & RADV_DYNAMIC_VIEWPORT) { >> > if (memcmp(&dest->viewport.viewports, >> &src->viewport.viewports, >> > @@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer >> *cmd_buffer, >> > } >> > } >> > >> > + if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) { >> > + if (dest->sample_location.per_pixel != >> src->sample_location.per_pixel || >> > + dest->sample_location.grid_size.width != >> src->sample_location.grid_size.width || >> > + dest->sample_location.grid_size.height != >> src->sample_location.grid_size.height || >> > + memcmp(&dest->sample_location.locations, >> > + &src->sample_location.locations, >> > + src->sample_location.count * >> sizeof(VkSampleLocationEXT))) { >> > + dest->sample_location.per_pixel = >> src->sample_location.per_pixel; >> > + dest->sample_location.grid_size = >> src->sample_location.grid_size; >> > + typed_memcpy(dest->sample_location.locations, >> > +src->sample_location.locations, >> > 
+src->sample_location.count); >> > + dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS; >> > + } >> > + } >> > + >> > cmd_buffer->state.dirty |= dest_mask; >> > } >> > >> > @@ -632,6 +649,190 @@ radv_emit_descriptor_pointers(struct >> radv_cmd_buffer *cmd_buffer, >> > } >> > } >> > >> > +/** >> > + * Convert the user sample locations to hardware sample locations (the >> values >> > + * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*). >> > + */ >> > +static void >> > +radv_convert_user_sample_locs(struct radv_sample_locations_state >> *state, >> > + uint32_t x, uint32_t y, VkOffset2D >> *sample_locs) >> > +{ >> > + uint32_t x_offset = x % state->grid_size.width; >> > + uint32_t y_offset = y % state->grid_size.height; >> > + uint32_t num_samples = (uint32_t)state->per_pixel; >> > + VkSampleLocationEXT *user_locs; >> > + uint32_t pixel_offset; >> > + >> > + pixel_offset = (x_offset + y_offset * state->grid_size.width) * >> num_samples; >> > + >> > + assert(pixel_offset <= MAX_SAMPLE_LOCATIONS); >> > + user_locs = &state->locations[pixel_offset]; >> > + >> > + for (uint32_t i = 0; i < num_samples; i++) { >> > + float shifted_pos_x = user_locs[i].x - 0.5; >> > + float shifted_pos_y = user_locs[i].y - 0.5; >> > + >> > + int32_t scaled_pos_x = floor(shifted_pos_x * 16); >> > + int32_t scaled_pos_y = floor(shifted_pos_y * 16); >> > + >> > +
[Mesa-dev] [PATCH] android: virgl: fix generated virgl_driinfo.h building rules
Changelog in Android makefile: - Add LOCAL_MODULE_CLASS, intermediates and LOCAL_GENERATED_SOURCES - Use LOCAL_EXPORT_C_INCLUDE_DIRS to export $(intermediates) path - Move generated header rules before 'include $(BUILD_STATIC_LIBRARY)' Fixes the following building error: In file included from external/mesa/src/gallium/targets/dri/target.c:1: external/mesa/src/gallium/auxiliary/target-helpers/drm_helper.h:257:16: fatal error: 'virgl/virgl_driinfo.h' file not found #include "virgl/virgl_driinfo.h" ^~~ 1 error generated. Fixes: cf800998a ("virgl: Add driinfo file and tie it into the build") Signed-off-by: Mauro Rossi --- src/gallium/drivers/virgl/Android.mk | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/virgl/Android.mk b/src/gallium/drivers/virgl/Android.mk index f77bcf196e..585ed7b2ce 100644 --- a/src/gallium/drivers/virgl/Android.mk +++ b/src/gallium/drivers/virgl/Android.mk @@ -30,8 +30,9 @@ LOCAL_SRC_FILES := \ LOCAL_MODULE := libmesa_pipe_virgl -include $(GALLIUM_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) +LOCAL_MODULE_CLASS := STATIC_LIBRARIES +intermediates := $(call local-generated-sources-dir) +LOCAL_GENERATED_SOURCES += $(intermediates)/virgl/virgl_driinfo.h GEN_DRIINFO_INPUTS := \ $(MESA_TOP)/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h \ @@ -44,6 +45,11 @@ $(intermediates)/virgl/virgl_driinfo.h: $(MERGE_DRIINFO) $(GEN_DRIINFO_INPUTS) @echo "Gen Header: $(PRIVATE_MODULE) <= $(notdir $(@))" $(hide) $(MESA_PYTHON2) $(MERGE_DRIINFO) $(GEN_DRIINFO_INPUTS) > $@ || ($(RM) $@; false) +LOCAL_EXPORT_C_INCLUDE_DIRS += $(intermediates) + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) + ifneq ($(HAVE_GALLIUM_VIRGL),) GALLIUM_TARGET_DRIVERS += virtio_gpu $(eval GALLIUM_LIBS += $(LOCAL_MODULE) libmesa_winsys_virgl_common libmesa_winsys_virgl libmesa_winsys_virgl_vtest) -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] ac: change ac_query_gpu_info() signatures
On Fri, Jun 21, 2019 at 1:13 PM Emil Velikov wrote: > From: Emil Velikov > > Currently libdrm_amdgpu provides a typedef of the various handles. While > the goal was to make those opaque, it effectively became part of the API. > > To the best of my knowledge there are two ways to have opaque handles: > - "typedef void *foo;" - rather messy IMHO > - "struct foo;" and use "struct foo *" through the API > > In our case amdgpu_device_handle is used only internally, plus > respective code is not used or applicable for r300 and r600. Hence we > copied the typedef. > > Seemingly this will be a problem since libdrm_amdgpu wants to change > the API, while not updating the code(?). > libdrm_amdgpu can't change the API. Reviewed-by: Marek Olšák Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/6] gallium/util: Make it possible to disable persistent maps in the upload manager
Reviewed-by: Marek Olšák Marek On Wed, Jun 19, 2019 at 4:42 AM Thomas Hellström (VMware) < tho...@shipmail.org> wrote: > From: Thomas Hellstrom > > For svga, the use of persistent / coherent maps is typically slightly > slower than without them. It's probably a bit case-dependent and > possible to tune, but for now, make sure we can disable those. > > Signed-off-by: Thomas Hellstrom > Reviewed-by: Brian Paul > --- > src/gallium/auxiliary/util/u_upload_mgr.c | 14 -- > src/gallium/auxiliary/util/u_upload_mgr.h | 4 > 2 files changed, 16 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c > b/src/gallium/auxiliary/util/u_upload_mgr.c > index c2c0ba957e3..73f6cae0b6d 100644 > --- a/src/gallium/auxiliary/util/u_upload_mgr.c > +++ b/src/gallium/auxiliary/util/u_upload_mgr.c > @@ -106,8 +106,10 @@ u_upload_clone(struct pipe_context *pipe, struct > u_upload_mgr *upload) > struct u_upload_mgr *result = u_upload_create(pipe, > upload->default_size, > upload->bind, > upload->usage, > upload->flags); > - if (upload->map_persistent && > - upload->map_flags & PIPE_TRANSFER_FLUSH_EXPLICIT) > + if (!upload->map_persistent && result->map_persistent) > + u_upload_disable_persistent(result); > + else if (upload->map_persistent && > +upload->map_flags & PIPE_TRANSFER_FLUSH_EXPLICIT) >u_upload_enable_flush_explicit(result); > > return result; > @@ -121,6 +123,14 @@ u_upload_enable_flush_explicit(struct u_upload_mgr > *upload) > upload->map_flags |= PIPE_TRANSFER_FLUSH_EXPLICIT; > } > > +void > +u_upload_disable_persistent(struct u_upload_mgr *upload) > +{ > + upload->map_persistent = FALSE; > + upload->map_flags &= ~(PIPE_TRANSFER_COHERENT | > PIPE_TRANSFER_PERSISTENT); > + upload->map_flags |= PIPE_TRANSFER_FLUSH_EXPLICIT; > +} > + > static void > upload_unmap_internal(struct u_upload_mgr *upload, boolean destroying) > { > diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h > b/src/gallium/auxiliary/util/u_upload_mgr.h > index 
80832016272..6a4a60963fe 100644 > --- a/src/gallium/auxiliary/util/u_upload_mgr.h > +++ b/src/gallium/auxiliary/util/u_upload_mgr.h > @@ -73,6 +73,10 @@ u_upload_clone(struct pipe_context *pipe, struct > u_upload_mgr *upload); > void > u_upload_enable_flush_explicit(struct u_upload_mgr *upload); > > +/** Whether to avoid persistent mappings where available */ > +void > +u_upload_disable_persistent(struct u_upload_mgr *upload); > + > /** > * Destroy the upload manager. > */ > -- > 2.20.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] panfrost: Kill the perf counters interface
Both patches pushed :) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/9] panfrost: Report UBO count
We look at the highest set bit in the UBO enable mask to work out the maximum indexable UBO, i.e. the UBO count as we need to report to the hardware. Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_context.c | 17 +++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 5af6dcdb9c3..6fde645a12c 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -981,6 +981,17 @@ panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf, unsigned unreachable("No constant buffer"); } +/* Compute number of UBOs active (more specifically, compute the highest UBO + * number addressable -- if there are gaps, include them in the count anyway) + * */ + +static unsigned +panfrost_ubo_count(struct panfrost_context *ctx, enum pipe_shader_type stage) +{ +unsigned mask = ctx->constant_buffer[stage].enabled_mask; +return 32 - __builtin_clz(mask); +} + /* Go through dirty flags and actualise them in the cmdstream. */ void @@ -1060,8 +1071,10 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) ctx->fragment_shader_core.midgard1.work_count = /*MAX2(ctx->fragment_shader_core.midgard1.work_count, ctx->blend->blend_work_count)*/16; /* Set late due to depending on render state */ -/* The one at the end seems to mean "1 UBO" */ -unsigned flags = MALI_EARLY_Z | 0x200 | 0x2000 | 0x1; + +/* The bottom bits seem to mean UBO count */ +unsigned ubo_count = panfrost_ubo_count(ctx, PIPE_SHADER_FRAGMENT); +unsigned flags = MALI_EARLY_Z | 0x200 | 0x2000 | ubo_count; /* Any time texturing is used, derivatives are implicitly * calculated, so we need to enable helper invocations */ -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/9] panfrost: Constant buffer refactor
We refactor panfrost_constant_buffer to mirror v3d's constant buffer handling, to enable UBOs as well as a single set of uniforms. Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_context.c | 66 ++ src/gallium/drivers/panfrost/pan_context.h | 6 +- 2 files changed, 34 insertions(+), 38 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index a803a8b68cb..5af6dcdb9c3 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -967,6 +967,20 @@ static void panfrost_upload_sysvals(struct panfrost_context *ctx, void *buf, } } +static const void * +panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf, unsigned index) +{ +struct pipe_constant_buffer *cb = &buf->cb[index]; +struct panfrost_resource *rsrc = pan_resource(cb->buffer); + +if (rsrc) +return rsrc->bo->cpu; +else if (cb->user_buffer) +return cb->user_buffer; +else +unreachable("No constant buffer"); +} + /* Go through dirty flags and actualise them in the cmdstream. */ void @@ -1193,16 +1207,23 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) struct panfrost_shader_state *fs = &ctx->fs->variants[ctx->fs->active_variant]; struct panfrost_shader_state *ss = (i == PIPE_SHADER_FRAGMENT) ? fs : vs; +/* Uniforms are implicitly UBO #0 */ +bool has_uniforms = buf->enabled_mask & (1 << 0); + /* Allocate room for the sysval and the uniforms */ size_t sys_size = sizeof(float) * 4 * ss->sysval_count; -size_t size = sys_size + buf->size; +size_t uniform_size = has_uniforms ? 
(buf->cb[0].buffer_size) : 0; +size_t size = sys_size + uniform_size; struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size); /* Upload sysvals requested by the shader */ panfrost_upload_sysvals(ctx, transfer.cpu, ss, i); /* Upload uniforms */ -memcpy(transfer.cpu + sys_size, buf->buffer, buf->size); +if (has_uniforms) { +const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0); +memcpy(transfer.cpu + sys_size, cpu, uniform_size); +} int uniform_count = 0; @@ -1236,7 +1257,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) postfix->uniforms = transfer.gpu; postfix->uniform_buffers = ubufs; -buf->dirty = 0; +buf->dirty_mask = 0; } /* TODO: Upload the viewport somewhere more appropriate */ @@ -1940,43 +1961,18 @@ panfrost_set_constant_buffer( struct panfrost_context *ctx = pan_context(pctx); struct panfrost_constant_buffer *pbuf = &ctx->constant_buffer[shader]; -size_t sz = buf ? buf->buffer_size : 0; - -/* Free previous buffer */ - -pbuf->dirty = true; -pbuf->size = sz; +util_copy_constant_buffer(&pbuf->cb[index], buf); -if (pbuf->buffer) { -ralloc_free(pbuf->buffer); -pbuf->buffer = NULL; -} - -/* If unbinding, we're done */ +unsigned mask = (1 << index); -if (!buf) -return; - -/* Multiple constant buffers not yet supported */ -assert(index == 0); - -const uint8_t *cpu; - -struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer); - -if (rsrc) { -cpu = rsrc->bo->cpu; -} else if (buf->user_buffer) { -cpu = buf->user_buffer; -} else { -DBG("No constant buffer?\n"); +if (unlikely(!buf)) { +pbuf->enabled_mask &= ~mask; +pbuf->dirty_mask &= ~mask; return; } -/* Copy the constant buffer into the driver context for later upload */ - -pbuf->buffer = rzalloc_size(ctx, sz); -memcpy(pbuf->buffer, cpu + buf->buffer_offset, sz); +pbuf->enabled_mask |= mask; +pbuf->dirty_mask |= mask; } static void diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 
1f718bcd9c4..21d1d4c8d46 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -67,9 +67,9 @@ struct prim_convert_context; lval &= ~(bit); struct panfrost_constant_buffer { -bool dirty; -size_t size; -void *buffer; +struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS]; +uint32_t enabled_mask; +uint32_t dirty_mask; }; st
[Mesa-dev] [PATCH 5/9] panfrost: Identify "uniform buffer count" bits
We've known about this for a while, but it was never formally in the machine header files / decoder, so let's add them in. Signed-off-by: Alyssa Rosenzweig --- .../drivers/panfrost/include/panfrost-job.h| 15 +++ src/gallium/drivers/panfrost/pan_context.c | 18 +- .../drivers/panfrost/pandecode/decode.c| 10 +- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/panfrost/include/panfrost-job.h b/src/gallium/drivers/panfrost/include/panfrost-job.h index 6da86148cd7..fbef4efdc32 100644 --- a/src/gallium/drivers/panfrost/include/panfrost-job.h +++ b/src/gallium/drivers/panfrost/include/panfrost-job.h @@ -399,7 +399,7 @@ enum mali_format { #define MALI_ALPHA_COVERAGE(clampf) ((uint16_t) (int) (clampf * 15.0f)) #define MALI_GET_ALPHA_COVERAGE(nibble) ((float) nibble / 15.0f) -/* Applies to unknown1 */ +/* Applies to midgard1.flags */ /* Should the hardware perform early-Z testing? Normally should be set * for performance reasons. Clear if you use: discard, @@ -407,19 +407,19 @@ enum mali_format { * forward-pixel kill; we're not quite sure which bit is which yet. * TODO: How does this interact with blending?*/ -#define MALI_EARLY_Z (1 << 10) +#define MALI_EARLY_Z (1 << 6) /* Should the hardware calculate derivatives (via helper invocations)? Set in a * fragment shader that uses texturing or derivative functions */ -#define MALI_HELPER_INVOCATIONS (1 << 11) +#define MALI_HELPER_INVOCATIONS (1 << 7) /* Flags denoting the fragment shader's use of tilebuffer readback. If the * shader might read any part of the tilebuffer, set MALI_READS_TILEBUFFER. 
If * it might read depth/stencil in particular, also set MALI_READS_ZS */ -#define MALI_READS_ZS (1 << 12) -#define MALI_READS_TILEBUFFER (1 << 16) +#define MALI_READS_ZS (1 << 8) +#define MALI_READS_TILEBUFFER (1 << 12) /* The raw Midgard blend payload can either be an equation or a shader * address, depending on the context */ @@ -538,9 +538,8 @@ struct mali_shader_meta { u32 unk1 : 28; // = 0x80 for vertex, 0x958020 for tiler } bifrost1; struct { -/* 0x200 except MALI_NO_ALPHA_TO_COVERAGE. Mysterious 1 - * other times. Who knows really? */ -u16 unknown1; +unsigned uniform_buffer_count : 4; +unsigned flags : 12; /* Whole number of uniform registers used, times two; * whole number of work registers used (no scale). diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 34eafdfdf0b..b3950950b4f 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -1049,7 +1049,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) vs->tripipe->sampler_count = ctx->sampler_count[PIPE_SHADER_VERTEX]; /* Who knows */ -vs->tripipe->midgard1.unknown1 = 0x2201; +vs->tripipe->midgard1.flags = 0x220; +vs->tripipe->midgard1.uniform_buffer_count = 1; ctx->payload_vertex.postfix._shader_upper = vs->tripipe_gpu >> 4; } @@ -1088,11 +1089,11 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) if (ctx->blend->has_blend_shader) ctx->fragment_shader_core.midgard1.work_count = /*MAX2(ctx->fragment_shader_core.midgard1.work_count, ctx->blend->blend_work_count)*/16; -/* Set late due to depending on render state */ - -/* The bottom bits seem to mean UBO count */ unsigned ubo_count = panfrost_ubo_count(ctx, PIPE_SHADER_FRAGMENT); -unsigned flags = MALI_EARLY_Z | 0x200 | 0x2000 | ubo_count; +ctx->fragment_shader_core.midgard1.uniform_buffer_count = ubo_count; + +/* Set late due to depending on render state */ +unsigned flags = MALI_EARLY_Z | 0x20 
| 0x200; /* Any time texturing is used, derivatives are implicitly * calculated, so we need to enable helper invocations */ @@ -1100,7 +1101,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) if (ctx->sampler_view_count[PIPE_SHADER_FRAGMENT]) flags |= MALI_HELPER_INVOCATIONS; -ctx->fragment_shader_core.midgard1.unknown1 = flags; +ctx->fragment_shader_core.midgard1.flags = flags; /* Assign texture/sample count right before upload */ ctx->fragment_shader_core.texture_count = ctx->sampler_view_count[PIPE_SHADER_FRAGMENT]; @@ -1119,9 +1120,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_ver
[Mesa-dev] [PATCH 3/9] panfrost: Allow for dynamic UBO count
We already uploaded UBOs, but only a fixed number (1) for uniforms; let's upload as many as we compute we need. Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_context.c | 27 +- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 6fde645a12c..b1c234fba7c 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -982,13 +982,14 @@ panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf, unsigned } /* Compute number of UBOs active (more specifically, compute the highest UBO - * number addressable -- if there are gaps, include them in the count anyway) - * */ + * number addressable -- if there are gaps, include them in the count anyway). + * We always include UBO #0 in the count, since we *need* uniforms enabled for + * sysvals. */ static unsigned panfrost_ubo_count(struct panfrost_context *ctx, enum pipe_shader_type stage) { -unsigned mask = ctx->constant_buffer[stage].enabled_mask; +unsigned mask = ctx->constant_buffer[stage].enabled_mask | 1; return 32 - __builtin_clz(mask); } @@ -1257,16 +1258,20 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) unreachable("Invalid shader stage\n"); } -/* Also attach the same buffer as a UBO for extended access */ +/* Next up, attach UBOs. 
UBO #0 is the uniforms we just + * uploaded */ -struct mali_uniform_buffer_meta uniform_buffers[] = { -{ -.size = MALI_POSITIVE((2 + uniform_count)), -.ptr = transfer.gpu >> 2, -}, -}; +unsigned ubo_count = panfrost_ubo_count(ctx, i); +assert(ubo_count >= 1); -mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers)); +size_t sz = sizeof(struct mali_uniform_buffer_meta) * ubo_count; +struct mali_uniform_buffer_meta *ubos = calloc(sz, 1); + +/* Upload uniforms as a UBO */ +ubos[0].size = MALI_POSITIVE((2 + uniform_count)); +ubos[0].ptr = transfer.gpu >> 2; + +mali_ptr ubufs = panfrost_upload_transient(ctx, ubos, sz); postfix->uniforms = transfer.gpu; postfix->uniform_buffers = ubufs; -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/9] panfrost: Handle disabled/empty UBOs
Prevents an assert(0) later in this (not so edge) case. We still have to have a dummy there. Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_context.c | 14 +- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index b3950950b4f..4d935f8d984 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -1291,9 +1291,21 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) /* The rest are honest-to-goodness UBOs */ for (unsigned ubo = 1; ubo < ubo_count; ++ubo) { -mali_ptr gpu = panfrost_map_constant_buffer_gpu(ctx, buf, ubo); size_t sz = buf->cb[ubo].buffer_size; +bool enabled = buf->enabled_mask & (1 << ubo); +bool empty = sz == 0; + +if (!enabled || empty) { +/* Stub out disabled UBOs to catch accesses */ + +ubos[ubo].size = 0; +ubos[ubo].ptr = 0xDEAD; +continue; +} + +mali_ptr gpu = panfrost_map_constant_buffer_gpu(ctx, buf, ubo); + unsigned bytes_per_field = 16; unsigned aligned = ALIGN(sz, bytes_per_field); unsigned fields = aligned / bytes_per_field; -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/9] panfrost: Initial UBO implementation
This implements loads from direct vec4-aligned fields in UBOs. More future work is needed for indirect or unaligned loads (just compiler changes), but the core command stream work is handled here. Alyssa Rosenzweig (9): panfrost: Constant buffer refactor panfrost: Report UBO count panfrost: Allow for dynamic UBO count panfrost: Upload UBOs panfrost: Identify "uniform buffer count" bits panfrost: Handle disabled/empty UBOs panfrost/midgard: Implement UBO reads panfrost: DRY between shader stage setup panfrost: Allow up to 16 UBOs .../drivers/panfrost/include/panfrost-job.h | 15 +- .../panfrost/midgard/midgard_compile.c| 65 -- src/gallium/drivers/panfrost/pan_context.c| 198 -- src/gallium/drivers/panfrost/pan_context.h| 6 +- src/gallium/drivers/panfrost/pan_screen.c | 2 +- .../drivers/panfrost/pandecode/decode.c | 10 +- 6 files changed, 203 insertions(+), 93 deletions(-) -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/9] panfrost: Upload UBOs
Now that all the counting is sorted, it's a matter of passing along a GPU address and going. Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_context.c | 31 ++ 1 file changed, 31 insertions(+) diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index b1c234fba7c..34eafdfdf0b 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -981,6 +981,23 @@ panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf, unsigned unreachable("No constant buffer"); } +static mali_ptr +panfrost_map_constant_buffer_gpu( +struct panfrost_context *ctx, +struct panfrost_constant_buffer *buf, +unsigned index) +{ +struct pipe_constant_buffer *cb = &buf->cb[index]; +struct panfrost_resource *rsrc = pan_resource(cb->buffer); + +if (rsrc) +return rsrc->bo->gpu; +else if (cb->user_buffer) +return panfrost_upload_transient(ctx, cb->user_buffer, cb->buffer_size); +else +unreachable("No constant buffer"); +} + /* Compute number of UBOs active (more specifically, compute the highest UBO * number addressable -- if there are gaps, include them in the count anyway). 
* We always include UBO #0 in the count, since we *need* uniforms enabled for @@ -1271,6 +1288,20 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) ubos[0].size = MALI_POSITIVE((2 + uniform_count)); ubos[0].ptr = transfer.gpu >> 2; +/* The rest are honest-to-goodness UBOs */ + +for (unsigned ubo = 1; ubo < ubo_count; ++ubo) { +mali_ptr gpu = panfrost_map_constant_buffer_gpu(ctx, buf, ubo); +size_t sz = buf->cb[ubo].buffer_size; + +unsigned bytes_per_field = 16; +unsigned aligned = ALIGN(sz, bytes_per_field); +unsigned fields = aligned / bytes_per_field; + +ubos[ubo].size = MALI_POSITIVE(fields); +ubos[ubo].ptr = gpu >> 2; +} + mali_ptr ubufs = panfrost_upload_transient(ctx, ubos, sz); postfix->uniforms = transfer.gpu; postfix->uniform_buffers = ubufs; -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 8/9] panfrost: DRY between shader stage setup
Just a little spring cleanup, extending UBOs to vertex shaders in the process. Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_context.c | 55 ++ 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 4d935f8d984..b8ad19cf0c6 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -1010,6 +1010,26 @@ panfrost_ubo_count(struct panfrost_context *ctx, enum pipe_shader_type stage) return 32 - __builtin_clz(mask); } +/* Fixes up a shader state with current state, returning a GPU address to the + * patched shader */ + +static mali_ptr +panfrost_patch_shader_state( +struct panfrost_context *ctx, +struct panfrost_shader_state *ss, +enum pipe_shader_type stage) +{ +ss->tripipe->texture_count = ctx->sampler_view_count[stage]; +ss->tripipe->sampler_count = ctx->sampler_count[stage]; + +ss->tripipe->midgard1.flags = 0x220; + +unsigned ubo_count = panfrost_ubo_count(ctx, stage); +ss->tripipe->midgard1.uniform_buffer_count = ubo_count; + +return ss->tripipe_gpu; +} + /* Go through dirty flags and actualise them in the cmdstream. 
*/ void @@ -1043,16 +1063,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant]; -/* Late shader descriptor assignments */ - -vs->tripipe->texture_count = ctx->sampler_view_count[PIPE_SHADER_VERTEX]; -vs->tripipe->sampler_count = ctx->sampler_count[PIPE_SHADER_VERTEX]; - -/* Who knows */ -vs->tripipe->midgard1.flags = 0x220; -vs->tripipe->midgard1.uniform_buffer_count = 1; - -ctx->payload_vertex.postfix._shader_upper = vs->tripipe_gpu >> 4; +ctx->payload_vertex.postfix._shader_upper = +panfrost_patch_shader_state(ctx, vs, PIPE_SHADER_VERTEX) >> 4; } if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) { @@ -1074,13 +1086,20 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) assert(ctx->fs); struct panfrost_shader_state *variant = &ctx->fs->variants[ctx->fs->active_variant]; +panfrost_patch_shader_state(ctx, variant, PIPE_SHADER_FRAGMENT); + #define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name COPY(shader); COPY(attribute_count); COPY(varying_count); +COPY(texture_count); +COPY(sampler_count); +COPY(sampler_count); COPY(midgard1.uniform_count); +COPY(midgard1.uniform_buffer_count); COPY(midgard1.work_count); +COPY(midgard1.flags); COPY(midgard1.unknown2); #undef COPY @@ -1089,11 +1108,14 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) if (ctx->blend->has_blend_shader) ctx->fragment_shader_core.midgard1.work_count = /*MAX2(ctx->fragment_shader_core.midgard1.work_count, ctx->blend->blend_work_count)*/16; -unsigned ubo_count = panfrost_ubo_count(ctx, PIPE_SHADER_FRAGMENT); -ctx->fragment_shader_core.midgard1.uniform_buffer_count = ubo_count; - /* Set late due to depending on render state */ -unsigned flags = MALI_EARLY_Z | 0x20 | 0x200; +unsigned flags = ctx->fragment_shader_core.midgard1.flags; + +/* Depending on whether it's legal to in the given shader, we + * try to enable 
early-z testing (or forward-pixel kill?) */ + +if (!variant->can_discard) +flags |= MALI_EARLY_Z; /* Any time texturing is used, derivatives are implicitly * calculated, so we need to enable helper invocations */ @@ -1103,10 +1125,6 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) ctx->fragment_shader_core.midgard1.flags = flags; -/* Assign texture/sample count right before upload */ -ctx->fragment_shader_core.texture_count = ctx->sampler_view_count[PIPE_SHADER_FRAGMENT]; -ctx->fragment_shader_core.sampler_count = ctx->sampler_count[PIPE_SHADER_FRAGMENT]; - /* Assign the stencil refs late */ ctx->fragment_shader_core.stencil_front.ref = ctx->stencil_ref.ref_value[0]; ctx->fragment_shader_core.stencil_back.ref = ctx->stencil_ref.ref_value[1]; @@ -1120,7 +1138,6 @@ panfrost_emit_for_draw(struct panfrost_context *ct
[Mesa-dev] [PATCH 7/9] panfrost/midgard: Implement UBO reads
UBOs and uniforms now use a common code path with an explicit `index` argument passed, enabling UBO reads. Signed-off-by: Alyssa Rosenzweig --- .../panfrost/midgard/midgard_compile.c| 65 +++ 1 file changed, 51 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c index baf637d666a..938872cc09e 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c +++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c @@ -1069,12 +1069,20 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) #undef ALU_CASE +/* Uniforms and UBOs use a shared code path, as uniforms are just (slightly + * optimized) versions of UBO #0 */ + static void -emit_uniform_read(compiler_context *ctx, unsigned dest, unsigned offset, nir_src *indirect_offset) +emit_ubo_read( +compiler_context *ctx, +unsigned dest, +unsigned offset, +nir_src *indirect_offset, +unsigned index) { /* TODO: half-floats */ -if (!indirect_offset && offset < ctx->uniform_cutoff) { +if (!indirect_offset && offset < ctx->uniform_cutoff && index == 0) { /* Fast path: For the first 16 uniforms, direct accesses are * 0-cycle, since they're just a register fetch in the usual * case. So, we alias the registers while we're still in @@ -1095,11 +1103,13 @@ emit_uniform_read(compiler_context *ctx, unsigned dest, unsigned offset, nir_src if (indirect_offset) { emit_indirect_offset(ctx, indirect_offset); -ins.load_store.unknown = 0x8700; /* xxx: what is this? */ +ins.load_store.unknown = 0x8700 | index; /* xxx: what is this? */ } else { -ins.load_store.unknown = 0x1E00; /* xxx: what is this? */ +ins.load_store.unknown = 0x1E00 | index; /* xxx: what is this? 
*/ } +/* TODO respect index */ + emit_mir_instruction(ctx, ins); } } @@ -1152,7 +1162,7 @@ emit_sysval_read(compiler_context *ctx, nir_instr *instr) unsigned uniform = ((uintptr_t) val) - 1; /* Emit the read itself -- this is never indirect */ -emit_uniform_read(ctx, dest, uniform, NULL); +emit_ubo_read(ctx, dest, uniform, NULL, 0); } /* Reads RGBA value from the tilebuffer and converts to a RGBA32F register, @@ -1231,7 +1241,7 @@ emit_fb_read_blend_scalar(compiler_context *ctx, unsigned reg) static void emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) { -unsigned offset, reg; +unsigned offset = 0, reg; switch (instr->intrinsic) { case nir_intrinsic_discard_if: @@ -1250,23 +1260,49 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) } case nir_intrinsic_load_uniform: -case nir_intrinsic_load_input: -offset = nir_intrinsic_base(instr); +case nir_intrinsic_load_ubo: +case nir_intrinsic_load_input: { +bool is_uniform = instr->intrinsic == nir_intrinsic_load_uniform; +bool is_ubo = instr->intrinsic == nir_intrinsic_load_ubo; + +if (!is_ubo) { +offset = nir_intrinsic_base(instr); +} unsigned nr_comp = nir_intrinsic_dest_components(instr); -bool direct = nir_src_is_const(instr->src[0]); -if (direct) { -offset += nir_src_as_uint(instr->src[0]); -} +nir_src *src_offset = nir_get_io_offset_src(instr); + +bool direct = nir_src_is_const(*src_offset); + +if (direct) +offset += nir_src_as_uint(*src_offset); /* We may need to apply a fractional offset */ int component = instr->intrinsic == nir_intrinsic_load_input ? nir_intrinsic_component(instr) : 0; reg = nir_dest_index(ctx, &instr->dest); -if (instr->intrinsic == nir_intrinsic_load_uniform && !ctx->is_blend) { -emit_uniform_read(ctx, reg, ctx->sysval_count + offset, !direct ? &instr->src[0] : NULL); +if (is_uniform && !ctx->is_blend) { +emit_ubo_read(ctx, reg, ctx->sysval_count + offset, !direct ? 
&instr->src[0] : NULL, 0); +} else if (is_ubo) { +nir_src index = instr->src[0]; + +/* We don't yet support indirect UBOs. For indirect + * block numbers (if that's possible), we don't know + * enough about the hardware yet. For indirect sources, +
[Mesa-dev] [PATCH 9/9] panfrost: Allow up to 16 UBOs
This is the hardware max, as far as I can tell. Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index 5d3acc0a0dd..bff63f469c3 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -321,7 +321,7 @@ panfrost_get_shader_param(struct pipe_screen *screen, return 16 * 1024 * sizeof(float); case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: -return 4; +return 16; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: return 0; -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/8] amd/rtld: update the ELF representation of LDS symbols
Marek, I thought you also r-b'd this? Either way r-b. On Thu, Jun 20, 2019 at 6:20 AM Marek Olšák wrote: > > From: Nicolai Hähnle > > The initial prototype used a processor-specific symbol type, but > feedback suggests that an approach using processor-specific section > name that encodes the alignment analogous to SHN_COMMON symbols is > preferred. > > This patch keeps both variants around for now to reduce problems > with LLVM compatibility as we switch branches around. > > This also cleans up the error reporting in this function. > --- > src/amd/common/ac_rtld.c | 34 +++--- > 1 file changed, 27 insertions(+), 7 deletions(-) > > diff --git a/src/amd/common/ac_rtld.c b/src/amd/common/ac_rtld.c > index 57d6b0151b4..ebf64d91658 100644 > --- a/src/amd/common/ac_rtld.c > +++ b/src/amd/common/ac_rtld.c > @@ -32,21 +32,25 @@ > > #include "ac_binary.h" > #include "ac_gpu_info.h" > #include "util/u_dynarray.h" > #include "util/u_math.h" > > // Old distributions may not have this enum constant > #define MY_EM_AMDGPU 224 > > #ifndef STT_AMDGPU_LDS > -#define STT_AMDGPU_LDS 13 > +#define STT_AMDGPU_LDS 13 // this is deprecated -- remove > +#endif > + > +#ifndef SHN_AMDGPU_LDS > +#define SHN_AMDGPU_LDS 0xff00 > #endif > > #ifndef R_AMDGPU_NONE > #define R_AMDGPU_NONE 0 > #define R_AMDGPU_ABS32_LO 1 > #define R_AMDGPU_ABS32_HI 2 > #define R_AMDGPU_ABS64 3 > #define R_AMDGPU_REL32 4 > #define R_AMDGPU_REL64 5 > #define R_AMDGPU_ABS32 6 > @@ -169,47 +173,60 @@ static bool layout_symbols(struct ac_rtld_symbol > *symbols, unsigned num_symbols, > * Read LDS symbols from the given \p section of the ELF of \p part and > append > * them to the LDS symbols list. > * > * Shared LDS symbols are filtered out. 
> */ > static bool read_private_lds_symbols(struct ac_rtld_binary *binary, > unsigned part_idx, > Elf_Scn *section, > uint32_t *lds_end_align) > { > -#define report_elf_if(cond) \ > +#define report_if(cond) \ > do { \ > if ((cond)) { \ > report_errorf(#cond); \ > return false; \ > } \ > } while (false) > +#define report_elf_if(cond) \ > + do { \ > + if ((cond)) { \ > + report_elf_errorf(#cond); \ > + return false; \ > + } \ > + } while (false) > > struct ac_rtld_part *part = &binary->parts[part_idx]; > Elf64_Shdr *shdr = elf64_getshdr(section); > uint32_t strtabidx = shdr->sh_link; > Elf_Data *symbols_data = elf_getdata(section, NULL); > report_elf_if(!symbols_data); > > const Elf64_Sym *symbol = symbols_data->d_buf; > size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym); > > for (size_t j = 0; j < num_symbols; ++j, ++symbol) { > - if (ELF64_ST_TYPE(symbol->st_info) != STT_AMDGPU_LDS) > + struct ac_rtld_symbol s = {}; > + > + if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) { > + /* old-style LDS symbols from initial prototype -- > remove eventually */ > + s.align = MIN2(1u << (symbol->st_other >> 3), 1u << > 16); > + } else if (symbol->st_shndx == SHN_AMDGPU_LDS) { > + s.align = MIN2(symbol->st_value, 1u << 16); > + report_if(!util_is_power_of_two_nonzero(s.align)); > + } else > continue; > > - report_elf_if(symbol->st_size > 1u << 29); > + report_if(symbol->st_size > 1u << 29); > > - struct ac_rtld_symbol s = {}; > s.name = elf_strptr(part->elf, strtabidx, symbol->st_name); > s.size = symbol->st_size; > - s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16); > s.part_idx = part_idx; > > if (!strcmp(s.name, "__lds_end")) { > report_elf_if(s.size != 0); > *lds_end_align = MAX2(*lds_end_align, s.align); > continue; > } > > const struct ac_rtld_symbol *shared = > find_symbol(&binary->lds_symbols, s.name, part_idx); > @@ -217,20 +234,21 @@ static bool read_private_lds_symbols(struct > ac_rtld_binary *binary, > report_elf_if(s.align > shared->align); > 
report_elf_if(s.size > shared->size); > continue; > } > > util_dynarray_append(&binary->lds_symbols, struct > ac_rtld_symbol, s); > } > > return true; > > +#undef report_if > #undef report_elf_if > } > > /** > * Open a binary consisting of one or more shader parts. > * > * \param binary the uninitializ
Re: [Mesa-dev] [PATCH 5/8] radeonsi: don't set spi_ps_input_* for monolithic shaders
Doesn't this cause assertions in si_shader_ps() for monolithic shaders? Some of these assertions check that at least one bit in a group is set and I think we end up with input_ena = 0 for monolithic shaders now? On Thu, Jun 20, 2019 at 6:20 AM Marek Olšák wrote: > > From: Marek Olšák > > The driver doesn't use these values and ac_rtld has assertions > expecting the value of 0. > --- > src/gallium/drivers/radeonsi/si_shader.c | 39 > 1 file changed, 26 insertions(+), 13 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index 54b29d0ae01..0489399b827 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -6128,21 +6128,22 @@ static void si_get_ps_prolog_key(struct si_shader > *shader, > key->ps_prolog.states.bc_optimize_for_linear); > key->ps_prolog.ancillary_vgpr_index = > shader->info.ancillary_vgpr_index; > > if (info->colors_read) { > unsigned *color = shader->selector->color_attr_index; > > if (shader->key.part.ps.prolog.color_two_side) { > /* BCOLORs are stored after the last input. */ > key->ps_prolog.num_interp_inputs = info->num_inputs; > key->ps_prolog.face_vgpr_index = > shader->info.face_vgpr_index; > - shader->config.spi_ps_input_ena |= > S_0286CC_FRONT_FACE_ENA(1); > + if (separate_prolog) > + shader->config.spi_ps_input_ena |= > S_0286CC_FRONT_FACE_ENA(1); > } > > for (unsigned i = 0; i < 2; i++) { > unsigned interp = info->input_interpolate[color[i]]; > unsigned location = > info->input_interpolate_loc[color[i]]; > > if (!(info->colors_read & (0xf << i*4))) > continue; > > key->ps_prolog.color_attr_index[i] = color[i]; > @@ -6159,66 +6160,78 @@ static void si_get_ps_prolog_key(struct si_shader > *shader, > case TGSI_INTERPOLATE_COLOR: > /* Force the interpolation location for > colors here. 
*/ > if > (shader->key.part.ps.prolog.force_persp_sample_interp) > location = > TGSI_INTERPOLATE_LOC_SAMPLE; > if > (shader->key.part.ps.prolog.force_persp_center_interp) > location = > TGSI_INTERPOLATE_LOC_CENTER; > > switch (location) { > case TGSI_INTERPOLATE_LOC_SAMPLE: > > key->ps_prolog.color_interp_vgpr_index[i] = 0; > - shader->config.spi_ps_input_ena |= > - S_0286CC_PERSP_SAMPLE_ENA(1); > + if (separate_prolog) { > + > shader->config.spi_ps_input_ena |= > + > S_0286CC_PERSP_SAMPLE_ENA(1); > + } > break; > case TGSI_INTERPOLATE_LOC_CENTER: > > key->ps_prolog.color_interp_vgpr_index[i] = 2; > - shader->config.spi_ps_input_ena |= > - S_0286CC_PERSP_CENTER_ENA(1); > + if (separate_prolog) { > + > shader->config.spi_ps_input_ena |= > + > S_0286CC_PERSP_CENTER_ENA(1); > + } > break; > case TGSI_INTERPOLATE_LOC_CENTROID: > > key->ps_prolog.color_interp_vgpr_index[i] = 4; > - shader->config.spi_ps_input_ena |= > - > S_0286CC_PERSP_CENTROID_ENA(1); > + if (separate_prolog) { > + > shader->config.spi_ps_input_ena |= > + > S_0286CC_PERSP_CENTROID_ENA(1); > + } > break; > default: > assert(0); > } > break; > case TGSI_INTERPOLATE_LINEAR: > /* Force the interpolati
[Mesa-dev] [Bug 99781] Some Unity games fail assertion on startup in glXCreateContextAttribsARB
https://bugs.freedesktop.org/show_bug.cgi?id=99781 --- Comment #21 from Hal Gentz --- I just spotted that this got reopened today when going through my emails for the first time in forever... this is unfortunate. I'll take another swing at this sometime next week using Uli Schlachter's new proposed method. Hopefully that doesn't cause a regression. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Possible bug in nir_algebraic?
I have encountered what I believe to be a bug in nir_algebraic. Since the rewrite to use automata, I'm not sure how to begin debugging it. I'm looking for some suggestions... even if the suggestion is, "Fix your patterns." I have added a pattern like: (('~fadd@32', ('fmul', ('fadd', 1.0, ('fneg', a)), ('fadd', 1.0, ('fneg', a))), ('fmul', ('flrp', a, 1.0, a), b)), ('flrp', 1.0, b, a), '!options->lower_flrp32'), While using NIR_PRINT=1, I see this in my instruction stream: vec1 32 ssa_2 = load_const (0x3f800000 /* 1.000000 */) ... vec1 32 ssa_196 = intrinsic load_uniform (ssa_195) (68, 4, 160) vec1 32 ssa_83 = fneg ssa_196 vec1 32 ssa_84 = fadd ssa_83, ssa_2 vec1 32 ssa_85 = fmul ssa_84, ssa_84 ... vec1 32 ssa_95 = flrp ssa_196, ssa_2, ssa_196 vec1 32 ssa_96 = fmul ssa_78, ssa_95 vec1 32 ssa_97 = fadd ssa_96, ssa_85 But nir_opt_algebraic does not make any progress. It sure looks like it should trigger with a = ssa_196 and b = ssa_78. However, progress is made if I change the pattern to (('~fadd@32', ('fmul', ('fadd', 1.0, ('fneg', a)), c), ('fmul', ('flrp', a, 1.0, a), b)), ('flrp', 1.0, b, a), '!options->lower_flrp32'), ssa_85 is definitely ('fmul', ssa_84, ssa_84), and ssa_84 is definitely ('fadd', 1.0, ('fneg', ssa_196))... both times. :) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 99781] Some Unity games fail assertion on startup in glXCreateContextAttribsARB
https://bugs.freedesktop.org/show_bug.cgi?id=99781 Hal Gentz changed: What|Removed |Added Assignee|mesa-dev@lists.freedesktop. |zegen...@protonmail.com |org | -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] radeonsi: glmark2 - regression (GL_INVALID_OPERATION in glFramebufferTexture2D) since your work around 2019-06-05
Hello Emil, I see glmark2 - [desktop] blur-radius=5 libpng warning: iCCP: known incorrect sRGB profile Mesa: User error: GL_INVALID_OPERATION in glFramebufferTexture2D(window-system framebuffer) [desktop] blur-radius=5:effect=blur:passes=1:separable=true:windows=4: FPS: 4879 FrameTime: 0.205 ms after your commits around beginning of June (2019-06-05) or your 'mapi'-work committed around 2019-06-10. Have to bisect. Any hints/ideas for a good starting point? Greetings, Dieter ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 110603] Blocky and black opacity/alpha using RADV on some games
https://bugs.freedesktop.org/show_bug.cgi?id=110603 --- Comment #7 from Samuel Pitoiset --- Can you record a renderdoc capture of the problem please? -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 110662] shadow artifacts in Assassin's Creed: Unity
https://bugs.freedesktop.org/show_bug.cgi?id=110662 soredake changed: What|Removed |Added CC||fds...@krutt.org -- You are receiving this mail because: You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Eager to work on Mesa Project under XorgEVoC
Hello Xorg Community! My name is Priyanshu Varshney, a *fourth-year* student at the *Indian Institute of Technology (IIT)*, Indore, India. I am eager to work on writing an advanced configuration tool for Mesa drivers and thus I want to know the possible mentors for the project so that I can start diving into the project. Thanking You Priyanshu Varshney IIT INDORE https://www.linkedin.com/in/priyanshu-varshney-476849152/ ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] XDC 2019: Less than three weeks to go to submit your talks, workshops or demos!
Hello! Less than three weeks to go to submit your talks, workshops or demos for this year's X.Org Developer Conference, which will be taking place in Montréal, Canada on October 2-4, 2019! The 2019 X.Org Developers Conference is the annual technical meeting for X Window System and Free Desktop developers. Attendees will gather to discuss outstanding technical issues related to the Open Source Graphics stack (Linux kernel, Mesa, DRM, Wayland, X11, etc.) and its software ecosystem. While any serious proposal will be gratefully considered, topics of interest to X.Org and freedesktop.org developers are encouraged. The program focus is on new development, ongoing challenges and anything else that will spark discussions among attendees in the hallway track. We are open to talks across all layers of the graphics stack, from the kernel to desktop environments / graphical applications and about how to make things better for the developers who build them. Head to the XDC website to learn more: https://xdc2019.x.org/ The deadline for submissions is Sunday, 7 July 2019. Best, Mark ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] panfrost: Print a backtrace if there is one
Hey > Good stuff, but isn't this change making us hit assertions? Hm, I thought it only enabled debugging symbols. Do you have a recommendation on how to enable debug symbols but not enabling assertions? Cheers Rohan Garg ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/3] panfrost: Make the gitlab-ci.yml file more robust
On Friday, 21 June 2019 07:40:02 CEST Tomeu Vizoso wrote: > On 6/20/19 6:33 PM, Rohan Garg wrote: > >> Not sure I understand how this works. Isn't this going to cause > >> unnecessary container builds? > >> > >> And, why cannot developers just add whatever they want to DEBIAN_TAG > >> to that end? > > > > This will spin a container for each branch, yes, though I think that is > > how it should be. > > Well, things are like this right now precisely because people wanted to > avoid what you are proposing. I would be quite sad if every time I push a > new branch I had to wait for the container to be rebuilt. Also note that > this is how other projects based on wayland/ci-templates work, including > the rest of mesa. > > > The patch allows for someone working in a branch to > > ( if the need be ) customize their containers for their branch. > > As I said, you can easily do that atm by just updating DEBIAN_TAG. > > > To that extent, I've simply enabled the CI to auto generate a DEBIAN_TAG > > which depends on the branch name so that a developer doesn't have to > > change > > the DEBIAN_TAG themselves. > > But, why is that such a problem? > Fair enough, I'll rework the patch to drop the modified DEBIAN_TAG then. Cheers Rohan Garg signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] panfrost: Print a backtrace if there is one
On 6/21/19 12:23 PM, Rohan Garg wrote: Hey Good stuff, but isn't this change making us hit assertions? Hm, I thought it only enabled debugging symbols. Do you have a recommendation on how to enable debug symbols but not enabling assertions? I think assertions should be limited to the most basic of sanity checks, and the other asserts replaced by proper error handling. Cheers, Tomeu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] panfrost: ci: Fix parsing of crashed tests
Without this fix, LAVA isn't parsing crashes as failed tests, because the shell logging is interspersed within the fake deqp output. Signed-off-by: Tomeu Vizoso --- src/gallium/drivers/panfrost/ci/deqp-runner.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/panfrost/ci/deqp-runner.sh b/src/gallium/drivers/panfrost/ci/deqp-runner.sh index 8645f75080e6..0b11202c1086 100644 --- a/src/gallium/drivers/panfrost/ci/deqp-runner.sh +++ b/src/gallium/drivers/panfrost/ci/deqp-runner.sh @@ -46,8 +46,8 @@ while [ -s /tmp/case-list.txt ]; do sed -i "0,/^$crashed_test$/d" /tmp/case-list.txt # So LAVA knows what happened - echo "Test case '$crashed_test'.." - echo " Crash" + echo "Test case '$crashed_test'.. + Crash" else # Consume a whole batch sed -i '1,'$BATCH_SIZE'd' /tmp/case-list.txt -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/1] panfrost: Use mir_foreach_instr_in_block_safe
Use the _safe version as asserted by gallium Signed-off-by: Rohan Garg --- src/gallium/drivers/panfrost/midgard/midgard_schedule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/panfrost/midgard/midgard_schedule.c b/src/gallium/drivers/panfrost/midgard/midgard_schedule.c index 0bf3502f41c..1aef19c824c 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_schedule.c +++ b/src/gallium/drivers/panfrost/midgard/midgard_schedule.c @@ -406,7 +406,7 @@ schedule_block(compiler_context *ctx, midgard_block *block) block->quadword_count = 0; -mir_foreach_instr_in_block(block, ins) { +mir_foreach_instr_in_block_safe(block, ins) { int skip; midgard_bundle bundle = schedule_bundle(ctx, block, ins, &skip); util_dynarray_append(&block->bundles, midgard_bundle, bundle); -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] panfrost: ci: Fix parsing of crashed tests
A-b ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] panfrost: Track buffer initialization
We want to know if a given slice of a buffer is initialized at a particular point in the execution of the program. This is accomplished easily enough -- start out uninitialized and upon an operation writing to the buffer, mark it initialized. The motivation is to optimize away expensive operations (like wallpaper blits) when reading from an uninitialized buffer; since it's uninitialized, the results of these operations are undefined, and it's legal to take the fast path ^_^ Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_context.c | 9 + src/gallium/drivers/panfrost/pan_fragment.c | 21 + src/gallium/drivers/panfrost/pan_resource.c | 12 ++-- src/gallium/drivers/panfrost/pan_resource.h | 3 +++ 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index d8c5510a31e..6257ffe2ac4 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -1397,6 +1397,15 @@ panfrost_draw_wallpaper(struct pipe_context *pipe) if (ctx->pipe_framebuffer.cbufs[0] == NULL) return; +/* Check if the buffer has any content on it worth preserving */ + +struct pipe_surface *surf = ctx->pipe_framebuffer.cbufs[0]; +struct panfrost_resource *rsrc = pan_resource(surf->texture); +unsigned level = surf->u.tex.level; + +if (!rsrc->bo->slices[level].initialized) +return; + /* Save the batch */ struct panfrost_job *batch = panfrost_get_job_for_fbo(ctx); diff --git a/src/gallium/drivers/panfrost/pan_fragment.c b/src/gallium/drivers/panfrost/pan_fragment.c index 70358fec3f3..d6b8afdc6b9 100644 --- a/src/gallium/drivers/panfrost/pan_fragment.c +++ b/src/gallium/drivers/panfrost/pan_fragment.c @@ -28,6 +28,17 @@ #include "util/u_format.h" +/* Mark a surface as written */ + +static void +panfrost_initialize_surface(struct pipe_surface *surf) +{ +unsigned level = surf->u.tex.level; +struct panfrost_resource *rsrc = pan_resource(surf->texture); + 
+rsrc->bo->slices[level].initialized = true; +} + /* Generate a fragment job. This should be called once per frame. (According to * presentations, this is supposed to correspond to eglSwapBuffers) */ @@ -38,6 +49,16 @@ panfrost_fragment_job(struct panfrost_context *ctx, bool has_draws) panfrost_sfbd_fragment(ctx, has_draws) : panfrost_mfbd_fragment(ctx, has_draws); +/* Mark the affected buffers as initialized, since we're writing to it */ +struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer; + +for (unsigned i = 0; i < fb->nr_cbufs; ++i) { +panfrost_initialize_surface(fb->cbufs[i]); +} + +if (fb->zsbuf) +panfrost_initialize_surface(fb->zsbuf); + struct mali_job_descriptor_header header = { .job_type = JOB_TYPE_FRAGMENT, .job_index = 1, diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c index a99840e4a52..1a4ce8ef297 100644 --- a/src/gallium/drivers/panfrost/pan_resource.c +++ b/src/gallium/drivers/panfrost/pan_resource.c @@ -71,6 +71,7 @@ panfrost_resource_from_handle(struct pipe_screen *pscreen, rsc->bo = screen->driver->import_bo(screen, whandle); rsc->bo->slices[0].stride = whandle->stride; +rsc->bo->slices[0].initialized = true; if (screen->ro) { rsc->scanout = @@ -509,7 +510,7 @@ panfrost_transfer_map(struct pipe_context *pctx, transfer->map = rzalloc_size(transfer, transfer->base.layer_stride * box->depth); assert(box->depth == 1); -if (usage & PIPE_TRANSFER_READ) { +if ((usage & PIPE_TRANSFER_READ) && bo->slices[level].initialized) { if (bo->layout == PAN_AFBC) { DBG("Unimplemented: reads from AFBC"); } else if (bo->layout == PAN_TILED) { @@ -528,6 +529,12 @@ panfrost_transfer_map(struct pipe_context *pctx, transfer->base.stride = bo->slices[level].stride; transfer->base.layer_stride = bo->cubemap_stride; +/* By mapping direct-write, we're implicitly already + * initialized (maybe), so be conservative */ + +if ((usage & PIPE_TRANSFER_WRITE) && (usage & PIPE_TRANSFER_MAP_DIRECTLY)) 
+bo->slices[level].initialized = true; + return bo->cpu + bo->slices[level].offset + transfer->base.box.z * bo->cubemap_stride @@ -549,11 +556,12 @@ panfrost_transfer_unmap(struct pipe_context *pctx, struct panfrost_bo *bo = prsrc->bo; if (transfer->usage & PIPE_TR
[Mesa-dev] [PATCH 0/3] panfrost/midgard: Merge constants
Just a fun optimization, shaves some cycles off glmark shaders. Alyssa Rosenzweig (3): panfrost/midgard: Share swizzle/mask code panfrost/midgard: Share swizzle compose panfrost/midgard: Merge embedded constants .../drivers/panfrost/midgard/helpers.h| 60 + .../panfrost/midgard/midgard_compile.c| 29 --- .../drivers/panfrost/midgard/midgard_ra.c | 9 +- .../panfrost/midgard/midgard_schedule.c | 85 ++- 4 files changed, 127 insertions(+), 56 deletions(-) -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] panfrost/midgard: Share swizzle compose
Signed-off-by: Alyssa Rosenzweig --- .../drivers/panfrost/midgard/helpers.h| 29 +++ .../drivers/panfrost/midgard/midgard_ra.c | 9 +- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/panfrost/midgard/helpers.h b/src/gallium/drivers/panfrost/midgard/helpers.h index ff069d3f8bb..cbf51fe882a 100644 --- a/src/gallium/drivers/panfrost/midgard/helpers.h +++ b/src/gallium/drivers/panfrost/midgard/helpers.h @@ -264,4 +264,33 @@ vector_alu_from_unsigned(unsigned u) return s; } +/* Composes two swizzles */ +static inline unsigned +pan_compose_swizzle(unsigned left, unsigned right) +{ +unsigned out = 0; + +for (unsigned c = 0; c < 4; ++c) { +unsigned s = (left >> (2*c)) & 0x3; +unsigned q = (right >> (2*s)) & 0x3; + +out |= (q << (2*c)); +} + +return out; +} + +/* Applies a swizzle to an ALU source */ + +static inline unsigned +vector_alu_apply_swizzle(unsigned src, unsigned swizzle) +{ +midgard_vector_alu_src s = +vector_alu_from_unsigned(src); + +s.swizzle = pan_compose_swizzle(s.swizzle, swizzle); + +return vector_alu_srco_unsigned(s); +} + #endif diff --git a/src/gallium/drivers/panfrost/midgard/midgard_ra.c b/src/gallium/drivers/panfrost/midgard/midgard_ra.c index 7fd5e5b49e3..40a73709871 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_ra.c +++ b/src/gallium/drivers/panfrost/midgard/midgard_ra.c @@ -92,14 +92,7 @@ static unsigned compose_swizzle(unsigned swizzle, unsigned mask, struct phys_reg reg, struct phys_reg dst) { -unsigned out = 0; - -for (unsigned c = 0; c < 4; ++c) { -unsigned s = (swizzle >> (2*c)) & 0x3; -unsigned q = (reg.swizzle >> (2*s)) & 0x3; - -out |= (q << (2*c)); -} +unsigned out = pan_compose_swizzle(swizzle, reg.swizzle); /* Based on the register mask, we need to adjust over. E.g if we're * writing to yz, a base swizzle of xy__ becomes _xy_. Save the -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/2] panfrost: Implement rasterizer discard
Now that scoreboarding is working, this is trivial. Alyssa Rosenzweig (2): panfrost: Implement rasterizer discard panfrost: Conditionally submit fragment job src/gallium/drivers/panfrost/pan_context.c | 14 -- src/gallium/drivers/panfrost/pan_drm.c | 5 - 2 files changed, 16 insertions(+), 3 deletions(-) -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/8] radeonsi: set the calling convention for inlined function calls
r-b On Thu, Jun 20, 2019 at 6:19 AM Marek Olšák wrote: > > From: Marek Olšák > > otherwise the behavior is undefined > --- > src/amd/common/ac_llvm_build.c | 8 > src/amd/common/ac_llvm_build.h | 3 +++ > src/gallium/drivers/radeonsi/si_compute_prim_discard.c | 2 +- > src/gallium/drivers/radeonsi/si_shader.c | 2 +- > 4 files changed, 13 insertions(+), 2 deletions(-) > > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c > index 1e6247ad72e..cdd4c36f5da 100644 > --- a/src/amd/common/ac_llvm_build.c > +++ b/src/amd/common/ac_llvm_build.c > @@ -4427,10 +4427,18 @@ ac_build_ddxy_interp(struct ac_llvm_context *ctx, > LLVMValueRef interp_ij) > > LLVMValueRef > ac_build_load_helper_invocation(struct ac_llvm_context *ctx) > { > LLVMValueRef result = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live", > ctx->i1, NULL, 0, > AC_FUNC_ATTR_READNONE); > result = LLVMBuildNot(ctx->builder, result, ""); > return LLVMBuildSExt(ctx->builder, result, ctx->i32, ""); > } > + > +LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func, > + LLVMValueRef *args, unsigned num_args) > +{ > + LLVMValueRef ret = LLVMBuildCall(ctx->builder, func, args, num_args, > ""); > + LLVMSetInstructionCallConv(ret, LLVMGetFunctionCallConv(func)); > + return ret; > +} > diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h > index a1654d2b2c4..1928843c78c 100644 > --- a/src/amd/common/ac_llvm_build.h > +++ b/src/amd/common/ac_llvm_build.h > @@ -713,20 +713,23 @@ ac_build_frexp_exp(struct ac_llvm_context *ctx, > LLVMValueRef src0, > LLVMValueRef > ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0, > unsigned bitsize); > > LLVMValueRef > ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij); > > LLVMValueRef > ac_build_load_helper_invocation(struct ac_llvm_context *ctx); > > +LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func, > + LLVMValueRef *args, unsigned num_args); > + > LLVMValueRef 
ac_build_atomic_rmw(struct ac_llvm_context *ctx, > LLVMAtomicRMWBinOp op, > LLVMValueRef ptr, LLVMValueRef val, > const char *sync_scope); > > LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, > LLVMValueRef ptr, > LLVMValueRef cmp, LLVMValueRef val, > const char *sync_scope); > > #ifdef __cplusplus > } > diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c > b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c > index 0f2934243a1..28da7b92250 100644 > --- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c > +++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c > @@ -660,21 +660,21 @@ void si_build_prim_discard_compute_shader(struct > si_shader_context *ctx) > index[1] = LLVMBuildSelect(builder, prim_is_odd, > index0, index1, ""); > } > } > > /* Execute the vertex shader for each vertex to get vertex positions. > */ > LLVMValueRef pos[3][4]; > for (unsigned i = 0; i < vertices_per_prim; i++) { > vs_params[param_vertex_id] = index[i]; > vs_params[param_instance_id] = instance_id; > > - LLVMValueRef ret = LLVMBuildCall(builder, vs, vs_params, > num_vs_params, ""); > + LLVMValueRef ret = ac_build_call(&ctx->ac, vs, vs_params, > num_vs_params); > for (unsigned chan = 0; chan < 4; chan++) > pos[i][chan] = LLVMBuildExtractValue(builder, ret, > chan, ""); > } > > /* Divide XYZ by W. 
*/ > for (unsigned i = 0; i < vertices_per_prim; i++) { > for (unsigned chan = 0; chan < 3; chan++) > pos[i][chan] = ac_build_fdiv(&ctx->ac, pos[i][chan], > pos[i][3]); > } > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index eb75e2a77a4..54b29d0ae01 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -6584,21 +6584,21 @@ static void si_build_wrapper_function(struct > si_shader_context *ctx, > } > } else { > arg = LLVMBuildBitCast(builder, arg, > param_type, ""); > } > } > > in[param_idx] = arg; > out_idx += param_size; > } > > - ret = LLVMBuildCall(builder, parts[part], in, num_par
[Mesa-dev] [PATCH 1/3] panfrost/midgard: Share swizzle/mask code
Signed-off-by: Alyssa Rosenzweig --- .../drivers/panfrost/midgard/helpers.h| 31 +++ .../panfrost/midgard/midgard_compile.c| 29 - 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/src/gallium/drivers/panfrost/midgard/helpers.h b/src/gallium/drivers/panfrost/midgard/helpers.h index a2cc7e67eaf..ff069d3f8bb 100644 --- a/src/gallium/drivers/panfrost/midgard/helpers.h +++ b/src/gallium/drivers/panfrost/midgard/helpers.h @@ -151,6 +151,37 @@ quadword_size(int tag) #define COMPONENT_Z 0x2 #define COMPONENT_W 0x3 +#define SWIZZLE_XXXX SWIZZLE(COMPONENT_X, COMPONENT_X, COMPONENT_X, COMPONENT_X) +#define SWIZZLE_XYXX SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_X, COMPONENT_X) +#define SWIZZLE_XYZX SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_X) +#define SWIZZLE_XYZW SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W) +#define SWIZZLE_XYXZ SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_X, COMPONENT_Z) +#define SWIZZLE_WWWW SWIZZLE(COMPONENT_W, COMPONENT_W, COMPONENT_W, COMPONENT_W) + +static inline unsigned +swizzle_of(unsigned comp) +{ +switch (comp) { +case 1: +return SWIZZLE_XXXX; +case 2: +return SWIZZLE_XYXX; +case 3: +return SWIZZLE_XYZX; +case 4: +return SWIZZLE_XYZW; +default: +unreachable("Invalid component count"); +} +} + +static inline unsigned +mask_of(unsigned nr_comp) +{ +return (1 << nr_comp) - 1; +} + + /* See ISA notes */ #define LDST_NOP (3) diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c index 1cc551b603c..ddcb9a0a092 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c +++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c @@ -82,35 +82,6 @@ midgard_block_add_successor(midgard_block *block, midgard_block *successor) * driver seems to do it that way */ #define EMIT(op, ...) 
emit_mir_instruction(ctx, v_##op(__VA_ARGS__)); -#define SWIZZLE_XXXX SWIZZLE(COMPONENT_X, COMPONENT_X, COMPONENT_X, COMPONENT_X) -#define SWIZZLE_XYXX SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_X, COMPONENT_X) -#define SWIZZLE_XYZX SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_X) -#define SWIZZLE_XYZW SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W) -#define SWIZZLE_XYXZ SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_X, COMPONENT_Z) -#define SWIZZLE_WWWW SWIZZLE(COMPONENT_W, COMPONENT_W, COMPONENT_W, COMPONENT_W) - -static inline unsigned -swizzle_of(unsigned comp) -{ -switch (comp) { -case 1: -return SWIZZLE_XXXX; -case 2: -return SWIZZLE_XYXX; -case 3: -return SWIZZLE_XYZX; -case 4: -return SWIZZLE_XYZW; -default: -unreachable("Invalid component count"); -} -} - -static inline unsigned -mask_of(unsigned nr_comp) -{ -return (1 << nr_comp) - 1; -} #define M_LOAD_STORE(name, rname, uname) \ static midgard_instruction m_##name(unsigned ssa, unsigned address) { \ -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] panfrost: Conditionally submit fragment job
If there are no tiling jobs and no clears, there is no need to submit a fragment job (relevant for transform feedback). Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_drm.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/panfrost/pan_drm.c b/src/gallium/drivers/panfrost/pan_drm.c index 77ec419398e..3914fdc09cc 100644 --- a/src/gallium/drivers/panfrost/pan_drm.c +++ b/src/gallium/drivers/panfrost/pan_drm.c @@ -265,7 +265,10 @@ panfrost_drm_submit_vs_fs_job(struct panfrost_context *ctx, bool has_draws, bool assert(!ret); } - ret = panfrost_drm_submit_job(ctx, panfrost_fragment_job(ctx, has_draws), PANFROST_JD_REQ_FS, surf); +if (job->first_tiler.gpu || job->clear) { +ret = panfrost_drm_submit_job(ctx, panfrost_fragment_job(ctx, has_draws), PANFROST_JD_REQ_FS, surf); +assert(!ret); +} return ret; } -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] panfrost/midgard: Merge embedded constants
In Midgard, a bundle consists of a few ALU instructions. Within the bundle, there is room for an optional 128-bit constant; this constant is shared across all instructions in the bundle. Unfortunately, many instructions want a 128-bit constant all to themselves (how selfish!). If we run out of space for constants in a bundle, the bundle has to be broken up, incurring a performance and space penalty. As an optimization, the scheduler now analyzes the constants coming in per-instruction and attempts to merge shared components, adjusting the swizzle accessing the bundle's constants appropriately. Concretely, given the GLSL: (a * vec4(1.5, 0.5, 0.5, 1.0)) + vec4(1.0, 2.3, 2.3, 0.5) instead of compiling to the naive two bundles: vmul.fmul [temp], [a], r26 fconstants 1.5, 0.5, 0.5, 1.0 vadd.fadd [out], [temp], r26 fconstants 1.0, 2.3, 2.3, 0.5 The scheduler can now fuse into a single (pipelined!) bundle: vmul.fmul [temp], [a], r26.xyyz vadd.fadd [out], [temp], r26.zwwy fconstants 1.5, 0.5, 1.0, 2.3 Signed-off-by: Alyssa Rosenzweig --- .../panfrost/midgard/midgard_schedule.c | 85 ++- 1 file changed, 66 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/panfrost/midgard/midgard_schedule.c b/src/gallium/drivers/panfrost/midgard/midgard_schedule.c index 0bf3502f41c..7059f7bbe2a 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_schedule.c +++ b/src/gallium/drivers/panfrost/midgard/midgard_schedule.c @@ -147,6 +147,8 @@ schedule_bundle(compiler_context *ctx, midgard_block *block, midgard_instruction instructions_emitted = -1; midgard_instruction *pins = ins; +unsigned constant_count = 0; + for (;;) { midgard_instruction *ains = pins; @@ -251,33 +253,78 @@ schedule_bundle(compiler_context *ctx, midgard_block *block, midgard_instruction segment[segment_size++] = ains; -/* Only one set of embedded constants per - * bundle possible; if we have more, we must - * break the chain early, unfortunately */ +/* We try to reuse constants if possible, by adjusting + * 
the swizzle */ + +if (ains->has_blend_constant) { +bundle.has_blend_constant = 1; +bundle.has_embedded_constants = 1; +} else if (ains->has_constants) { +/* By definition, blend constants conflict with + * everything, so if there are already + * constants we break the bundle *now* */ + +if (bundle.has_blend_constant) +break; + +/* For anything but blend constants, we can do + * proper analysis, however */ + +/* TODO: Mask by which are used */ +uint32_t *constants = (uint32_t *) ains->constants; +uint32_t *bundles = (uint32_t *) bundle.constants; -if (ains->has_constants) { -if (bundle.has_embedded_constants) { -/* The blend constant needs to be - * alone, since it conflicts with - * everything by definition */ +uint32_t indices[4] = { 0 }; +bool break_bundle = false; -if (ains->has_blend_constant || bundle.has_blend_constant) +for (unsigned i = 0; i < 4; ++i) { +uint32_t cons = constants[i]; +bool constant_found = false; + +/* Search for the constant */ +for (unsigned j = 0; j < constant_count; ++j) { +if (bundles[j] != cons) +continue; + +/* We found it, reuse */ +indices[i] = j; +constant_found = true; break; +} + +if (constant_found) +continue; -/* ...but if there are already - * constants but these are the -
[Mesa-dev] [PATCH 1/2] panfrost: Implement rasterizer discard
D'aww, look how cute that is now that scoreboarding is set up. Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_context.c | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 6257ffe2ac4..a803a8b68cb 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -1338,12 +1338,22 @@ panfrost_queue_draw(struct panfrost_context *ctx) /* Handle dirty flags now */ panfrost_emit_for_draw(ctx, true); +/* If rasterizer discard is enable, only submit the vertex */ + +bool rasterizer_discard = ctx->rasterizer +&& ctx->rasterizer->base.rasterizer_discard; + struct panfrost_transfer vertex = panfrost_vertex_tiler_job(ctx, false); -struct panfrost_transfer tiler = panfrost_vertex_tiler_job(ctx, true); +struct panfrost_transfer tiler; + +if (!rasterizer_discard) +tiler = panfrost_vertex_tiler_job(ctx, true); struct panfrost_job *batch = panfrost_get_job_for_fbo(ctx); -if (ctx->wallpaper_batch) +if (rasterizer_discard) +panfrost_scoreboard_queue_vertex_job(batch, vertex, FALSE); +else if (ctx->wallpaper_batch) panfrost_scoreboard_queue_fused_job_prepend(batch, vertex, tiler); else panfrost_scoreboard_queue_fused_job(batch, vertex, tiler); -- 2.20.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/8] radeonsi: flatten the switch for DPBB tunables
r-b On Thu, Jun 20, 2019 at 6:20 AM Marek Olšák wrote: > > From: Marek Olšák > > --- > .../drivers/radeonsi/si_state_binning.c| 18 -- > 1 file changed, 4 insertions(+), 14 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c > b/src/gallium/drivers/radeonsi/si_state_binning.c > index 6285ccc28c2..a6b1830b661 100644 > --- a/src/gallium/drivers/radeonsi/si_state_binning.c > +++ b/src/gallium/drivers/radeonsi/si_state_binning.c > @@ -395,34 +395,24 @@ void si_emit_dpbb_state(struct si_context *sctx) > punchout_mode = V_028060_AUTO; > disable_start_of_prim = (cb_target_enabled_4bit & > blend->blend_enable_4bit) != 0; > } > > /* Tunable parameters. Also test with DFSM enabled/disabled. */ > unsigned context_states_per_bin; /* allowed range: [0, 5] */ > unsigned persistent_states_per_bin; /* allowed range: [0, 31] */ > unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */ > > - switch (sctx->family) { > - case CHIP_VEGA10: > - case CHIP_VEGA12: > - case CHIP_VEGA20: > - case CHIP_RAVEN: > - case CHIP_RAVEN2: > - /* Tuned for Raven. Vega might need different values. */ > - context_states_per_bin = 5; > - persistent_states_per_bin = 31; > - fpovs_per_batch = 63; > - break; > - default: > - assert(0); > - } > + /* Tuned for Raven. Vega might need different values. */ > + context_states_per_bin = 5; > + persistent_states_per_bin = 31; > + fpovs_per_batch = 63; > > /* Emit registers. */ > struct uvec2 bin_size_extend = {}; > if (bin_size.x >= 32) > bin_size_extend.x = util_logbase2(bin_size.x) - 5; > if (bin_size.y >= 32) > bin_size_extend.y = util_logbase2(bin_size.y) - 5; > > unsigned initial_cdw = sctx->gfx_cs->current.cdw; > radeon_opt_set_context_reg( > -- > 2.17.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] panfrost: Implement command stream scoreboarding
On 6/20/19 8:20 PM, Alyssa Rosenzweig wrote: This is a rather complex change, adding a lot of code but ideally cleaning up quite a bit as we go. Within a batch (single frame), there are multiple distinct Mali job types: SET_VALUE, VERTEX, TILER, FRAGMENT for the few that we emit right now (eventually more for compute and geometry shaders). Each hardware job has a mali_job_descriptor_header, which contains three fields of interest: job index, a dependencies list, and a next job pointer. The next job pointer in each job is used to form a linked list of submitted jobs. Easy enough. The job index and dependencies list, however, are used to form a dependency graph (a DAG, where each hardware job is a node and each dependency is a directed edge). Internally, this sets up a scoreboarding data structure for the hardware to dispatch jobs in parallel, enabling (for example) vertex shaders from different draws to execute in parallel while there are strict dependencies between tiling the geometry of a draw and running that vertex shader. For a while, we got by with an incredible series of total hacks, manually coding indices, lists, and dependencies. That worked for a moment, but combinatorial kaboom kicked in and it became an unmaintainable mess of spaghetti code. We can do better. This commit explicitly handles the scoreboarding by providing high-level manipulation for jobs. Rather than a command like "set dependency #2 to index 17", we can express quite naturally "add a dependency from job T on job V". Instead of some open-coded logic to copy a draw pointer into a delicate context array, we now have an elegant exposed API to simply "queue a job of type XYZ". The design is influenced by both our current requirements (standard ES2 draws and u_blitter) as well as the need for more complex scheduling in the future. 
For instance, blits can be optimized to use only a tiler job, without a vertex job first (since the screen-space vertices are known ahead-of-time) -- causing tiler-only jobs. Likewise, when using transform feedback with rasterizer discard enabled, vertex jobs are created (to run vertex shaders) with no corresponding tiler job. Both of these cases break the original model and could not be expressed with the open-coded logic. More generally, this will make it easier to add support for compute shaders, geometry shaders, and fused jobs (an optimization available on Bifrost). Incidentally, this moves quite a bit of state from the driver context to the batch, which helps with Rohan's refactor to eventually permit pipelining across framebuffers (one important outstanding optimization for FBO-heavy workloads). Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/meson.build | 1 + src/gallium/drivers/panfrost/pan_context.c| 132 + src/gallium/drivers/panfrost/pan_context.h| 19 +- src/gallium/drivers/panfrost/pan_drm.c| 6 +- src/gallium/drivers/panfrost/pan_job.c| 18 +- src/gallium/drivers/panfrost/pan_job.h| 61 +++ src/gallium/drivers/panfrost/pan_scoreboard.c | 453 ++ 7 files changed, 548 insertions(+), 142 deletions(-) create mode 100644 src/gallium/drivers/panfrost/pan_scoreboard.c diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build index 43d73ce2086..4298242f6b9 100644 --- a/src/gallium/drivers/panfrost/meson.build +++ b/src/gallium/drivers/panfrost/meson.build @@ -57,6 +57,7 @@ files_panfrost = files( 'pan_blend_shaders.c', 'pan_pretty_print.c', 'pan_fragment.c', + 'pan_scoreboard.c', 'pan_sfbd.c', 'pan_mfbd.c', 'pan_tiler.c', diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 41656236b5b..d8c5510a31e 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -517,15 +517,6 @@ 
panfrost_default_shader_backend(struct panfrost_context *ctx) memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader)); } -static void -panfrost_link_job_pair(struct mali_job_descriptor_header *first, mali_ptr next) -{ -if (first->job_descriptor_size) -first->next_job_64 = (u64) (uintptr_t) next; -else -first->next_job_32 = (u32) (uintptr_t) next; -} - /* Generates a vertex/tiler job. This is, in some sense, the heart of the * graphics command stream. It should be called once per draw, accordding to * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, but in @@ -535,12 +526,8 @@ panfrost_link_job_pair(struct mali_job_descriptor_header *first, mali_ptr next) struct panfrost_transfer panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler) { -/* Each draw call corresponds to two jobs, and the set-value job is first */ -int draw_job_index = 1 + (2 * ctx->draw_count) + 1; - struct mali_job_descriptor_header job = {
Re: [Mesa-dev] [PATCH] panfrost: Implement command stream scoreboarding
> Very nice, this will fix a few tests. Ah, of course :) > Good riddance! To be fair, a lot of that just got moved in with panfrost_job.. :) > Can we remove now the has_draws arg? Maybe? The has_draws arg controls a lot of things beyond just, you know, having draws. The whole API there needs to be overhauled quite a bit. So yes, we can remove it, but -after- this is merged. Enough code is being changed here as it is; we don't need more things to break. > What's the primary batch? Might warrant an explanation somewhere. Meh, I was making up terminology as I went. The primary batch being everything but the FRAGMENT job -- so, the SET_VALUE/VERTEX/TILER jobs (and later COMPUTE/GEOMETRY/FUSED). I.e. everything that we actually handle in panfrost_job and the scoreboarding module, rather than just what we tag on in pan_fragment (which is sort of special cased? not that you couldn't also scoreboard those but there's no reason to since there's only ever one in the chain -- so no deps and manual index=1 -- as far as I know?). > This is excellent work, thanks! :D Thank you for the review! -- has_draws can be addressed later. Do you want me to add a v2 with a comment explaining what I meant by "primary batch"? Or should we just merge as-is (while CI is still green :) and add a followup patch later)? signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] panfrost: Implement command stream scoreboarding
On 6/21/19 3:05 PM, Alyssa Rosenzweig wrote: Very nice, this will fix a few tests. Ah, of course :) Good riddance! To be fair, a lot of that just got moved in with panfrost_job.. :) Can we remove now the has_draws arg? Maybe? The has_draws arg controls a lot of things beyond just, you know, having draws. The whole API there needs to be overhauled quite a bit. So yes, we can remove it, but -after- this is merged. Enough code is being changed here as it is; we don't need more things to break. What's the primary batch? Might warrant an explanation somewhere. Meh, I was making up terminology as I went. The primary batch being everything but the FRAGMENT job -- so, the SET_VALUE/VERTEX/TILER jobs (and later COMPUTE/GEOMETRY/FUSED). I.e. everything that we actually handle in panfrost_job and the scoreboarding module, rather than just what we tag on in pan_fragment (which is sort of special cased? not that you couldn't also scoreboard those but there's no reason to since there's only ever one in the chain -- so no deps and manual index=1 -- as far as I know?). This is excellent work, thanks! :D Thank you for the review! -- has_draws can be addressed later. Do you want me to add a v2 with a comment explaining what I meant by "primary batch"? Or should we just merge as-is (while CI is still green :) and add a followup patch later)? Both sound good to me. Thanks, Tomeu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 7/9] radv: clear the depth/stencil resolve attachment if necessary
r-b On Wed, Jun 12, 2019 at 11:44 AM Samuel Pitoiset wrote: > > The driver might need to clear one aspect of the depth/stencil > resolve attachment before performing the resolve itself. > > Signed-off-by: Samuel Pitoiset > --- > src/amd/vulkan/radv_meta_clear.c | 73 > 1 file changed, 55 insertions(+), 18 deletions(-) > > diff --git a/src/amd/vulkan/radv_meta_clear.c > b/src/amd/vulkan/radv_meta_clear.c > index 44aaf92f53d..b5824c68fe2 100644 > --- a/src/amd/vulkan/radv_meta_clear.c > +++ b/src/amd/vulkan/radv_meta_clear.c > @@ -715,13 +715,14 @@ static void > emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, > const VkClearAttachment *clear_att, > const VkClearRect *clear_rect, > + struct radv_subpass_attachment *ds_att, > uint32_t view_mask) > { > struct radv_device *device = cmd_buffer->device; > struct radv_meta_state *meta_state = &device->meta_state; > const struct radv_subpass *subpass = cmd_buffer->state.subpass; > const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer; > - const uint32_t pass_att = > subpass->depth_stencil_attachment->attachment; > + const uint32_t pass_att = ds_att->attachment; > VkClearDepthStencilValue clear_value = > clear_att->clearValue.depthStencil; > VkImageAspectFlags aspects = clear_att->aspectMask; > const struct radv_image_view *iview = fb ? 
> fb->attachments[pass_att].attachment : NULL; > @@ -761,18 +762,25 @@ emit_depthstencil_clear(struct radv_cmd_buffer > *cmd_buffer, > iview, > samples_log2, > aspects, > - > subpass->depth_stencil_attachment->layout, > +ds_att->layout, > clear_rect, > clear_value); > if (!pipeline) > return; > > + struct radv_subpass clear_subpass = { > + .color_count = 0, > + .color_attachments = NULL, > + .depth_stencil_attachment = ds_att, > + }; > + > + radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass); > + > radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, > pipeline); > > if (depth_view_can_fast_clear(cmd_buffer, iview, aspects, > - > subpass->depth_stencil_attachment->layout, > - clear_rect, clear_value)) > + ds_att->layout, clear_rect, > clear_value)) > radv_update_ds_clear_metadata(cmd_buffer, iview->image, > clear_value, aspects); > > @@ -799,6 +807,8 @@ emit_depthstencil_clear(struct radv_cmd_buffer > *cmd_buffer, > radv_CmdSetStencilReference(cmd_buffer_h, > VK_STENCIL_FACE_FRONT_BIT, > prev_reference); > } > + > + radv_cmd_buffer_set_subpass(cmd_buffer, subpass); > } > > static uint32_t > @@ -1562,7 +1572,8 @@ emit_clear(struct radv_cmd_buffer *cmd_buffer, > const VkClearRect *clear_rect, > enum radv_cmd_flush_bits *pre_flush, > enum radv_cmd_flush_bits *post_flush, > - uint32_t view_mask) > + uint32_t view_mask, > + bool ds_resolve_clear) > { > const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer; > const struct radv_subpass *subpass = cmd_buffer->state.subpass; > @@ -1588,12 +1599,16 @@ emit_clear(struct radv_cmd_buffer *cmd_buffer, > emit_color_clear(cmd_buffer, clear_att, clear_rect, > view_mask); > } > } else { > - const uint32_t pass_att = > subpass->depth_stencil_attachment->attachment; > - if (pass_att == VK_ATTACHMENT_UNUSED) > + struct radv_subpass_attachment *ds_att = > subpass->depth_stencil_attachment; > + > + if (ds_resolve_clear) > + ds_att = subpass->ds_resolve_attachment; > + > + if (ds_att->attachment == 
VK_ATTACHMENT_UNUSED) > return; > > - VkImageLayout image_layout = > subpass->depth_stencil_attachment->layout; > - const struct radv_image_view *iview = fb ? > fb->attachments[pass_att].attachment : NULL; > + VkImageLayout image_layout = ds_att->layout; > + const struct radv_image_view *iview = fb ? > fb->attachments[ds_att->attachment].attachment : NULL; > VkClearDept
Re: [Mesa-dev] [PATCH v2 6/9] radv: decompress HTILE if the resolve src image is compressed
We only need this if the HTILE is not tc-compat? Otherwise the read side is independent of compute/fragment shader. On Wed, Jun 12, 2019 at 11:44 AM Samuel Pitoiset wrote: > > It's required to decompress HTILE before resolving with the > compute path. > > v2: - do proper layout transitions > - account for the framebuffer layers > > Signed-off-by: Samuel Pitoiset > --- > src/amd/vulkan/radv_meta_resolve.c | 18 +- > 1 file changed, 17 insertions(+), 1 deletion(-) > > diff --git a/src/amd/vulkan/radv_meta_resolve.c > b/src/amd/vulkan/radv_meta_resolve.c > index 7cadf937ee6..48ebbd1fc10 100644 > --- a/src/amd/vulkan/radv_meta_resolve.c > +++ b/src/amd/vulkan/radv_meta_resolve.c > @@ -784,6 +784,22 @@ radv_decompress_resolve_subpass_src(struct > radv_cmd_buffer *cmd_buffer) > radv_decompress_resolve_src(cmd_buffer, src_image, > src_att.layout, 1, &region); > } > + > + if (subpass->ds_resolve_attachment) { > + struct radv_subpass_attachment src_att = > *subpass->depth_stencil_attachment; > + struct radv_image_view *src_iview = > + fb->attachments[src_att.attachment].attachment; > + struct radv_image *src_image = src_iview->image; > + > + VkImageResolve region = {}; > + region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; > + region.srcSubresource.mipLevel = 0; > + region.srcSubresource.baseArrayLayer = src_iview->base_layer; > + region.srcSubresource.layerCount = layer_count; > + > + radv_decompress_resolve_src(cmd_buffer, src_image, > + src_att.layout, 1, &region); > + } > } > > /** > @@ -809,7 +825,7 @@ radv_decompress_resolve_src(struct radv_cmd_buffer > *cmd_buffer, > barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; > barrier.image = radv_image_to_handle(src_image); > barrier.subresourceRange = (VkImageSubresourceRange) { > - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, > + .aspectMask = region->srcSubresource.aspectMask, > .baseMipLevel = region->srcSubresource.mipLevel, > .levelCount = 1, > .baseArrayLayer = src_base_layer, > -- > 2.22.0 > > ___ > mesa-dev 
mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] panfrost: Kill the perf counters interface
The DRM driver has a dummy implementation and the non-drm backend is gone, so let's kill this perf counter interface. Signed-off-by: Boris Brezillon --- src/gallium/drivers/panfrost/pan_context.c | 14 -- src/gallium/drivers/panfrost/pan_drm.c | 14 -- src/gallium/drivers/panfrost/pan_screen.c | 10 -- src/gallium/drivers/panfrost/pan_screen.h | 4 4 files changed, 42 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index e4a04dd821f5..867b49c55967 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -48,9 +48,6 @@ #include "pan_util.h" #include "pan_tiler.h" -static int performance_counter_number = 0; -extern const char *pan_counters_base; - /* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */ //#define DRY_RUN @@ -1442,17 +1439,6 @@ panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate, /* If readback, flush now (hurts the pipelined performance) */ if (flush_immediate) screen->driver->force_flush_fragment(ctx, fence); - -if (screen->driver->dump_counters && pan_counters_base) { -screen->driver->dump_counters(screen); - -char filename[128]; -snprintf(filename, sizeof(filename), "%s/frame%d.mdgprf", pan_counters_base, ++performance_counter_number); -FILE *fp = fopen(filename, "wb"); -fwrite(screen->perf_counters.cpu, 4096, sizeof(uint32_t), fp); -fclose(fp); -} - #endif } diff --git a/src/gallium/drivers/panfrost/pan_drm.c b/src/gallium/drivers/panfrost/pan_drm.c index aed50477ff7d..f4d0721071bb 100644 --- a/src/gallium/drivers/panfrost/pan_drm.c +++ b/src/gallium/drivers/panfrost/pan_drm.c @@ -318,18 +318,6 @@ panfrost_drm_force_flush_fragment(struct panfrost_context *ctx, } } -static void -panfrost_drm_enable_counters(struct panfrost_screen *screen) -{ - fprintf(stderr, "unimplemented: %s\n", __func__); -} - -static void -panfrost_drm_dump_counters(struct 
panfrost_screen *screen) -{ - fprintf(stderr, "unimplemented: %s\n", __func__); -} - static unsigned panfrost_drm_query_gpu_version(struct panfrost_screen *screen) { @@ -420,12 +408,10 @@ panfrost_create_drm_driver(int fd) driver->base.force_flush_fragment = panfrost_drm_force_flush_fragment; driver->base.allocate_slab = panfrost_drm_allocate_slab; driver->base.free_slab = panfrost_drm_free_slab; - driver->base.enable_counters = panfrost_drm_enable_counters; driver->base.query_gpu_version = panfrost_drm_query_gpu_version; driver->base.init_context = panfrost_drm_init_context; driver->base.fence_reference = panfrost_drm_fence_reference; driver->base.fence_finish = panfrost_drm_fence_finish; - driver->base.dump_counters = panfrost_drm_dump_counters; return &driver->base; } diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index 5d3acc0a0dd5..8d43e0d9f4ca 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -66,8 +66,6 @@ int pan_debug = 0; struct panfrost_driver *panfrost_create_drm_driver(int fd); -const char *pan_counters_base = NULL; - static const char * panfrost_get_name(struct pipe_screen *screen) { @@ -585,14 +583,6 @@ panfrost_create_screen(int fd, struct renderonly *ro) screen->driver = panfrost_create_drm_driver(fd); -/* Dump performance counters iff asked for in the environment */ -pan_counters_base = getenv("PANCOUNTERS_BASE"); - -if (pan_counters_base) { -screen->driver->allocate_slab(screen, &screen->perf_counters, 64, true, 0, 0, 0); -screen->driver->enable_counters(screen); -} - if (pan_debug & PAN_DBG_TRACE) pandecode_initialize(); diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h index 0660be5128c7..c7504f3221d4 100644 --- a/src/gallium/drivers/panfrost/pan_screen.h +++ b/src/gallium/drivers/panfrost/pan_screen.h @@ -64,8 +64,6 @@ struct panfrost_driver { struct panfrost_memory *mem); void 
(*free_imported_bo) (struct panfrost_screen *screen, struct panfrost_bo *bo); -void (*enable_counters) (struct panfrost_screen *screen); -void (*dump_counters) (struct panfrost_screen *screen); unsigned (*query_gpu_version) (struct panfrost_screen *screen); int (*init_context) (struct panfrost_context *ctx); void (*fence_reference) (struct pipe_screen *screen, @@ -83,8 +81,6 @@ struct panfrost_screen { struct renderonly *ro; struct panfrost_dri
[Mesa-dev] [PATCH 2/2] panfrost: Kill the panfrost_driver abstraction
The non-drm backend is gone and there's no plan to bring it back to life. Let's get rid of the panfrost_driver abstraction and call the panfrost_drm_xxx() functions directly. Signed-off-by: Boris Brezillon --- src/gallium/drivers/panfrost/pan_afbc.c | 2 +- src/gallium/drivers/panfrost/pan_context.c | 32 +++--- src/gallium/drivers/panfrost/pan_drm.c | 102 ++-- src/gallium/drivers/panfrost/pan_job.c | 2 +- src/gallium/drivers/panfrost/pan_resource.c | 18 ++-- src/gallium/drivers/panfrost/pan_screen.c | 8 +- src/gallium/drivers/panfrost/pan_screen.h | 76 +-- 7 files changed, 109 insertions(+), 131 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_afbc.c b/src/gallium/drivers/panfrost/pan_afbc.c index 4bef833f1820..5621d1f333a3 100644 --- a/src/gallium/drivers/panfrost/pan_afbc.c +++ b/src/gallium/drivers/panfrost/pan_afbc.c @@ -138,7 +138,7 @@ panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsr unsigned buffer_size = header_size + body_size; /* Allocate the AFBC slab itself, large enough to hold the above */ -screen->driver->allocate_slab(screen, &rsrc->bo->afbc_slab, +panfrost_drm_allocate_slab(screen, &rsrc->bo->afbc_slab, ALIGN(buffer_size, 4096) / 4096, true, 0, 0, 0); diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 867b49c55967..14d29f5d20f1 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -87,7 +87,7 @@ panfrost_enable_checksum(struct panfrost_context *ctx, struct panfrost_resource /* 8 byte checksum per tile */ rsrc->bo->checksum_stride = tile_w * 8; int pages = (((rsrc->bo->checksum_stride * tile_h) + 4095) / 4096); -screen->driver->allocate_slab(screen, &rsrc->bo->checksum_slab, pages, false, 0, 0, 0); +panfrost_drm_allocate_slab(screen, &rsrc->bo->checksum_slab, pages, false, 0, 0, 0); rsrc->bo->has_checksum = true; } @@ -1431,14 +1431,14 @@ panfrost_submit_frame(struct panfrost_context *ctx, bool 
flush_immediate, /* If visual, we can stall a frame */ if (!flush_immediate) -screen->driver->force_flush_fragment(ctx, fence); +panfrost_drm_force_flush_fragment(ctx, fence); screen->last_fragment_flushed = false; screen->last_job = job; /* If readback, flush now (hurts the pipelined performance) */ if (flush_immediate) -screen->driver->force_flush_fragment(ctx, fence); +panfrost_drm_force_flush_fragment(ctx, fence); #endif } @@ -2503,11 +2503,11 @@ panfrost_destroy(struct pipe_context *pipe) if (panfrost->blitter) util_blitter_destroy(panfrost->blitter); -screen->driver->free_slab(screen, &panfrost->scratchpad); -screen->driver->free_slab(screen, &panfrost->varying_mem); -screen->driver->free_slab(screen, &panfrost->shaders); -screen->driver->free_slab(screen, &panfrost->tiler_heap); -screen->driver->free_slab(screen, &panfrost->tiler_polygon_list); +panfrost_drm_free_slab(screen, &panfrost->scratchpad); +panfrost_drm_free_slab(screen, &panfrost->varying_mem); +panfrost_drm_free_slab(screen, &panfrost->shaders); +panfrost_drm_free_slab(screen, &panfrost->tiler_heap); +panfrost_drm_free_slab(screen, &panfrost->tiler_polygon_list); ralloc_free(pipe); } @@ -2660,12 +2660,12 @@ panfrost_setup_hardware(struct panfrost_context *ctx) ctx->transient_pools[i].entries[0] = (struct panfrost_memory_entry *) pb_slab_alloc(&screen->slabs, entry_size, HEAP_TRANSIENT); } -screen->driver->allocate_slab(screen, &ctx->scratchpad, 64, false, 0, 0, 0); -screen->driver->allocate_slab(screen, &ctx->varying_mem, 16384, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_COHERENT_LOCAL, 0, 0); -screen->driver->allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0); -screen->driver->allocate_slab(screen, &ctx->tiler_heap, 32768, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128); -screen->driver->allocate_slab(screen, &ctx->tiler_polygon_list, 128*128, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128); -screen->driver->allocate_slab(screen, 
&ctx->tiler_dummy, 1, false, PAN_ALLOCATE_INVISIBLE, 0, 0); +panfrost_drm_allocate_slab(screen, &ctx->scratchpad, 64, false, 0, 0, 0); +panfrost_drm_allocate_slab(screen, &ctx->varying_mem, 16384, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_COHERENT_LOCAL, 0, 0); +panfrost_drm_allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0); +panfrost_drm_allocate_slab(screen, &ctx->tiler_heap, 32768,
[Mesa-dev] Proposal for the future of www.mesa3d.org
A while back, Laura and Jean were working on a Sphinx-conversion of the mesa-documentation. Sadly this work stranded due to it also trying to move to using GitLab Pages for hosting www.mesa3d.org, and because the documentation and the website are the same thing, this led to problems with hosting the release-archive (www.mesa3d.org/archive/). Since then, I've taken a look at trying to revive this work. So far, I've taken most of the changes Laura made to the website post-RST conversion, and performed them before instead. I've also automated more of the conversion process, so we can more easily get an up-to-date conversion. The result can be viewed here: https://kusma.pages.freedesktop.org/mesa/ https://gitlab.freedesktop.org/kusma/mesa/tree/docs-sphinx-v2 Please note that there are some differences: - I don't do any "mesa-specific styling". This can be done on top if needed, simply by cherry-picking Laura's commits for this. But I'm not sure we need it, more about this further down. - Some of the commit history might be incorrectly attributed to me instead of Laura. I intend to fix this up before upstreaming anything here. - The conversion isn't entirely up-to-date, but it's *fairly* recent. So yeah, the big elephant in the room is what to do with www.mesa3d.org/archive. This is where I have an alternative suggestion: How about we split the documentation and the website into two sites, www.mesa3d.org and docs.mesa3d.org, and maintain the website in a separate repository? We would of course have to set up some redirects to make old URLs point to the latest version (at least for a transition period). This has some additional benefits: - We don't need to push things to master to update things like news, that aren't really related to the code. - We can separate information that is technical documentation from information that is "project marketing". 
- ...And because we don't need for the docs to appeal as "project marketing", we can keep the neutral readthedocs theme as-is, as it's a bit easier on the eye IMO. - It makes the article index a bit more logical, as there are a few articles that don't really make sense to read after you already have the source tree. Why would you wonder who the webmaster is (docs/webmaster.html) or where to download mesa (docs/download.html) when reading the source? - We can host docs.mesa3d.org using GitLab pages (or point it to something like readthedocs.org) without having to change the hosting for www.mesa3d.org. In addition to this, I've also had a look at modernizing www.mesa3d.org as well, and I've made a proposal for a new, responsive website: https://kusma.pages.freedesktop.org/ https://gitlab.freedesktop.org/kusma/kusma.pages.freedesktop.org/ Quite a few things to notice: - Many links here forward to docs.mesa3d.org, which doesn't exist yet. - The redirects are done using meta-refresh tags instead of HTTP redirects, so they will only be redirected by an actual user-agent, not by curl or wget. - The site is using logos of Khronos APIs which might not be OK without approval. The legality of this needs to be researched. - Most of the content here is "usable placeholder" text, but by no means final. For instance, the descriptions of the APIs and drivers probably need work. Especially the driver-description should probably be written by the driver-teams rather than me. - Some drivers are missing. I just didn't bother writing more placeholder. - What content goes in which site is by no means decided on. - Some content isn't yet in either site; in particular, non-html files, like for instance the contents of www.mesa3d.org/specs. And since GitLab pages doesn't do directory listings, that folder (regardless of where it'd be residing) would need an index added. - The site is made using Jekyll, but any static-site generator would do, really. 
The redirect-issue is due to the prototype currently being hosted in GitLab pages, and is a GitLab pages limitation. See https://gitlab.com/gitlab-org/gitlab-pages/issues/24 for more details. I doubt this would be a problem for documentation, but the same approach won't work for www.mesa3d.org/archive. Without solving that problem, we can't really go live with this while hosting it on GitLab pages. But we could go forward *without* hosting www.mesa3d.org in GitLab pages in the short term. I don't know how we currently deploy the website, I guess that's done manually by someone at some point? If so, we'd just update the manual recipe, I guess. I think the long-term goal should be to also move www.mesa3d.org to GitLab pages as well, and I have some ideas for how to deal with the www.mesa3d.org/archive-problem, but this is a much longer discussion, and this email is already too long. So if someone wants to discuss that, feel free to reply, and I'll happily tell you about it! Anyway, thoughts? Objections? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop
Re: [Mesa-dev] [PATCH] panfrost: Implement command stream scoreboarding
On Thursday, 20 June 2019 20:20:38 CEST Alyssa Rosenzweig wrote: > This is a rather complex change, adding a lot of code but ideally > cleaning up quite a bit as we go. > > Within a batch (single frame), there are multiple distinct Mali job > types: SET_VALUE, VERTEX, TILER, FRAGMENT for the few that we emit right > now (eventually more for compute and geometry shaders). Each hardware > job has a mali_job_descriptor_header, which contains three fields of > interest: job index, a dependencies list, and a next job pointer. > > The next job pointer in each job is used to form a linked list of > submitted jobs. Easy enough. > > The job index and dependencies list, however, are used to form a > dependency graph (a DAG, where each hardware job is a node and each > dependency is a directed edge). Internally, this sets up a scoreboarding > data structure for the hardware to dispatch jobs in parallel, enabling > (for example) vertex shaders from different draws to execute in parallel > while there are strict dependencies between tiling the geometry of a > draw and running that vertex shader. > > For a while, we got by with an incredible series of total hacks, > manually coding indices, lists, and dependencies. That worked for a > moment, but combinatorial kaboom kicked in and it became an > unmaintainable mess of spaghetti code. > > We can do better. This commit explicitly handles the scoreboarding by > providing high-level manipulation for jobs. Rather than a command like > "set dependency #2 to index 17", we can express quite naturally "add a > dependency from job T on job V". Instead of some open-coded logic to > copy a draw pointer into a delicate context array, we now have an > elegant exposed API to simple "queue a job of type XYZ". > > The design is influenced by both our current requirements (standard ES2 > draws and u_blitter) as well as the need for more complex scheduling in > the future. 
For instance, blits can be optimized to use only a tiler > job, without a vertex job first (since the screen-space vertices are > known ahead-of-time) -- causing tiler-only jobs. Likewise, when using > transform feedback with rasterizer discard enabled, vertex jobs are > created (to run vertex shaders) with no corresponding tiler job. Both of > these cases break the original model and could not be expressed with the > open-coded logic. More generally, this will make it easier to add > support for compute shaders, geometry shaders, and fused jobs (an > optimization available on Bifrost). > > Incidentally, this moves quite a bit of state from the driver context to > the batch, which helps with Rohan's refactor to eventually permit > pipelining across framebuffers (one important outstanding optimization > for FBO-heavy workloads). > > Signed-off-by: Alyssa Rosenzweig > --- > src/gallium/drivers/panfrost/meson.build | 1 + > src/gallium/drivers/panfrost/pan_context.c| 132 + > src/gallium/drivers/panfrost/pan_context.h| 19 +- > src/gallium/drivers/panfrost/pan_drm.c| 6 +- > src/gallium/drivers/panfrost/pan_job.c| 18 +- > src/gallium/drivers/panfrost/pan_job.h| 61 +++ > src/gallium/drivers/panfrost/pan_scoreboard.c | 453 ++ > 7 files changed, 548 insertions(+), 142 deletions(-) > create mode 100644 src/gallium/drivers/panfrost/pan_scoreboard.c > > diff --git a/src/gallium/drivers/panfrost/meson.build > b/src/gallium/drivers/panfrost/meson.build index 43d73ce2086..4298242f6b9 > 100644 > --- a/src/gallium/drivers/panfrost/meson.build > +++ b/src/gallium/drivers/panfrost/meson.build > @@ -57,6 +57,7 @@ files_panfrost = files( >'pan_blend_shaders.c', >'pan_pretty_print.c', >'pan_fragment.c', > + 'pan_scoreboard.c', >'pan_sfbd.c', >'pan_mfbd.c', >'pan_tiler.c', > diff --git a/src/gallium/drivers/panfrost/pan_context.c > b/src/gallium/drivers/panfrost/pan_context.c index 41656236b5b..d8c5510a31e > 100644 > --- a/src/gallium/drivers/panfrost/pan_context.c > +++ 
b/src/gallium/drivers/panfrost/pan_context.c > @@ -517,15 +517,6 @@ panfrost_default_shader_backend(struct panfrost_context > *ctx) memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader)); } > > -static void > -panfrost_link_job_pair(struct mali_job_descriptor_header *first, mali_ptr > next) -{ > -if (first->job_descriptor_size) > -first->next_job_64 = (u64) (uintptr_t) next; > -else > -first->next_job_32 = (u32) (uintptr_t) next; > -} > - > /* Generates a vertex/tiler job. This is, in some sense, the heart of the > * graphics command stream. It should be called once per draw, accordding > to * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, > but in @@ -535,12 +526,8 @@ panfrost_link_job_pair(struct > mali_job_descriptor_header *first, mali_ptr next) struct panfrost_transfer > panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler) > { > -/* Each draw call corresponds to
[Mesa-dev] [PATCH] radv: add support for VK_AMD_buffer_marker
This simple extension might be useful for debugging purposes. GAPID has support for it. Signed-off-by: Samuel Pitoiset --- A simple crucible test: https://gitlab.freedesktop.org/mesa/crucible/merge_requests/47 src/amd/vulkan/radv_cmd_buffer.c | 35 +++ src/amd/vulkan/radv_extensions.py | 1 + 2 files changed, 36 insertions(+) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 9b4d4528028..215ccced144 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -5716,3 +5716,38 @@ void radv_CmdDrawIndirectByteCountEXT( radv_draw(cmd_buffer, &info); } + +/* VK_AMD_buffer_marker */ +void radv_CmdWriteBufferMarkerAMD( +VkCommandBuffer commandBuffer, +VkPipelineStageFlagBits pipelineStage, +VkBufferdstBuffer, +VkDeviceSizedstOffset, +uint32_tmarker) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_buffer, buffer, dstBuffer); + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint64_t va = radv_buffer_get_va(buffer->bo) + dstOffset; + + si_emit_cache_flush(cmd_buffer); + + if (!(pipelineStage & ~VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | + COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | + COPY_DATA_WR_CONFIRM); + radeon_emit(cs, marker); + radeon_emit(cs, 0); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + } else { + si_cs_emit_write_event_eop(cs, + cmd_buffer->device->physical_device->rad_info.chip_class, + radv_cmd_buffer_uses_mec(cmd_buffer), + V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DATA_SEL_VALUE_32BIT, + va, marker, + cmd_buffer->gfx9_eop_bug_va); + } +} diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py index b676cb8b8e2..1708173817b 100644 --- a/src/amd/vulkan/radv_extensions.py +++ b/src/amd/vulkan/radv_extensions.py @@ -130,6 +130,7 @@ EXTENSIONS = [ Extension('VK_EXT_transform_feedback',1, True), Extension('VK_EXT_vertex_attribute_divisor', 3, 
True), Extension('VK_EXT_ycbcr_image_arrays',1, True), +Extension('VK_AMD_buffer_marker', 1, True), Extension('VK_AMD_draw_indirect_count', 1, True), Extension('VK_AMD_gcn_shader',1, True), Extension('VK_AMD_gpu_shader_half_float', 1, 'device->rad_info.chip_class >= GFX8 && HAVE_LLVM >= 0x0800'), -- 2.22.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [ANNOUNCE] Mesa 19.1.1 release candidate
Hello list, The candidate for the Mesa 19.1.1 is now available. Currently we have: - 27 queued - 0 nominated (outstanding) - and 0 rejected patch The current queue consists mostly in fixes for different drivers (RADV, ANV, Nouveau, Virgl, V3D, R300g, ...) The queue also contains different fixes for different parts (Meson build, GLX, etc). Take a look at section "Mesa stable queue" for more information Testing reports/general approval Any testing reports (or general approval of the state of the branch) will be greatly appreciated. The plan is to have 19.1.1 this Tuesday (25th June), around or shortly after 10:00 GMT. If you have any questions or suggestions - be that about the current patch queue or otherwise, please go ahead. Trivial merge conflicts --- commit 25a34df61439b25645d03510d6354cb1f5e8a185 Author: Kenneth Graunke iris: Fix iris_flush_and_dirty_history to actually dirty history. (cherry picked from commit 64fb20ed326fa0e524582225faaa4bb28f6e4349) Cheers, J.A. Mesa stable queue - Nominated (0) == Queued (27) === Alejandro Piñeiro (1): v3d: fix checking twice auf flag Bas Nieuwenhuizen (5): radv: Skip transitions coming from external queue. radv: Decompress DCC when the image format is not allowed for buffers. radv: Fix vulkan build in meson. anv: Fix vulkan build in meson. meson: Allow building radeonsi with just the android platform. Dave Airlie (1): nouveau: fix frees in unsupported IR error paths. 
Eduardo Lima Mitev (1): freedreno/a5xx: Fix indirect draw max_indices calculation Eric Engestrom (3): util/futex: fix dangling pointer use glx: fix glvnd pointer types util/os_file: resize buffer to what was actually needed Gert Wollny (1): virgl: Assume sRGB write control for older guest kernels or virglrenderer hosts Haihao Xiang (1): i965: support UYVY for external import only Jason Ekstrand (1): anv: Set STATE_BASE_ADDRESS upper bounds on gen7 Kenneth Graunke (2): glsl: Fix out of bounds read in shader_cache_read_program_metadata iris: Fix iris_flush_and_dirty_history to actually dirty history. Kevin Strasser (2): gallium/winsys/kms: Fix dumb buffer bpp st/mesa: Add rgbx handling for fp formats Lionel Landwerlin (2): anv: do not parse genxml data without INTEL_DEBUG=bat intel/dump: fix segfault when the app hasn't accessed the device Mathias Fröhlich (1): egl: Don't add hardware device if there is no render node v2. Richard Thier (1): r300g: restore performance after RADEON_FLAG_NO_INTERPROCESS_SHARING was added Rob Clark (1): freedreno/a6xx: un-swap X24S8_UINT Samuel Pitoiset (4): radv: fix occlusion queries on VegaM radv: fix VK_EXT_memory_budget if one heap isn't available radv: fix FMASK expand with SRGB formats radv: disable viewport clamping even if FS doesn't write Z Rejected (0) = ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] panfrost: Kill the perf counters interface
R-b, that's long overdue seeing as the DRM driver has a totally different (better? :) ) interface anyway (Thank you!) signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] panfrost: Kill the panfrost_driver abstraction
Oh, nice! I'm going to give this a tentative Reviewed-by. The changes themselves look good (and there shouldn't be functional changes). I do want this to happen; I just need to give some thought to ensuring that yes, this is really what we want to do. I'm having trouble thinking of when we might not want this. Two issues come to mind: Bifrost bring-up and device-free testing. That said, I think Bifrost is a non-issue since the DRM driver will probably support that before we do. As for device-free testing, it should not be too hard to include a mock version of pan_drm.c that's compile-time selected (or even a mock kernel module?). It's a useful thing to desire since often you're just looking for issues that can be tracked via PAN_MESA_DEBUG=trace and MIDGARD_MESA_DEBUG=shaders without needing a physical board. Then again, boards are so cheap that it's probably not worth our while to bother with that level of abstraction, so maybe this is a no-op as well. On Fri, Jun 21, 2019 at 03:57:27PM +0200, Boris Brezillon wrote: > The non-drm backend is gone and there's no plan to bring it back to > life. Let's get rid of the panfrost_driver abstraction and call the > panfrost_drm_xxx() functions directly. 
> > Signed-off-by: Boris Brezillon > --- > src/gallium/drivers/panfrost/pan_afbc.c | 2 +- > src/gallium/drivers/panfrost/pan_context.c | 32 +++--- > src/gallium/drivers/panfrost/pan_drm.c | 102 ++-- > src/gallium/drivers/panfrost/pan_job.c | 2 +- > src/gallium/drivers/panfrost/pan_resource.c | 18 ++-- > src/gallium/drivers/panfrost/pan_screen.c | 8 +- > src/gallium/drivers/panfrost/pan_screen.h | 76 +-- > 7 files changed, 109 insertions(+), 131 deletions(-) > > diff --git a/src/gallium/drivers/panfrost/pan_afbc.c > b/src/gallium/drivers/panfrost/pan_afbc.c > index 4bef833f1820..5621d1f333a3 100644 > --- a/src/gallium/drivers/panfrost/pan_afbc.c > +++ b/src/gallium/drivers/panfrost/pan_afbc.c > @@ -138,7 +138,7 @@ panfrost_enable_afbc(struct panfrost_context *ctx, struct > panfrost_resource *rsr > unsigned buffer_size = header_size + body_size; > > /* Allocate the AFBC slab itself, large enough to hold the above */ > -screen->driver->allocate_slab(screen, &rsrc->bo->afbc_slab, > +panfrost_drm_allocate_slab(screen, &rsrc->bo->afbc_slab, > ALIGN(buffer_size, 4096) / 4096, > true, 0, 0, 0); > > diff --git a/src/gallium/drivers/panfrost/pan_context.c > b/src/gallium/drivers/panfrost/pan_context.c > index 867b49c55967..14d29f5d20f1 100644 > --- a/src/gallium/drivers/panfrost/pan_context.c > +++ b/src/gallium/drivers/panfrost/pan_context.c > @@ -87,7 +87,7 @@ panfrost_enable_checksum(struct panfrost_context *ctx, > struct panfrost_resource > /* 8 byte checksum per tile */ > rsrc->bo->checksum_stride = tile_w * 8; > int pages = (((rsrc->bo->checksum_stride * tile_h) + 4095) / 4096); > -screen->driver->allocate_slab(screen, &rsrc->bo->checksum_slab, > pages, false, 0, 0, 0); > +panfrost_drm_allocate_slab(screen, &rsrc->bo->checksum_slab, pages, > false, 0, 0, 0); > > rsrc->bo->has_checksum = true; > } > @@ -1431,14 +1431,14 @@ panfrost_submit_frame(struct panfrost_context *ctx, > bool flush_immediate, > /* If visual, we can stall a frame */ > > if (!flush_immediate) > 
-screen->driver->force_flush_fragment(ctx, fence); > +panfrost_drm_force_flush_fragment(ctx, fence); > > screen->last_fragment_flushed = false; > screen->last_job = job; > > /* If readback, flush now (hurts the pipelined performance) */ > if (flush_immediate) > -screen->driver->force_flush_fragment(ctx, fence); > +panfrost_drm_force_flush_fragment(ctx, fence); > #endif > } > > @@ -2503,11 +2503,11 @@ panfrost_destroy(struct pipe_context *pipe) > if (panfrost->blitter) > util_blitter_destroy(panfrost->blitter); > > -screen->driver->free_slab(screen, &panfrost->scratchpad); > -screen->driver->free_slab(screen, &panfrost->varying_mem); > -screen->driver->free_slab(screen, &panfrost->shaders); > -screen->driver->free_slab(screen, &panfrost->tiler_heap); > -screen->driver->free_slab(screen, &panfrost->tiler_polygon_list); > +panfrost_drm_free_slab(screen, &panfrost->scratchpad); > +panfrost_drm_free_slab(screen, &panfrost->varying_mem); > +panfrost_drm_free_slab(screen, &panfrost->shaders); > +panfrost_drm_free_slab(screen, &panfrost->tiler_heap); > +panfrost_drm_free_slab(screen, &panfrost->tiler_polygon_list); > > ralloc_free(pipe); > } > @@ -2660,12 +2660,12 @@ panfrost_setup_hardware(struct panfrost_context *ctx) > ctx->transient_pools[i].entries[0] = (struct > panfrost_m
[Mesa-dev] [PATCH 1/3] gallium/util: rewrite depth-stencil blit shaders
From: Marek Olšák - merge all 3 functions (Z, S, ZS) - don't write the color output - read the value from texel.x, then write it to position.z or stencil.y (don't use the value from texel.y or texel.z) --- src/gallium/auxiliary/util/u_blitter.c| 19 +- src/gallium/auxiliary/util/u_simple_shaders.c | 185 -- src/gallium/auxiliary/util/u_simple_shaders.h | 25 +-- 3 files changed, 46 insertions(+), 183 deletions(-) diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 3dc49cd0958..8e4807ec670 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -1010,24 +1010,22 @@ void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx, if (use_txf) shader = &ctx->fs_texfetch_depth[target][1]; else shader = &ctx->fs_texfetch_depth[target][0]; /* Create the fragment shader on-demand. */ if (!*shader) { enum tgsi_texture_type tgsi_tex; assert(!ctx->cached_all_shaders); tgsi_tex = util_pipe_tex_to_tgsi_tex(target, 0); - *shader = -util_make_fragment_tex_shader_writedepth(pipe, tgsi_tex, - TGSI_INTERPOLATE_LINEAR, - ctx->has_tex_lz, use_txf); + *shader = util_make_fs_blit_zs(pipe, PIPE_MASK_Z, tgsi_tex, +ctx->has_tex_lz, use_txf); } return *shader; } } static inline void *blitter_get_fs_texfetch_depthstencil(struct blitter_context_priv *ctx, enum pipe_texture_target target, unsigned nr_samples, @@ -1055,25 +1053,22 @@ void *blitter_get_fs_texfetch_depthstencil(struct blitter_context_priv *ctx, if (use_txf) shader = &ctx->fs_texfetch_depthstencil[target][1]; else shader = &ctx->fs_texfetch_depthstencil[target][0]; /* Create the fragment shader on-demand. 
*/ if (!*shader) { enum tgsi_texture_type tgsi_tex; assert(!ctx->cached_all_shaders); tgsi_tex = util_pipe_tex_to_tgsi_tex(target, 0); - *shader = -util_make_fragment_tex_shader_writedepthstencil(pipe, tgsi_tex, - TGSI_INTERPOLATE_LINEAR, -ctx->has_tex_lz, -use_txf); + *shader = util_make_fs_blit_zs(pipe, PIPE_MASK_ZS, tgsi_tex, +ctx->has_tex_lz, use_txf); } return *shader; } } static inline void *blitter_get_fs_texfetch_stencil(struct blitter_context_priv *ctx, enum pipe_texture_target target, unsigned nr_samples, @@ -1101,24 +1096,22 @@ void *blitter_get_fs_texfetch_stencil(struct blitter_context_priv *ctx, if (use_txf) shader = &ctx->fs_texfetch_stencil[target][1]; else shader = &ctx->fs_texfetch_stencil[target][0]; /* Create the fragment shader on-demand. */ if (!*shader) { enum tgsi_texture_type tgsi_tex; assert(!ctx->cached_all_shaders); tgsi_tex = util_pipe_tex_to_tgsi_tex(target, 0); - *shader = -util_make_fragment_tex_shader_writestencil(pipe, tgsi_tex, - TGSI_INTERPOLATE_LINEAR, - ctx->has_tex_lz, use_txf); + *shader = util_make_fs_blit_zs(pipe, PIPE_MASK_S, tgsi_tex, +ctx->has_tex_lz, use_txf); } return *shader; } } /** * Generate and save all fragment shaders that we will ever need for * blitting. Drivers which use the 'draw' fallbacks will typically use diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index d62a65579ae..c111eaf1db5 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -371,187 +371,76 @@ util_make_fragment_tex_shader(struct pipe_context *pipe, { return util_make_fragment_tex_shader_writemask( pipe, tex_target, interp_mode, TGSI_WRITEMASK_XYZW, stype, dtype, load_level_zero, use_txf); } -/** - * Make a simple fragment texture shader which reads an X component from - * a texture and writes it as depth. - */ -void * -util_make_fragment_tex_shader_writedepth(struct pipe
[Mesa-dev] [PATCH 2/3] gallium/u_blitter: implement copying from ZS to color and vice versa
From: Marek Olšák This is for drivers that can't map depth and stencil and need to blit them to a color texture for CPU access. This also useful for drivers using separate depth and stencil. --- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 6 + src/gallium/auxiliary/util/u_blitter.c| 147 src/gallium/auxiliary/util/u_blitter.h| 29 src/gallium/auxiliary/util/u_simple_shaders.c | 161 ++ src/gallium/auxiliary/util/u_simple_shaders.h | 6 + 5 files changed, 314 insertions(+), 35 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index 54a1ee15b68..2ade618db00 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -158,20 +158,26 @@ OP11(UARL) OP13(UCMP) OP11(IABS) OP11(ISSG) OP11(IMG2HND) OP11(SAMP2HND) OP12(IMUL_HI) OP12(UMUL_HI) +OP13(UBFE) +OP11(F2D) +OP11(D2F) +OP11(U2D) +OP11(D2U) +OP12(DMUL) #undef OP00 #undef OP01 #undef OP10 #undef OP11 #undef OP12 #undef OP13 #undef OP00_LBL #undef OP01_LBL diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 8e4807ec670..18a5c272454 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -95,20 +95,25 @@ struct blitter_context_priv /* FS which outputs one sample from a multisample texture. */ void *fs_texfetch_col_msaa[5][PIPE_MAX_TEXTURE_TYPES]; void *fs_texfetch_depth_msaa[PIPE_MAX_TEXTURE_TYPES]; void *fs_texfetch_depthstencil_msaa[PIPE_MAX_TEXTURE_TYPES]; void *fs_texfetch_stencil_msaa[PIPE_MAX_TEXTURE_TYPES]; /* FS which outputs an average of all samples. */ void *fs_resolve[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2]; + /* FS which unpacks color to ZS or packs ZS to color, matching +* the ZS format. See util_blitter_get_color_format_for_zs(). +*/ + void *fs_pack_color_zs[TGSI_TEXTURE_COUNT][10]; + /* Blend state. 
*/ void *blend[PIPE_MASK_RGBA+1][2]; /**< blend state with writemask */ void *blend_clear[GET_CLEAR_BLEND_STATE_IDX(PIPE_CLEAR_COLOR)+1]; /* Depth stencil alpha state. */ void *dsa_write_depth_stencil; void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; void *dsa_keep_depth_write_stencil; @@ -504,20 +509,27 @@ void util_blitter_destroy(struct blitter_context *blitter) ctx->delete_fs_state(pipe, ctx->fs_texfetch_depthstencil_msaa[i]); if (ctx->fs_texfetch_stencil_msaa[i]) ctx->delete_fs_state(pipe, ctx->fs_texfetch_stencil_msaa[i]); for (j = 0; j< ARRAY_SIZE(ctx->fs_resolve[i]); j++) for (f = 0; f < 2; f++) if (ctx->fs_resolve[i][j][f]) ctx->delete_fs_state(pipe, ctx->fs_resolve[i][j][f]); } + for (i = 0; i < ARRAY_SIZE(ctx->fs_pack_color_zs); i++) { + for (j = 0; j < ARRAY_SIZE(ctx->fs_pack_color_zs[0]); j++) { + if (ctx->fs_pack_color_zs[i][j]) +ctx->delete_fs_state(pipe, ctx->fs_pack_color_zs[i][j]); + } + } + if (ctx->fs_empty) ctx->delete_fs_state(pipe, ctx->fs_empty); if (ctx->fs_write_one_cbuf) ctx->delete_fs_state(pipe, ctx->fs_write_one_cbuf); if (ctx->fs_write_all_cbufs) ctx->delete_fs_state(pipe, ctx->fs_write_all_cbufs); pipe->delete_sampler_state(pipe, ctx->sampler_state_rect_linear); pipe->delete_sampler_state(pipe, ctx->sampler_state_rect); pipe->delete_sampler_state(pipe, ctx->sampler_state_linear); @@ -975,20 +987,58 @@ static void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx, *shader = util_make_fragment_tex_shader(pipe, tgsi_tex, TGSI_INTERPOLATE_LINEAR, stype, dtype, ctx->has_tex_lz, use_txf); } return *shader; } } +static inline +void *blitter_get_fs_pack_color_zs(struct blitter_context_priv *ctx, + enum pipe_texture_target target, + unsigned nr_samples, + enum pipe_format zs_format, + bool dst_is_color) +{ + struct pipe_context *pipe = ctx->base.pipe; + enum tgsi_texture_type tgsi_tex = + util_pipe_tex_to_tgsi_tex(target, nr_samples); + int format_index = zs_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ? 
0 : + zs_format == PIPE_FORMAT_S8_UINT_Z24_UNORM ? 1 : + zs_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ? 2 : + zs_format == PIPE_FORMAT_Z24X8_UNORM ? 3 : + zs_format == PIPE_FORMAT_X8Z24_UNORM ? 4 : -1; + + if (format_index == -1) { + assert(0); + return NULL; + } + + /* The first 5 shaders pack ZS to color, the last 5 shaders unpa
[Mesa-dev] [PATCH 3/3] radeonsi: use a fragment shader blit instead of DB->CB copy for ZS CPU mappings
From: Marek Olšák This mainly removes and simplifies code that is no longer needed. There were some issues with the DB->CB stencil copy on gfx10, so let's just use a fragment shader blit for all ZS mappings. It's more reliable. --- src/gallium/drivers/radeonsi/si_blit.c| 29 +--- src/gallium/drivers/radeonsi/si_pipe.h| 9 +- src/gallium/drivers/radeonsi/si_state.c | 2 +- src/gallium/drivers/radeonsi/si_texture.c | 166 +++--- 4 files changed, 52 insertions(+), 154 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 5806342cca9..638f2ee4d24 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -173,45 +173,20 @@ si_blit_dbcb_copy(struct si_context *sctx, } sctx->decompression_enabled = false; sctx->dbcb_depth_copy_enabled = false; sctx->dbcb_stencil_copy_enabled = false; si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); return fully_copied_levels; } -void si_blit_decompress_depth(struct pipe_context *ctx, - struct si_texture *texture, - struct si_texture *staging, - unsigned first_level, unsigned last_level, - unsigned first_layer, unsigned last_layer, - unsigned first_sample, unsigned last_sample) -{ - const struct util_format_description *desc; - unsigned planes = 0; - - assert(staging != NULL && "use si_blit_decompress_zs_in_place instead"); - - desc = util_format_description(staging->buffer.b.b.format); - - if (util_format_has_depth(desc)) - planes |= PIPE_MASK_Z; - if (util_format_has_stencil(desc)) - planes |= PIPE_MASK_S; - - si_blit_dbcb_copy( - (struct si_context *)ctx, texture, staging, planes, - u_bit_consecutive(first_level, last_level - first_level + 1), - first_layer, last_layer, first_sample, last_sample); -} - /* Helper function for si_blit_decompress_zs_in_place. 
*/ static void si_blit_decompress_zs_planes_in_place(struct si_context *sctx, struct si_texture *texture, unsigned planes, unsigned level_mask, unsigned first_layer, unsigned last_layer) { struct pipe_surface *zsurf, surf_tmpl = {{0}}; unsigned layer, max_layer, checked_last_layer; @@ -348,21 +323,21 @@ si_decompress_depth(struct si_context *sctx, u_log_printf(sctx->log, "\n\n" "Decompress Depth (levels %u - %u, levels Z: 0x%x S: 0x%x)\n\n", first_level, last_level, levels_z, levels_s); /* We may have to allocate the flushed texture here when called from * si_decompress_subresource. */ if (copy_planes && (tex->flushed_depth_texture || -si_init_flushed_depth_texture(&sctx->b, &tex->buffer.b.b, NULL))) { +si_init_flushed_depth_texture(&sctx->b, &tex->buffer.b.b))) { struct si_texture *dst = tex->flushed_depth_texture; unsigned fully_copied_levels; unsigned levels = 0; assert(tex->flushed_depth_texture); if (util_format_is_depth_and_stencil(dst->buffer.b.b.format)) copy_planes = PIPE_MASK_Z | PIPE_MASK_S; if (copy_planes & PIPE_MASK_Z) { @@ -1242,21 +1217,21 @@ static void si_blit(struct pipe_context *ctx, assert(util_blitter_is_blit_supported(sctx->blitter, info)); /* The driver doesn't decompress resources automatically while * u_blitter is rendering. */ vi_disable_dcc_if_incompatible_format(sctx, info->src.resource, info->src.level, info->src.format); vi_disable_dcc_if_incompatible_format(sctx, info->dst.resource, info->dst.level, info->dst.format); - si_decompress_subresource(ctx, info->src.resource, info->mask, + si_decompress_subresource(ctx, info->src.resource, PIPE_MASK_RGBAZS, info->src.level, info->src.box.z, info->src.box.z + info->src.box.depth - 1); if (sctx->screen->debug_flags & DBG(FORCE_DMA) && util_try_blit_via_copy_region(ctx, info)) return; si_blitter_begin(sctx, SI_BLIT | (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); diff --git a/src/gallium/drivers/radeonsi/si_p
[Mesa-dev] [PATCH] ac: change ac_query_gpu_info() signatures
From: Emil Velikov Currently libdrm_amdgpu provides a typedef of the various handles. While the goal was to make those opaque, it effectively became part of the API. To the best of my knowledge there are two ways to have opaque handles: - "typedef void *foo;" - rather messy IMHO - "struct foo;" and use "struct foo *" through the API. In our case amdgpu_device_handle is used only internally, plus respective code is not used or applicable for r300 and r600. Hence we copied the typedef. Seemingly this will be a problem since libdrm_amdgpu wants to change the API, while not updating the code(?). Either way, we can safely s/amdgpu_device_handle/void */ and carry on. Cc: Michel Dänzer Signed-off-by: Emil Velikov --- src/amd/common/ac_gpu_info.c | 3 ++- src/amd/common/ac_gpu_info.h | 4 +--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index db7f9e47ce1..8bd47cc26b3 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -92,7 +92,7 @@ static bool has_syncobj(int fd) return value ? true : false; } -bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, +bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, struct amdgpu_gpu_info *amdinfo) { @@ -104,6 +104,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, struct amdgpu_gds_resource_info gds = {}; uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0; int r, i, j; + amdgpu_device_handle dev = dev_p; drmDevicePtr devinfo; /* Get PCI info.
*/ diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 11fb77eee87..ba4940af142 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -35,8 +35,6 @@ extern "C" { #endif -/* Prior to C11 the following may trigger a typedef redeclaration warning */ -typedef struct amdgpu_device *amdgpu_device_handle; struct amdgpu_gpu_info; struct radeon_info { @@ -147,7 +145,7 @@ struct radeon_info { uint32_tcik_macrotile_mode_array[16]; }; -bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, +bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, struct amdgpu_gpu_info *amdinfo); -- 2.21.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev