Re: [Mesa-dev] [PATCH v2] radv: implement VK_EXT_sample_locations

2019-06-21 Thread Marek Olšák
Gfx10 remembers sample positions in compressed Z/S memory, so the hw
doesn't need the decompress pass for shader loads.

Marek

On Wed, May 22, 2019 at 4:20 PM Marek Olšák  wrote:

> The depth decompress pass needs to know the sample locations.
>
> If shader loads read from compressed depth, the texture hardware will
> always use the standard locations for decompression.
>
> Marek
>
> On Tue, May 21, 2019 at 8:17 PM Bas Nieuwenhuizen 
> wrote:
>
>> So this does not seem to use the sample locations during layout
>> transitions?
>>
>> AFAIK those are needed for e.g. HTILE decompression as it is based on
>> equations somehow.
>>
>> On Thu, May 16, 2019 at 11:51 AM Samuel Pitoiset
>>  wrote:
>> >
>> > Basically, this extension allows applications to use custom
>> > sample locations. It doesn't support variable sample locations
>> > during a subpass. Note that we don't have to upload the user
>> > sample locations because the spec doesn't allow this.
>> >
>> > Only enabled on VI+ because it's untested on older chips.
>> >
>> > v2: - change sampleLocationCoordinateRange[1] to 0.9375
>> > - compute and emit PA_SC_CENTROID_PRIORITY_{0,1}
>> > - rebased on top of master
>> > - some cleanups
>> >
>> > Signed-off-by: Samuel Pitoiset 
>> > ---
>> >  src/amd/vulkan/radv_cmd_buffer.c  | 223 ++
>> >  src/amd/vulkan/radv_device.c  |  27 
>> >  src/amd/vulkan/radv_extensions.py |   1 +
>> >  src/amd/vulkan/radv_pipeline.c|  30 
>> >  src/amd/vulkan/radv_private.h |  26 +++-
>> >  5 files changed, 300 insertions(+), 7 deletions(-)
>> >
>> > diff --git a/src/amd/vulkan/radv_cmd_buffer.c
>> b/src/amd/vulkan/radv_cmd_buffer.c
>> > index 4f592bc7f68..fb79c1c6713 100644
>> > --- a/src/amd/vulkan/radv_cmd_buffer.c
>> > +++ b/src/amd/vulkan/radv_cmd_buffer.c
>> > @@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer
>> *cmd_buffer,
>> > dest->viewport.count = src->viewport.count;
>> > dest->scissor.count = src->scissor.count;
>> > dest->discard_rectangle.count = src->discard_rectangle.count;
>> > +   dest->sample_location.count = src->sample_location.count;
>> >
>> > if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
>> > if (memcmp(&dest->viewport.viewports,
>> &src->viewport.viewports,
>> > @@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer
>> *cmd_buffer,
>> > }
>> > }
>> >
>> > +   if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
>> > +   if (dest->sample_location.per_pixel !=
>> src->sample_location.per_pixel ||
>> > +   dest->sample_location.grid_size.width !=
>> src->sample_location.grid_size.width ||
>> > +   dest->sample_location.grid_size.height !=
>> src->sample_location.grid_size.height ||
>> > +   memcmp(&dest->sample_location.locations,
>> > +  &src->sample_location.locations,
>> > +  src->sample_location.count *
>> sizeof(VkSampleLocationEXT))) {
>> > +   dest->sample_location.per_pixel =
>> src->sample_location.per_pixel;
>> > +   dest->sample_location.grid_size =
>> src->sample_location.grid_size;
>> > +   typed_memcpy(dest->sample_location.locations,
>> > +src->sample_location.locations,
>> > +src->sample_location.count);
>> > +   dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
>> > +   }
>> > +   }
>> > +
>> > cmd_buffer->state.dirty |= dest_mask;
>> >  }
>> >
>> > @@ -632,6 +649,190 @@ radv_emit_descriptor_pointers(struct
>> radv_cmd_buffer *cmd_buffer,
>> > }
>> >  }
>> >
>> > +/**
>> > + * Convert the user sample locations to hardware sample locations (the
>> values
>> > + * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
>> > + */
>> > +static void
>> > +radv_convert_user_sample_locs(struct radv_sample_locations_state
>> *state,
>> > + uint32_t x, uint32_t y, VkOffset2D
>> *sample_locs)
>> > +{
>> > +   uint32_t x_offset = x % state->grid_size.width;
>> > +   uint32_t y_offset = y % state->grid_size.height;
>> > +   uint32_t num_samples = (uint32_t)state->per_pixel;
>> > +   VkSampleLocationEXT *user_locs;
>> > +   uint32_t pixel_offset;
>> > +
>> > +   pixel_offset = (x_offset + y_offset * state->grid_size.width) *
>> num_samples;
>> > +
>> > +   assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
>> > +   user_locs = &state->locations[pixel_offset];
>> > +
>> > +   for (uint32_t i = 0; i < num_samples; i++) {
>> > +   float shifted_pos_x = user_locs[i].x - 0.5;
>> > +   float shifted_pos_y = user_locs[i].y - 0.5;
>> > +
>> > +   int32_t scaled_pos_x = floor(shifted_pos_x * 16);
>> > +   int32_t scaled_pos_y = floor(shifted_pos_y * 16);
>> > +
>> > + 

[Mesa-dev] [PATCH] android: virgl: fix generated virgl_driinfo.h building rules

2019-06-21 Thread Mauro Rossi
Changelog in Android makefile:
- Add LOCAL_MODULE_CLASS, intermediates and LOCAL_GENERATED_SOURCES
- Use LOCAL_EXPORT_C_INCLUDE_DIRS to export $(intermediates) path
- Move generated header rules before 'include $(BUILD_STATIC_LIBRARY)'

Fixes the following build error:

In file included from external/mesa/src/gallium/targets/dri/target.c:1:
external/mesa/src/gallium/auxiliary/target-helpers/drm_helper.h:257:16:
fatal error: 'virgl/virgl_driinfo.h' file not found
  #include "virgl/virgl_driinfo.h"
   ^~~
1 error generated.

Fixes: cf800998a ("virgl: Add driinfo file and tie it into the build")
Signed-off-by: Mauro Rossi 
---
 src/gallium/drivers/virgl/Android.mk | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/virgl/Android.mk 
b/src/gallium/drivers/virgl/Android.mk
index f77bcf196e..585ed7b2ce 100644
--- a/src/gallium/drivers/virgl/Android.mk
+++ b/src/gallium/drivers/virgl/Android.mk
@@ -30,8 +30,9 @@ LOCAL_SRC_FILES := \
 
 LOCAL_MODULE := libmesa_pipe_virgl
 
-include $(GALLIUM_COMMON_MK)
-include $(BUILD_STATIC_LIBRARY)
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+intermediates := $(call local-generated-sources-dir)
+LOCAL_GENERATED_SOURCES += $(intermediates)/virgl/virgl_driinfo.h
 
 GEN_DRIINFO_INPUTS := \
$(MESA_TOP)/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h \
@@ -44,6 +45,11 @@ $(intermediates)/virgl/virgl_driinfo.h: $(MERGE_DRIINFO) 
$(GEN_DRIINFO_INPUTS)
@echo "Gen Header: $(PRIVATE_MODULE) <= $(notdir $(@))"
$(hide) $(MESA_PYTHON2) $(MERGE_DRIINFO) $(GEN_DRIINFO_INPUTS) > $@ || 
($(RM) $@; false)
 
+LOCAL_EXPORT_C_INCLUDE_DIRS += $(intermediates)
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
+
 ifneq ($(HAVE_GALLIUM_VIRGL),)
 GALLIUM_TARGET_DRIVERS += virtio_gpu
 $(eval GALLIUM_LIBS += $(LOCAL_MODULE) libmesa_winsys_virgl_common 
libmesa_winsys_virgl libmesa_winsys_virgl_vtest)
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] ac: change ac_query_gpu_info() signatures

2019-06-21 Thread Marek Olšák
On Fri, Jun 21, 2019 at 1:13 PM Emil Velikov 
wrote:

> From: Emil Velikov 
>
> Currently libdrm_amdgpu provides a typedef of the various handles. While
> the goal was to make those opaque, it effectively became part of the API.
>
> To the best of my knowledge there are two ways to have opaque handles:
>  - "typedef void *foo;" - rather messy IMHO
>  - "struct foo;" and use "struct foo *" through the API
>
> In our case amdgpu_device_handle is used only internally, plus the
> respective code is not used or applicable for r300 and r600. Hence we
> copied the typedef.
>
> Seemingly this will be a problem since libdrm_amdgpu wants to change
> the API, while not updating the code(?).
>

libdrm_amdgpu can't change the API.

Reviewed-by: Marek Olšák 

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 5/6] gallium/util: Make it possible to disable persistent maps in the upload manager

2019-06-21 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Wed, Jun 19, 2019 at 4:42 AM Thomas Hellström (VMware) <
tho...@shipmail.org> wrote:

> From: Thomas Hellstrom 
>
> For svga, the use of persistent / coherent maps is typically slightly
> slower than without them. It's probably a bit case-dependent and
> possible to tune, but for now, make sure we can disable those.
>
> Signed-off-by: Thomas Hellstrom 
> Reviewed-by: Brian Paul 
> ---
>  src/gallium/auxiliary/util/u_upload_mgr.c | 14 --
>  src/gallium/auxiliary/util/u_upload_mgr.h |  4 
>  2 files changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c
> b/src/gallium/auxiliary/util/u_upload_mgr.c
> index c2c0ba957e3..73f6cae0b6d 100644
> --- a/src/gallium/auxiliary/util/u_upload_mgr.c
> +++ b/src/gallium/auxiliary/util/u_upload_mgr.c
> @@ -106,8 +106,10 @@ u_upload_clone(struct pipe_context *pipe, struct
> u_upload_mgr *upload)
> struct u_upload_mgr *result = u_upload_create(pipe,
> upload->default_size,
>   upload->bind,
> upload->usage,
>   upload->flags);
> -   if (upload->map_persistent &&
> -   upload->map_flags & PIPE_TRANSFER_FLUSH_EXPLICIT)
> +   if (!upload->map_persistent && result->map_persistent)
> +  u_upload_disable_persistent(result);
> +   else if (upload->map_persistent &&
> +upload->map_flags & PIPE_TRANSFER_FLUSH_EXPLICIT)
>u_upload_enable_flush_explicit(result);
>
> return result;
> @@ -121,6 +123,14 @@ u_upload_enable_flush_explicit(struct u_upload_mgr
> *upload)
> upload->map_flags |= PIPE_TRANSFER_FLUSH_EXPLICIT;
>  }
>
> +void
> +u_upload_disable_persistent(struct u_upload_mgr *upload)
> +{
> +   upload->map_persistent = FALSE;
> +   upload->map_flags &= ~(PIPE_TRANSFER_COHERENT |
> PIPE_TRANSFER_PERSISTENT);
> +   upload->map_flags |= PIPE_TRANSFER_FLUSH_EXPLICIT;
> +}
> +
>  static void
>  upload_unmap_internal(struct u_upload_mgr *upload, boolean destroying)
>  {
> diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h
> b/src/gallium/auxiliary/util/u_upload_mgr.h
> index 80832016272..6a4a60963fe 100644
> --- a/src/gallium/auxiliary/util/u_upload_mgr.h
> +++ b/src/gallium/auxiliary/util/u_upload_mgr.h
> @@ -73,6 +73,10 @@ u_upload_clone(struct pipe_context *pipe, struct
> u_upload_mgr *upload);
>  void
>  u_upload_enable_flush_explicit(struct u_upload_mgr *upload);
>
> +/** Whether to avoid persistent mappings where available */
> +void
> +u_upload_disable_persistent(struct u_upload_mgr *upload);
> +
>  /**
>   * Destroy the upload manager.
>   */
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] panfrost: Kill the perf counters interface

2019-06-21 Thread Alyssa Rosenzweig
Both patches pushed :)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/9] panfrost: Report UBO count

2019-06-21 Thread Alyssa Rosenzweig
We look at the highest set bit in the UBO enable mask to work out the
maximum indexable UBO, i.e. the UBO count that we need to report to the
hardware.

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_context.c | 17 +++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 5af6dcdb9c3..6fde645a12c 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -981,6 +981,17 @@ panfrost_map_constant_buffer_cpu(struct 
panfrost_constant_buffer *buf, unsigned
 unreachable("No constant buffer");
 }
 
+/* Compute number of UBOs active (more specifically, compute the highest UBO
+ * number addressable -- if there are gaps, include them in the count anyway)
+ * */
+
+static unsigned
+panfrost_ubo_count(struct panfrost_context *ctx, enum pipe_shader_type stage)
+{
+unsigned mask = ctx->constant_buffer[stage].enabled_mask;
+return 32 - __builtin_clz(mask);
+}
+
 /* Go through dirty flags and actualise them in the cmdstream. */
 
 void
@@ -1060,8 +1071,10 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, 
bool with_vertex_data)
 ctx->fragment_shader_core.midgard1.work_count = 
/*MAX2(ctx->fragment_shader_core.midgard1.work_count, 
ctx->blend->blend_work_count)*/16;
 
 /* Set late due to depending on render state */
-/* The one at the end seems to mean "1 UBO" */
-unsigned flags = MALI_EARLY_Z | 0x200 | 0x2000 | 0x1;
+
+/* The bottom bits seem to mean UBO count */
+unsigned ubo_count = panfrost_ubo_count(ctx, 
PIPE_SHADER_FRAGMENT);
+unsigned flags = MALI_EARLY_Z | 0x200 | 0x2000 | ubo_count;
 
 /* Any time texturing is used, derivatives are implicitly
  * calculated, so we need to enable helper invocations */
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/9] panfrost: Constant buffer refactor

2019-06-21 Thread Alyssa Rosenzweig
We refactor panfrost_constant_buffer to mirror v3d's constant buffer
handling, to enable UBOs as well as a single set of uniforms.

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_context.c | 66 ++
 src/gallium/drivers/panfrost/pan_context.h |  6 +-
 2 files changed, 34 insertions(+), 38 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index a803a8b68cb..5af6dcdb9c3 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -967,6 +967,20 @@ static void panfrost_upload_sysvals(struct 
panfrost_context *ctx, void *buf,
 }
 }
 
+static const void *
+panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf, 
unsigned index)
+{
+struct pipe_constant_buffer *cb = &buf->cb[index];
+struct panfrost_resource *rsrc = pan_resource(cb->buffer);
+
+if (rsrc)
+return rsrc->bo->cpu;
+else if (cb->user_buffer)
+return cb->user_buffer;
+else
+unreachable("No constant buffer");
+}
+
 /* Go through dirty flags and actualise them in the cmdstream. */
 
 void
@@ -1193,16 +1207,23 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, 
bool with_vertex_data)
 struct panfrost_shader_state *fs = 
&ctx->fs->variants[ctx->fs->active_variant];
 struct panfrost_shader_state *ss = (i == PIPE_SHADER_FRAGMENT) 
? fs : vs;
 
+/* Uniforms are implicitly UBO #0 */
+bool has_uniforms = buf->enabled_mask & (1 << 0);
+
 /* Allocate room for the sysval and the uniforms */
 size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
-size_t size = sys_size + buf->size;
+size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) 
: 0;
+size_t size = sys_size + uniform_size;
 struct panfrost_transfer transfer = 
panfrost_allocate_transient(ctx, size);
 
 /* Upload sysvals requested by the shader */
 panfrost_upload_sysvals(ctx, transfer.cpu, ss, i);
 
 /* Upload uniforms */
-memcpy(transfer.cpu + sys_size, buf->buffer, buf->size);
+if (has_uniforms) {
+const void *cpu = 
panfrost_map_constant_buffer_cpu(buf, 0);
+memcpy(transfer.cpu + sys_size, cpu, uniform_size);
+}
 
 int uniform_count = 0;
 
@@ -1236,7 +1257,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool 
with_vertex_data)
 postfix->uniforms = transfer.gpu;
 postfix->uniform_buffers = ubufs;
 
-buf->dirty = 0;
+buf->dirty_mask = 0;
 }
 
 /* TODO: Upload the viewport somewhere more appropriate */
@@ -1940,43 +1961,18 @@ panfrost_set_constant_buffer(
 struct panfrost_context *ctx = pan_context(pctx);
 struct panfrost_constant_buffer *pbuf = &ctx->constant_buffer[shader];
 
-size_t sz = buf ? buf->buffer_size : 0;
-
-/* Free previous buffer */
-
-pbuf->dirty = true;
-pbuf->size = sz;
+util_copy_constant_buffer(&pbuf->cb[index], buf);
 
-if (pbuf->buffer) {
-ralloc_free(pbuf->buffer);
-pbuf->buffer = NULL;
-}
-
-/* If unbinding, we're done */
+unsigned mask = (1 << index);
 
-if (!buf)
-return;
-
-/* Multiple constant buffers not yet supported */
-assert(index == 0);
-
-const uint8_t *cpu;
-
-struct panfrost_resource *rsrc = (struct panfrost_resource *) 
(buf->buffer);
-
-if (rsrc) {
-cpu = rsrc->bo->cpu;
-} else if (buf->user_buffer) {
-cpu = buf->user_buffer;
-} else {
-DBG("No constant buffer?\n");
+if (unlikely(!buf)) {
+pbuf->enabled_mask &= ~mask;
+pbuf->dirty_mask &= ~mask;
 return;
 }
 
-/* Copy the constant buffer into the driver context for later upload */
-
-pbuf->buffer = rzalloc_size(ctx, sz);
-memcpy(pbuf->buffer, cpu + buf->buffer_offset, sz);
+pbuf->enabled_mask |= mask;
+pbuf->dirty_mask |= mask;
 }
 
 static void
diff --git a/src/gallium/drivers/panfrost/pan_context.h 
b/src/gallium/drivers/panfrost/pan_context.h
index 1f718bcd9c4..21d1d4c8d46 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -67,9 +67,9 @@ struct prim_convert_context;
lval &= ~(bit);
 
 struct panfrost_constant_buffer {
-bool dirty;
-size_t size;
-void *buffer;
+struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
+uint32_t enabled_mask;
+uint32_t dirty_mask;
 };
 
 st

[Mesa-dev] [PATCH 5/9] panfrost: Identify "uniform buffer count" bits

2019-06-21 Thread Alyssa Rosenzweig
We've known about this for a while, but it was never formally in the
machine header files / decoder, so let's add them in.

Signed-off-by: Alyssa Rosenzweig 
---
 .../drivers/panfrost/include/panfrost-job.h| 15 +++
 src/gallium/drivers/panfrost/pan_context.c | 18 +-
 .../drivers/panfrost/pandecode/decode.c| 10 +-
 3 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/panfrost/include/panfrost-job.h 
b/src/gallium/drivers/panfrost/include/panfrost-job.h
index 6da86148cd7..fbef4efdc32 100644
--- a/src/gallium/drivers/panfrost/include/panfrost-job.h
+++ b/src/gallium/drivers/panfrost/include/panfrost-job.h
@@ -399,7 +399,7 @@ enum mali_format {
 #define MALI_ALPHA_COVERAGE(clampf) ((uint16_t) (int) (clampf * 15.0f))
 #define MALI_GET_ALPHA_COVERAGE(nibble) ((float) nibble / 15.0f)
 
-/* Applies to unknown1 */
+/* Applies to midgard1.flags */
 
 /* Should the hardware perform early-Z testing? Normally should be set
  * for performance reasons. Clear if you use: discard,
@@ -407,19 +407,19 @@ enum mali_format {
  * forward-pixel kill; we're not quite sure which bit is which yet.
  * TODO: How does this interact with blending?*/
 
-#define MALI_EARLY_Z (1 << 10)
+#define MALI_EARLY_Z (1 << 6)
 
 /* Should the hardware calculate derivatives (via helper invocations)? Set in a
  * fragment shader that uses texturing or derivative functions */
 
-#define MALI_HELPER_INVOCATIONS (1 << 11)
+#define MALI_HELPER_INVOCATIONS (1 << 7)
 
 /* Flags denoting the fragment shader's use of tilebuffer readback. If the
  * shader might read any part of the tilebuffer, set MALI_READS_TILEBUFFER. If
  * it might read depth/stencil in particular, also set MALI_READS_ZS */
 
-#define MALI_READS_ZS (1 << 12)
-#define MALI_READS_TILEBUFFER (1 << 16)
+#define MALI_READS_ZS (1 << 8)
+#define MALI_READS_TILEBUFFER (1 << 12)
 
 /* The raw Midgard blend payload can either be an equation or a shader
  * address, depending on the context */
@@ -538,9 +538,8 @@ struct mali_shader_meta {
 u32 unk1 : 28; // = 0x80 for vertex, 0x958020 for 
tiler
 } bifrost1;
 struct {
-/* 0x200 except MALI_NO_ALPHA_TO_COVERAGE. Mysterious 1
- * other times. Who knows really? */
-u16 unknown1;
+unsigned uniform_buffer_count : 4;
+unsigned flags : 12;
 
 /* Whole number of uniform registers used, times two;
  * whole number of work registers used (no scale).
diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 34eafdfdf0b..b3950950b4f 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1049,7 +1049,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool 
with_vertex_data)
 vs->tripipe->sampler_count = 
ctx->sampler_count[PIPE_SHADER_VERTEX];
 
 /* Who knows */
-vs->tripipe->midgard1.unknown1 = 0x2201;
+vs->tripipe->midgard1.flags = 0x220;
+vs->tripipe->midgard1.uniform_buffer_count = 1;
 
 ctx->payload_vertex.postfix._shader_upper = vs->tripipe_gpu >> 
4;
 }
@@ -1088,11 +1089,11 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, 
bool with_vertex_data)
 if (ctx->blend->has_blend_shader)
 ctx->fragment_shader_core.midgard1.work_count = 
/*MAX2(ctx->fragment_shader_core.midgard1.work_count, 
ctx->blend->blend_work_count)*/16;
 
-/* Set late due to depending on render state */
-
-/* The bottom bits seem to mean UBO count */
 unsigned ubo_count = panfrost_ubo_count(ctx, 
PIPE_SHADER_FRAGMENT);
-unsigned flags = MALI_EARLY_Z | 0x200 | 0x2000 | ubo_count;
+ctx->fragment_shader_core.midgard1.uniform_buffer_count = 
ubo_count;
+
+/* Set late due to depending on render state */
+unsigned flags = MALI_EARLY_Z | 0x20 | 0x200;
 
 /* Any time texturing is used, derivatives are implicitly
  * calculated, so we need to enable helper invocations */
@@ -1100,7 +1101,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool 
with_vertex_data)
 if (ctx->sampler_view_count[PIPE_SHADER_FRAGMENT])
 flags |= MALI_HELPER_INVOCATIONS;
 
-ctx->fragment_shader_core.midgard1.unknown1 = flags;
+ctx->fragment_shader_core.midgard1.flags = flags;
 
 /* Assign texture/sample count right before upload */
 ctx->fragment_shader_core.texture_count = 
ctx->sampler_view_count[PIPE_SHADER_FRAGMENT];
@@ -1119,9 +1120,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool 
with_ver

[Mesa-dev] [PATCH 3/9] panfrost: Allow for dynamic UBO count

2019-06-21 Thread Alyssa Rosenzweig
We already uploaded UBOs, but only a fixed number (1) for uniforms;
let's upload as many as we compute we need.

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_context.c | 27 +-
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 6fde645a12c..b1c234fba7c 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -982,13 +982,14 @@ panfrost_map_constant_buffer_cpu(struct 
panfrost_constant_buffer *buf, unsigned
 }
 
 /* Compute number of UBOs active (more specifically, compute the highest UBO
- * number addressable -- if there are gaps, include them in the count anyway)
- * */
+ * number addressable -- if there are gaps, include them in the count anyway).
+ * We always include UBO #0 in the count, since we *need* uniforms enabled for
+ * sysvals. */
 
 static unsigned
 panfrost_ubo_count(struct panfrost_context *ctx, enum pipe_shader_type stage)
 {
-unsigned mask = ctx->constant_buffer[stage].enabled_mask;
+unsigned mask = ctx->constant_buffer[stage].enabled_mask | 1;
 return 32 - __builtin_clz(mask);
 }
 
@@ -1257,16 +1258,20 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, 
bool with_vertex_data)
 unreachable("Invalid shader stage\n");
 }
 
-/* Also attach the same buffer as a UBO for extended access */
+/* Next up, attach UBOs. UBO #0 is the uniforms we just
+ * uploaded */
 
-struct mali_uniform_buffer_meta uniform_buffers[] = {
-{
-.size = MALI_POSITIVE((2 + uniform_count)),
-.ptr = transfer.gpu >> 2,
-},
-};
+unsigned ubo_count = panfrost_ubo_count(ctx, i);
+assert(ubo_count >= 1);
 
-mali_ptr ubufs = panfrost_upload_transient(ctx, 
uniform_buffers, sizeof(uniform_buffers));
+size_t sz = sizeof(struct mali_uniform_buffer_meta) * 
ubo_count;
+struct mali_uniform_buffer_meta *ubos = calloc(sz, 1);
+
+/* Upload uniforms as a UBO */
+ubos[0].size = MALI_POSITIVE((2 + uniform_count));
+ubos[0].ptr = transfer.gpu >> 2;
+
+mali_ptr ubufs = panfrost_upload_transient(ctx, ubos, sz);
 postfix->uniforms = transfer.gpu;
 postfix->uniform_buffers = ubufs;
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 6/9] panfrost: Handle disabled/empty UBOs

2019-06-21 Thread Alyssa Rosenzweig
Prevents an assert(0) later in this (not so edge) case. We still have to
have a dummy there.

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_context.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index b3950950b4f..4d935f8d984 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1291,9 +1291,21 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, 
bool with_vertex_data)
 /* The rest are honest-to-goodness UBOs */
 
 for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
-mali_ptr gpu = panfrost_map_constant_buffer_gpu(ctx, 
buf, ubo);
 size_t sz = buf->cb[ubo].buffer_size;
 
+bool enabled = buf->enabled_mask & (1 << ubo);
+bool empty = sz == 0;
+
+if (!enabled || empty) {
+/* Stub out disabled UBOs to catch accesses */
+
+ubos[ubo].size = 0;
+ubos[ubo].ptr = 0xDEAD;
+continue;
+}
+
+mali_ptr gpu = panfrost_map_constant_buffer_gpu(ctx, 
buf, ubo);
+
 unsigned bytes_per_field = 16;
 unsigned aligned = ALIGN(sz, bytes_per_field);
 unsigned fields = aligned / bytes_per_field;
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/9] panfrost: Initial UBO implementation

2019-06-21 Thread Alyssa Rosenzweig
This implements loads from direct vec4-aligned fields in UBOs. More
future work is needed for indirect or unaligned loads (just compiler changes), 
but the core command stream work is handled here.

Alyssa Rosenzweig (9):
  panfrost: Constant buffer refactor
  panfrost: Report UBO count
  panfrost: Allow for dynamic UBO count
  panfrost: Upload UBOs
  panfrost: Identify "uniform buffer count" bits
  panfrost: Handle disabled/empty UBOs
  panfrost/midgard: Implement UBO reads
  panfrost: DRY between shader stage setup
  panfrost: Allow up to 16 UBOs

 .../drivers/panfrost/include/panfrost-job.h   |  15 +-
 .../panfrost/midgard/midgard_compile.c|  65 --
 src/gallium/drivers/panfrost/pan_context.c| 198 --
 src/gallium/drivers/panfrost/pan_context.h|   6 +-
 src/gallium/drivers/panfrost/pan_screen.c |   2 +-
 .../drivers/panfrost/pandecode/decode.c   |  10 +-
 6 files changed, 203 insertions(+), 93 deletions(-)

-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/9] panfrost: Upload UBOs

2019-06-21 Thread Alyssa Rosenzweig
Now that all the counting is sorted, it's a matter of passing along a
GPU address and going.

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_context.c | 31 ++
 1 file changed, 31 insertions(+)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index b1c234fba7c..34eafdfdf0b 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -981,6 +981,23 @@ panfrost_map_constant_buffer_cpu(struct 
panfrost_constant_buffer *buf, unsigned
 unreachable("No constant buffer");
 }
 
+static mali_ptr
+panfrost_map_constant_buffer_gpu(
+struct panfrost_context *ctx,
+struct panfrost_constant_buffer *buf,
+unsigned index)
+{
+struct pipe_constant_buffer *cb = &buf->cb[index];
+struct panfrost_resource *rsrc = pan_resource(cb->buffer);
+
+if (rsrc)
+return rsrc->bo->gpu;
+else if (cb->user_buffer)
+return panfrost_upload_transient(ctx, cb->user_buffer, 
cb->buffer_size);
+else
+unreachable("No constant buffer");
+}
+
 /* Compute number of UBOs active (more specifically, compute the highest UBO
  * number addressable -- if there are gaps, include them in the count anyway).
  * We always include UBO #0 in the count, since we *need* uniforms enabled for
@@ -1271,6 +1288,20 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, 
bool with_vertex_data)
 ubos[0].size = MALI_POSITIVE((2 + uniform_count));
 ubos[0].ptr = transfer.gpu >> 2;
 
+/* The rest are honest-to-goodness UBOs */
+
+for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
+mali_ptr gpu = panfrost_map_constant_buffer_gpu(ctx, 
buf, ubo);
+size_t sz = buf->cb[ubo].buffer_size;
+
+unsigned bytes_per_field = 16;
+unsigned aligned = ALIGN(sz, bytes_per_field);
+unsigned fields = aligned / bytes_per_field;
+
+ubos[ubo].size = MALI_POSITIVE(fields);
+ubos[ubo].ptr = gpu >> 2;
+}
+
 mali_ptr ubufs = panfrost_upload_transient(ctx, ubos, sz);
 postfix->uniforms = transfer.gpu;
 postfix->uniform_buffers = ubufs;
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 8/9] panfrost: DRY between shader stage setup

2019-06-21 Thread Alyssa Rosenzweig
Just a little spring cleanup, extending UBOs to vertex shaders in the
process.

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_context.c | 55 ++
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 4d935f8d984..b8ad19cf0c6 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1010,6 +1010,26 @@ panfrost_ubo_count(struct panfrost_context *ctx, enum 
pipe_shader_type stage)
 return 32 - __builtin_clz(mask);
 }
 
+/* Fixes up a shader state with current state, returning a GPU address to the
+ * patched shader */
+
+static mali_ptr
+panfrost_patch_shader_state(
+struct panfrost_context *ctx,
+struct panfrost_shader_state *ss,
+enum pipe_shader_type stage)
+{
+ss->tripipe->texture_count = ctx->sampler_view_count[stage];
+ss->tripipe->sampler_count = ctx->sampler_count[stage];
+
+ss->tripipe->midgard1.flags = 0x220;
+
+unsigned ubo_count = panfrost_ubo_count(ctx, stage);
+ss->tripipe->midgard1.uniform_buffer_count = ubo_count;
+
+return ss->tripipe_gpu;
+}
+
 /* Go through dirty flags and actualise them in the cmdstream. */
 
 void
@@ -1043,16 +1063,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, 
bool with_vertex_data)
 
 struct panfrost_shader_state *vs = 
&ctx->vs->variants[ctx->vs->active_variant];
 
-/* Late shader descriptor assignments */
-
-vs->tripipe->texture_count = 
ctx->sampler_view_count[PIPE_SHADER_VERTEX];
-vs->tripipe->sampler_count = 
ctx->sampler_count[PIPE_SHADER_VERTEX];
-
-/* Who knows */
-vs->tripipe->midgard1.flags = 0x220;
-vs->tripipe->midgard1.uniform_buffer_count = 1;
-
-ctx->payload_vertex.postfix._shader_upper = vs->tripipe_gpu >> 
4;
+ctx->payload_vertex.postfix._shader_upper =
+panfrost_patch_shader_state(ctx, vs, 
PIPE_SHADER_VERTEX) >> 4;
 }
 
 if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) {
@@ -1074,13 +1086,20 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, 
bool with_vertex_data)
 assert(ctx->fs);
 struct panfrost_shader_state *variant = 
&ctx->fs->variants[ctx->fs->active_variant];
 
+panfrost_patch_shader_state(ctx, variant, 
PIPE_SHADER_FRAGMENT);
+
 #define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name
 
 COPY(shader);
 COPY(attribute_count);
 COPY(varying_count);
+COPY(texture_count);
+COPY(sampler_count);
+COPY(sampler_count);
 COPY(midgard1.uniform_count);
+COPY(midgard1.uniform_buffer_count);
 COPY(midgard1.work_count);
+COPY(midgard1.flags);
 COPY(midgard1.unknown2);
 
 #undef COPY
@@ -1089,11 +1108,14 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, 
bool with_vertex_data)
 if (ctx->blend->has_blend_shader)
 ctx->fragment_shader_core.midgard1.work_count = 
/*MAX2(ctx->fragment_shader_core.midgard1.work_count, 
ctx->blend->blend_work_count)*/16;
 
-unsigned ubo_count = panfrost_ubo_count(ctx, 
PIPE_SHADER_FRAGMENT);
-ctx->fragment_shader_core.midgard1.uniform_buffer_count = 
ubo_count;
-
 /* Set late due to depending on render state */
-unsigned flags = MALI_EARLY_Z | 0x20 | 0x200;
+unsigned flags = ctx->fragment_shader_core.midgard1.flags;
+
+/* Depending on whether it's legal to in the given shader, we
+ * try to enable early-z testing (or forward-pixel kill?) */
+
+if (!variant->can_discard)
+flags |= MALI_EARLY_Z;
 
 /* Any time texturing is used, derivatives are implicitly
  * calculated, so we need to enable helper invocations */
@@ -1103,10 +1125,6 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, 
bool with_vertex_data)
 
 ctx->fragment_shader_core.midgard1.flags = flags;
 
-/* Assign texture/sample count right before upload */
-ctx->fragment_shader_core.texture_count = 
ctx->sampler_view_count[PIPE_SHADER_FRAGMENT];
-ctx->fragment_shader_core.sampler_count = 
ctx->sampler_count[PIPE_SHADER_FRAGMENT];
-
 /* Assign the stencil refs late */
 ctx->fragment_shader_core.stencil_front.ref = 
ctx->stencil_ref.ref_value[0];
 ctx->fragment_shader_core.stencil_back.ref = 
ctx->stencil_ref.ref_value[1];
@@ -1120,7 +1138,6 @@ panfrost_emit_for_draw(struct panfrost_context *ct

[Mesa-dev] [PATCH 7/9] panfrost/midgard: Implement UBO reads

2019-06-21 Thread Alyssa Rosenzweig
UBOs and uniforms now use a common code path with an explicit `index`
argument passed, enabling UBO reads.

Signed-off-by: Alyssa Rosenzweig 
---
 .../panfrost/midgard/midgard_compile.c| 65 +++
 1 file changed, 51 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c 
b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
index baf637d666a..938872cc09e 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
@@ -1069,12 +1069,20 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 
 #undef ALU_CASE
 
+/* Uniforms and UBOs use a shared code path, as uniforms are just (slightly
+ * optimized) versions of UBO #0 */
+
 static void
-emit_uniform_read(compiler_context *ctx, unsigned dest, unsigned offset, 
nir_src *indirect_offset)
+emit_ubo_read(
+compiler_context *ctx,
+unsigned dest,
+unsigned offset,
+nir_src *indirect_offset,
+unsigned index)
 {
 /* TODO: half-floats */
 
-if (!indirect_offset && offset < ctx->uniform_cutoff) {
+if (!indirect_offset && offset < ctx->uniform_cutoff && index == 0) {
 /* Fast path: For the first 16 uniforms, direct accesses are
  * 0-cycle, since they're just a register fetch in the usual
  * case.  So, we alias the registers while we're still in
@@ -1095,11 +1103,13 @@ emit_uniform_read(compiler_context *ctx, unsigned dest, 
unsigned offset, nir_src
 
 if (indirect_offset) {
 emit_indirect_offset(ctx, indirect_offset);
-ins.load_store.unknown = 0x8700; /* xxx: what is this? 
*/
+ins.load_store.unknown = 0x8700 | index; /* xxx: what 
is this? */
 } else {
-ins.load_store.unknown = 0x1E00; /* xxx: what is this? 
*/
+ins.load_store.unknown = 0x1E00 | index; /* xxx: what 
is this? */
 }
 
+/* TODO respect index */
+
 emit_mir_instruction(ctx, ins);
 }
 }
@@ -1152,7 +1162,7 @@ emit_sysval_read(compiler_context *ctx, nir_instr *instr)
 unsigned uniform = ((uintptr_t) val) - 1;
 
 /* Emit the read itself -- this is never indirect */
-emit_uniform_read(ctx, dest, uniform, NULL);
+emit_ubo_read(ctx, dest, uniform, NULL, 0);
 }
 
 /* Reads RGBA value from the tilebuffer and converts to a RGBA32F register,
@@ -1231,7 +1241,7 @@ emit_fb_read_blend_scalar(compiler_context *ctx, unsigned 
reg)
 static void
 emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
 {
-unsigned offset, reg;
+unsigned offset = 0, reg;
 
 switch (instr->intrinsic) {
 case nir_intrinsic_discard_if:
@@ -1250,23 +1260,49 @@ emit_intrinsic(compiler_context *ctx, 
nir_intrinsic_instr *instr)
 }
 
 case nir_intrinsic_load_uniform:
-case nir_intrinsic_load_input:
-offset = nir_intrinsic_base(instr);
+case nir_intrinsic_load_ubo:
+case nir_intrinsic_load_input: {
+bool is_uniform = instr->intrinsic == 
nir_intrinsic_load_uniform;
+bool is_ubo = instr->intrinsic == nir_intrinsic_load_ubo;
+
+if (!is_ubo) {
+offset = nir_intrinsic_base(instr);
+}
 
 unsigned nr_comp = nir_intrinsic_dest_components(instr);
-bool direct = nir_src_is_const(instr->src[0]);
 
-if (direct) {
-offset += nir_src_as_uint(instr->src[0]);
-}
+nir_src *src_offset = nir_get_io_offset_src(instr);
+
+bool direct = nir_src_is_const(*src_offset);
+
+if (direct)
+offset += nir_src_as_uint(*src_offset);
 
 /* We may need to apply a fractional offset */
 int component = instr->intrinsic == nir_intrinsic_load_input ?
 nir_intrinsic_component(instr) : 0;
 reg = nir_dest_index(ctx, &instr->dest);
 
-if (instr->intrinsic == nir_intrinsic_load_uniform && 
!ctx->is_blend) {
-emit_uniform_read(ctx, reg, ctx->sysval_count + 
offset, !direct ? &instr->src[0] : NULL);
+if (is_uniform && !ctx->is_blend) {
+emit_ubo_read(ctx, reg, ctx->sysval_count + offset, 
!direct ? &instr->src[0] : NULL, 0);
+} else if (is_ubo) {
+nir_src index = instr->src[0];
+
+/* We don't yet support indirect UBOs. For indirect
+ * block numbers (if that's possible), we don't know
+ * enough about the hardware yet. For indirect sources,
+

[Mesa-dev] [PATCH 9/9] panfrost: Allow up to 16 UBOs

2019-06-21 Thread Alyssa Rosenzweig
This is the hardware max, as far as I can tell.

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_screen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/panfrost/pan_screen.c 
b/src/gallium/drivers/panfrost/pan_screen.c
index 5d3acc0a0dd..bff63f469c3 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -321,7 +321,7 @@ panfrost_get_shader_param(struct pipe_screen *screen,
 return 16 * 1024 * sizeof(float);
 
 case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
-return 4;
+return 16;
 
 case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
 return 0;
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/8] amd/rtld: update the ELF representation of LDS symbols

2019-06-21 Thread Bas Nieuwenhuizen
Marek, I thought you also r-b'd this?

Either way r-b.

On Thu, Jun 20, 2019 at 6:20 AM Marek Olšák  wrote:
>
> From: Nicolai Hähnle 
>
> The initial prototype used a processor-specific symbol type, but
> feedback suggests that an approach using processor-specific section
> name that encodes the alignment analogous to SHN_COMMON symbols is
> preferred.
>
> This patch keeps both variants around for now to reduce problems
> with LLVM compatibility as we switch branches around.
>
> This also cleans up the error reporting in this function.
> ---
>  src/amd/common/ac_rtld.c | 34 +++---
>  1 file changed, 27 insertions(+), 7 deletions(-)
>
> diff --git a/src/amd/common/ac_rtld.c b/src/amd/common/ac_rtld.c
> index 57d6b0151b4..ebf64d91658 100644
> --- a/src/amd/common/ac_rtld.c
> +++ b/src/amd/common/ac_rtld.c
> @@ -32,21 +32,25 @@
>
>  #include "ac_binary.h"
>  #include "ac_gpu_info.h"
>  #include "util/u_dynarray.h"
>  #include "util/u_math.h"
>
>  // Old distributions may not have this enum constant
>  #define MY_EM_AMDGPU 224
>
>  #ifndef STT_AMDGPU_LDS
> -#define STT_AMDGPU_LDS 13
> +#define STT_AMDGPU_LDS 13 // this is deprecated -- remove
> +#endif
> +
> +#ifndef SHN_AMDGPU_LDS
> +#define SHN_AMDGPU_LDS 0xff00
>  #endif
>
>  #ifndef R_AMDGPU_NONE
>  #define R_AMDGPU_NONE 0
>  #define R_AMDGPU_ABS32_LO 1
>  #define R_AMDGPU_ABS32_HI 2
>  #define R_AMDGPU_ABS64 3
>  #define R_AMDGPU_REL32 4
>  #define R_AMDGPU_REL64 5
>  #define R_AMDGPU_ABS32 6
> @@ -169,47 +173,60 @@ static bool layout_symbols(struct ac_rtld_symbol 
> *symbols, unsigned num_symbols,
>   * Read LDS symbols from the given \p section of the ELF of \p part and 
> append
>   * them to the LDS symbols list.
>   *
>   * Shared LDS symbols are filtered out.
>   */
>  static bool read_private_lds_symbols(struct ac_rtld_binary *binary,
>  unsigned part_idx,
>  Elf_Scn *section,
>  uint32_t *lds_end_align)
>  {
> -#define report_elf_if(cond) \
> +#define report_if(cond) \
> do { \
> if ((cond)) { \
> report_errorf(#cond); \
> return false; \
> } \
> } while (false)
> +#define report_elf_if(cond) \
> +   do { \
> +   if ((cond)) { \
> +   report_elf_errorf(#cond); \
> +   return false; \
> +   } \
> +   } while (false)
>
> struct ac_rtld_part *part = &binary->parts[part_idx];
> Elf64_Shdr *shdr = elf64_getshdr(section);
> uint32_t strtabidx = shdr->sh_link;
> Elf_Data *symbols_data = elf_getdata(section, NULL);
> report_elf_if(!symbols_data);
>
> const Elf64_Sym *symbol = symbols_data->d_buf;
> size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
>
> for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
> -   if (ELF64_ST_TYPE(symbol->st_info) != STT_AMDGPU_LDS)
> +   struct ac_rtld_symbol s = {};
> +
> +   if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
> +   /* old-style LDS symbols from initial prototype -- 
> remove eventually */
> +   s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 
> 16);
> +   } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
> +   s.align = MIN2(symbol->st_value, 1u << 16);
> +   report_if(!util_is_power_of_two_nonzero(s.align));
> +   } else
> continue;
>
> -   report_elf_if(symbol->st_size > 1u << 29);
> +   report_if(symbol->st_size > 1u << 29);
>
> -   struct ac_rtld_symbol s = {};
> s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
> s.size = symbol->st_size;
> -   s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
> s.part_idx = part_idx;
>
> if (!strcmp(s.name, "__lds_end")) {
> report_elf_if(s.size != 0);
> *lds_end_align = MAX2(*lds_end_align, s.align);
> continue;
> }
>
> const struct ac_rtld_symbol *shared =
> find_symbol(&binary->lds_symbols, s.name, part_idx);
> @@ -217,20 +234,21 @@ static bool read_private_lds_symbols(struct 
> ac_rtld_binary *binary,
> report_elf_if(s.align > shared->align);
> report_elf_if(s.size > shared->size);
> continue;
> }
>
> util_dynarray_append(&binary->lds_symbols, struct 
> ac_rtld_symbol, s);
> }
>
> return true;
>
> +#undef report_if
>  #undef report_elf_if
>  }
>
>  /**
>   * Open a binary consisting of one or more shader parts.
>   *
>   * \param binary the uninitializ

Re: [Mesa-dev] [PATCH 5/8] radeonsi: don't set spi_ps_input_* for monolithic shaders

2019-06-21 Thread Bas Nieuwenhuizen
Doesn't this cause assertions in si_shader_ps() for monolithic
shaders? Some of these assertions check that at least one bit in a
group is set and I think we end up with input_ena = 0 for monolithic
shaders now?

On Thu, Jun 20, 2019 at 6:20 AM Marek Olšák  wrote:
>
> From: Marek Olšák 
>
> The driver doesn't use these values and ac_rtld has assertions
> expecting the value of 0.
> ---
>  src/gallium/drivers/radeonsi/si_shader.c | 39 
>  1 file changed, 26 insertions(+), 13 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index 54b29d0ae01..0489399b827 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -6128,21 +6128,22 @@ static void si_get_ps_prolog_key(struct si_shader 
> *shader,
>  key->ps_prolog.states.bc_optimize_for_linear);
> key->ps_prolog.ancillary_vgpr_index = 
> shader->info.ancillary_vgpr_index;
>
> if (info->colors_read) {
> unsigned *color = shader->selector->color_attr_index;
>
> if (shader->key.part.ps.prolog.color_two_side) {
> /* BCOLORs are stored after the last input. */
> key->ps_prolog.num_interp_inputs = info->num_inputs;
> key->ps_prolog.face_vgpr_index = 
> shader->info.face_vgpr_index;
> -   shader->config.spi_ps_input_ena |= 
> S_0286CC_FRONT_FACE_ENA(1);
> +   if (separate_prolog)
> +   shader->config.spi_ps_input_ena |= 
> S_0286CC_FRONT_FACE_ENA(1);
> }
>
> for (unsigned i = 0; i < 2; i++) {
> unsigned interp = info->input_interpolate[color[i]];
> unsigned location = 
> info->input_interpolate_loc[color[i]];
>
> if (!(info->colors_read & (0xf << i*4)))
> continue;
>
> key->ps_prolog.color_attr_index[i] = color[i];
> @@ -6159,66 +6160,78 @@ static void si_get_ps_prolog_key(struct si_shader 
> *shader,
> case TGSI_INTERPOLATE_COLOR:
> /* Force the interpolation location for 
> colors here. */
> if 
> (shader->key.part.ps.prolog.force_persp_sample_interp)
> location = 
> TGSI_INTERPOLATE_LOC_SAMPLE;
> if 
> (shader->key.part.ps.prolog.force_persp_center_interp)
> location = 
> TGSI_INTERPOLATE_LOC_CENTER;
>
> switch (location) {
> case TGSI_INTERPOLATE_LOC_SAMPLE:
> 
> key->ps_prolog.color_interp_vgpr_index[i] = 0;
> -   shader->config.spi_ps_input_ena |=
> -   S_0286CC_PERSP_SAMPLE_ENA(1);
> +   if (separate_prolog) {
> +   
> shader->config.spi_ps_input_ena |=
> +   
> S_0286CC_PERSP_SAMPLE_ENA(1);
> +   }
> break;
> case TGSI_INTERPOLATE_LOC_CENTER:
> 
> key->ps_prolog.color_interp_vgpr_index[i] = 2;
> -   shader->config.spi_ps_input_ena |=
> -   S_0286CC_PERSP_CENTER_ENA(1);
> +   if (separate_prolog) {
> +   
> shader->config.spi_ps_input_ena |=
> +   
> S_0286CC_PERSP_CENTER_ENA(1);
> +   }
> break;
> case TGSI_INTERPOLATE_LOC_CENTROID:
> 
> key->ps_prolog.color_interp_vgpr_index[i] = 4;
> -   shader->config.spi_ps_input_ena |=
> -   
> S_0286CC_PERSP_CENTROID_ENA(1);
> +   if (separate_prolog) {
> +   
> shader->config.spi_ps_input_ena |=
> +   
> S_0286CC_PERSP_CENTROID_ENA(1);
> +   }
> break;
> default:
> assert(0);
> }
> break;
> case TGSI_INTERPOLATE_LINEAR:
> /* Force the interpolati

[Mesa-dev] [Bug 99781] Some Unity games fail assertion on startup in glXCreateContextAttribsARB

2019-06-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=99781

--- Comment #21 from Hal Gentz  ---
I just spotted that this got reopened today when going through my emails for
the first time in forever... this is unfortunate.

I'll take another swing at this sometime next week using Uli Schlachter's new
proposed method. Hopefully that doesn't cause a regression.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] Possible bug in nir_algebraic?

2019-06-21 Thread Ian Romanick
I have encountered what I believe to be a bug in nir_algebraic.  Since
the rewrite to use automata, I'm not sure how to begin debugging it.
I'm looking for some suggestions... even if the suggestion is, "Fix your
patterns."

I have added a pattern like:

   (('~fadd@32', ('fmul', ('fadd', 1.0, ('fneg', a)),
  ('fadd', 1.0, ('fneg', a))),
 ('fmul', ('flrp', a, 1.0, a), b)),
('flrp', 1.0, b, a), '!options->lower_flrp32'),

While using NIR_PRINT=1, I see this in my instruction stream:

vec1 32 ssa_2 = load_const (0x3f800000 /* 1.000000 */)
...
vec1 32 ssa_196 = intrinsic load_uniform (ssa_195) (68, 4, 160)
vec1 32 ssa_83 = fneg ssa_196
vec1 32 ssa_84 = fadd ssa_83, ssa_2
vec1 32 ssa_85 = fmul ssa_84, ssa_84
...
vec1 32 ssa_95 = flrp ssa_196, ssa_2, ssa_196
vec1 32 ssa_96 = fmul ssa_78, ssa_95
vec1 32 ssa_97 = fadd ssa_96, ssa_85

But nir_opt_algebraic does not make any progress.  It sure looks like it
should trigger with a = ssa_196 and b = ssa_78.

However, progress is made if I change the pattern to

   (('~fadd@32', ('fmul', ('fadd', 1.0, ('fneg', a)),
  c),
 ('fmul', ('flrp', a, 1.0, a), b)),
('flrp', 1.0, b, a), '!options->lower_flrp32'),

ssa_85 is definitely ('fmul', ssa_84, ssa_84), and ssa_84 is definitely
('fadd', 1.0, ('fneg', ssa_196))... both times. :)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 99781] Some Unity games fail assertion on startup in glXCreateContextAttribsARB

2019-06-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=99781

Hal Gentz  changed:

   What|Removed |Added

   Assignee|mesa-dev@lists.freedesktop. |zegen...@protonmail.com
   |org |

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] radeonsi: glmark2 - regression (GL_INVALID_OPERATION in glFramebufferTexture2D) since your work around 2019-06-05

2019-06-21 Thread Dieter Nützel

Hello Emil,

I see glmark2 - [desktop] blur-radius=5

libpng warning: iCCP: known incorrect sRGB profile
Mesa: User error: GL_INVALID_OPERATION in 
glFramebufferTexture2D(window-system framebuffer)
[desktop] blur-radius=5:effect=blur:passes=1:separable=true:windows=4: 
FPS: 4879 FrameTime: 0.205 ms


after your commits around the beginning of June (2019-06-05) or your
'mapi' work committed around 2019-06-10.

Have to bisect.
Any hints/ideas for a good starting point?

Greetings,
Dieter
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110603] Blocky and black opacity/alpha using RADV on some games

2019-06-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110603

--- Comment #7 from Samuel Pitoiset  ---
Can you record a renderdoc capture of the problem please?

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110662] shadow artifacts in Assassin's Creed: Unity

2019-06-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110662

soredake  changed:

   What|Removed |Added

 CC||fds...@krutt.org

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] Eager to work on Mesa Project under XorgEVoC

2019-06-21 Thread PRIYANSHU VARSHNEY
Hello Xorg Community!
My name is Priyanshu Varshney, and I am a *fourth-year* student at the *Indian
Institute of Technology (IIT)*, Indore, India. I am eager to work on writing an
advanced configuration tool for Mesa drivers, so I would like to know the
possible mentors for the project so that I can start diving into it.

Thanking You
Priyanshu Varshney
IIT INDORE
https://www.linkedin.com/in/priyanshu-varshney-476849152/
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] XDC 2019: Less than three weeks to go to submit your talks, workshops or demos!

2019-06-21 Thread Mark Filion
Hello!

Less than three weeks to go to submit your talks, workshops or demos
for this year's X.Org Developer Conference, which will be taking place
in Montréal, Canada on October 2-4, 2019!

The 2019 X.Org Developers Conference is the annual technical meeting
for X Window System and Free Desktop developers. Attendees will gather
to discuss outstanding technical issues related to the Open Source
Graphics stack (Linux kernel, Mesa, DRM, Wayland, X11, etc.) and its
software ecosystem.

While any serious proposal will be gratefully considered, topics of
interest to X.Org and freedesktop.org developers are encouraged. The
program focus is on new development, ongoing challenges and anything else
that will spark discussions among attendees in the hallway track.

We are open to talks across all layers of the graphics stack, from the
kernel to desktop environments / graphical applications and about how to
make things better for the developers who build them.

Head to the XDC website to learn more: 

https://xdc2019.x.org/

The deadline for submissions is Sunday, 7 July 2019.

Best,

Mark

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] panfrost: Print a backtrace if there is one

2019-06-21 Thread Rohan Garg
Hey
> Good stuff, but isn't this change making us hit assertions?

Hm, I thought it only enabled debugging symbols. Do you have a recommendation
on how to enable debug symbols without enabling assertions?

Cheers
Rohan Garg


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/3] panfrost: Make the gitlab-ci.yml file more robust

2019-06-21 Thread Rohan Garg
On Friday, 21 June 2019 07:40:02 CEST Tomeu Vizoso wrote:
> On 6/20/19 6:33 PM, Rohan Garg wrote:
> >> Not sure I understand how this works. Isn't this going to cause
> >> unnecessary container builds?
> >> 
> >> And, why cannot developers just add whatever they want to DEBIAN_TAG
> >> to that end?
> > 
> > This will spin a container for each branch, yes, though I think that is
> > how it should be.
> 
> Well, things are like this right now precisely because people wanted to
> avoid what you are proposing. I would be quite sad if every time I push a
> new branch I had to wait for the container to be rebuilt. Also note that
> this is how other projects based on wayland/ci-templates work, including
> the rest of mesa.
> 
> > The patch allows for someone working in a branch to
> > ( if the need be ) customize their containers for their branch.
> 
> As I said, you can easily do that atm by just updating DEBIAN_TAG.
> 
> > To that extent, I've simply enabled the CI to auto generate a DEBIAN_TAG
> > which depends on the branch name so that a developer doesn't have to
> > change
> > the DEBIAN_TAG themselves.
> 
> But, why is that such a problem?
> 

Fair enough, I'll rework the patch to drop the modified DEBIAN_TAG then.

Cheers
Rohan Garg

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] panfrost: Print a backtrace if there is one

2019-06-21 Thread Tomeu Vizoso

On 6/21/19 12:23 PM, Rohan Garg wrote:
> Hey
>
>> Good stuff, but isn't this change making us hit assertions?
>
> Hm, I thought it only enabled debugging symbols. Do you have a recommendation
> on how to enable debug symbols but not enabling assertions?


I think assertions should be limited to the most basic of sanity checks, 
and the other asserts replaced by proper error handling.


Cheers,

Tomeu
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] panfrost: ci: Fix parsing of crashed tests

2019-06-21 Thread Tomeu Vizoso
Without this fix, LAVA isn't parsing crashes as failed tests, because
the shell logging is interspersed within the fake deqp output.

Signed-off-by: Tomeu Vizoso 
---
 src/gallium/drivers/panfrost/ci/deqp-runner.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/panfrost/ci/deqp-runner.sh 
b/src/gallium/drivers/panfrost/ci/deqp-runner.sh
index 8645f75080e6..0b11202c1086 100644
--- a/src/gallium/drivers/panfrost/ci/deqp-runner.sh
+++ b/src/gallium/drivers/panfrost/ci/deqp-runner.sh
@@ -46,8 +46,8 @@ while [ -s /tmp/case-list.txt ]; do
sed -i "0,/^$crashed_test$/d" /tmp/case-list.txt
 
# So LAVA knows what happened
-   echo "Test case '$crashed_test'.."
-   echo "  Crash"
+   echo "Test case '$crashed_test'..
+ Crash"
else
# Consume a whole batch
sed -i '1,'$BATCH_SIZE'd' /tmp/case-list.txt
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/1] panfrost: Use mir_foreach_instr_in_block_safe

2019-06-21 Thread Rohan Garg
Use the _safe version as asserted by gallium

Signed-off-by: Rohan Garg 
---
 src/gallium/drivers/panfrost/midgard/midgard_schedule.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/panfrost/midgard/midgard_schedule.c 
b/src/gallium/drivers/panfrost/midgard/midgard_schedule.c
index 0bf3502f41c..1aef19c824c 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_schedule.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_schedule.c
@@ -406,7 +406,7 @@ schedule_block(compiler_context *ctx, midgard_block *block)
 
 block->quadword_count = 0;
 
-mir_foreach_instr_in_block(block, ins) {
+mir_foreach_instr_in_block_safe(block, ins) {
 int skip;
 midgard_bundle bundle = schedule_bundle(ctx, block, ins, 
&skip);
 util_dynarray_append(&block->bundles, midgard_bundle, bundle);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] panfrost: ci: Fix parsing of crashed tests

2019-06-21 Thread Alyssa Rosenzweig
A-b
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] panfrost: Track buffer initialization

2019-06-21 Thread Alyssa Rosenzweig
We want to know if a given slice of a buffer is initialized at a
particular point in the execution of the program. This is accomplished
easily enough -- start out uninitialized and upon an operation writing
to the buffer, mark it initialized.

The motivation is to optimize away expensive operations (like wallpaper
blits) when reading from an uninitialized buffer; since it's
uninitialized, the results of these operations are undefined, and it's
legal to take the fast path ^_^

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_context.c  |  9 +
 src/gallium/drivers/panfrost/pan_fragment.c | 21 +
 src/gallium/drivers/panfrost/pan_resource.c | 12 ++--
 src/gallium/drivers/panfrost/pan_resource.h |  3 +++
 4 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index d8c5510a31e..6257ffe2ac4 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1397,6 +1397,15 @@ panfrost_draw_wallpaper(struct pipe_context *pipe)
if (ctx->pipe_framebuffer.cbufs[0] == NULL)
return;
 
+/* Check if the buffer has any content on it worth preserving */
+
+struct pipe_surface *surf = ctx->pipe_framebuffer.cbufs[0];
+struct panfrost_resource *rsrc = pan_resource(surf->texture);
+unsigned level = surf->u.tex.level;
+
+if (!rsrc->bo->slices[level].initialized)
+return;
+
 /* Save the batch */
 struct panfrost_job *batch = panfrost_get_job_for_fbo(ctx);
 
diff --git a/src/gallium/drivers/panfrost/pan_fragment.c 
b/src/gallium/drivers/panfrost/pan_fragment.c
index 70358fec3f3..d6b8afdc6b9 100644
--- a/src/gallium/drivers/panfrost/pan_fragment.c
+++ b/src/gallium/drivers/panfrost/pan_fragment.c
@@ -28,6 +28,17 @@
 
 #include "util/u_format.h"
 
+/* Mark a surface as written */
+
+static void
+panfrost_initialize_surface(struct pipe_surface *surf)
+{
+unsigned level = surf->u.tex.level;
+struct panfrost_resource *rsrc = pan_resource(surf->texture);
+
+rsrc->bo->slices[level].initialized = true;
+}
+
 /* Generate a fragment job. This should be called once per frame. (According to
  * presentations, this is supposed to correspond to eglSwapBuffers) */
 
@@ -38,6 +49,16 @@ panfrost_fragment_job(struct panfrost_context *ctx, bool 
has_draws)
 panfrost_sfbd_fragment(ctx, has_draws) :
 panfrost_mfbd_fragment(ctx, has_draws);
 
+/* Mark the affected buffers as initialized, since we're writing to it 
*/
+struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
+
+for (unsigned i = 0; i < fb->nr_cbufs; ++i) {
+panfrost_initialize_surface(fb->cbufs[i]);
+}
+
+if (fb->zsbuf)
+panfrost_initialize_surface(fb->zsbuf);
+
 struct mali_job_descriptor_header header = {
 .job_type = JOB_TYPE_FRAGMENT,
 .job_index = 1,
diff --git a/src/gallium/drivers/panfrost/pan_resource.c 
b/src/gallium/drivers/panfrost/pan_resource.c
index a99840e4a52..1a4ce8ef297 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -71,6 +71,7 @@ panfrost_resource_from_handle(struct pipe_screen *pscreen,
 
rsc->bo = screen->driver->import_bo(screen, whandle);
rsc->bo->slices[0].stride = whandle->stride;
+rsc->bo->slices[0].initialized = true;
 
if (screen->ro) {
rsc->scanout =
@@ -509,7 +510,7 @@ panfrost_transfer_map(struct pipe_context *pctx,
 transfer->map = rzalloc_size(transfer, 
transfer->base.layer_stride * box->depth);
 assert(box->depth == 1);
 
-if (usage & PIPE_TRANSFER_READ) {
+if ((usage & PIPE_TRANSFER_READ) && 
bo->slices[level].initialized) {
 if (bo->layout == PAN_AFBC) {
 DBG("Unimplemented: reads from AFBC");
 } else if (bo->layout == PAN_TILED) {
@@ -528,6 +529,12 @@ panfrost_transfer_map(struct pipe_context *pctx,
 transfer->base.stride = bo->slices[level].stride;
 transfer->base.layer_stride = bo->cubemap_stride;
 
+/* By mapping direct-write, we're implicitly already
+ * initialized (maybe), so be conservative */
+
+if ((usage & PIPE_TRANSFER_WRITE) && (usage & 
PIPE_TRANSFER_MAP_DIRECTLY))
+bo->slices[level].initialized = true;
+
 return bo->cpu
 + bo->slices[level].offset
 + transfer->base.box.z * bo->cubemap_stride
@@ -549,11 +556,12 @@ panfrost_transfer_unmap(struct pipe_context *pctx,
 struct panfrost_bo *bo = prsrc->bo;
 
 if (transfer->usage & PIPE_TR

[Mesa-dev] [PATCH 0/3] panfrost/midgard: Merge constants

2019-06-21 Thread Alyssa Rosenzweig
Just a fun optimization, shaves some cycles off glmark shaders.

Alyssa Rosenzweig (3):
  panfrost/midgard: Share swizzle/mask code
  panfrost/midgard: Share swizzle compose
  panfrost/midgard: Merge embedded constants

 .../drivers/panfrost/midgard/helpers.h| 60 +
 .../panfrost/midgard/midgard_compile.c| 29 ---
 .../drivers/panfrost/midgard/midgard_ra.c |  9 +-
 .../panfrost/midgard/midgard_schedule.c   | 85 ++-
 4 files changed, 127 insertions(+), 56 deletions(-)

-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] panfrost/midgard: Share swizzle compose

2019-06-21 Thread Alyssa Rosenzweig
Signed-off-by: Alyssa Rosenzweig 
---
 .../drivers/panfrost/midgard/helpers.h| 29 +++
 .../drivers/panfrost/midgard/midgard_ra.c |  9 +-
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/panfrost/midgard/helpers.h 
b/src/gallium/drivers/panfrost/midgard/helpers.h
index ff069d3f8bb..cbf51fe882a 100644
--- a/src/gallium/drivers/panfrost/midgard/helpers.h
+++ b/src/gallium/drivers/panfrost/midgard/helpers.h
@@ -264,4 +264,33 @@ vector_alu_from_unsigned(unsigned u)
 return s;
 }
 
+/* Composes two swizzles */
+static inline unsigned
+pan_compose_swizzle(unsigned left, unsigned right)
+{
+unsigned out = 0;
+
+for (unsigned c = 0; c < 4; ++c) {
+unsigned s = (left >> (2*c)) & 0x3;
+unsigned q = (right >> (2*s)) & 0x3;
+
+out |= (q << (2*c));
+}
+
+return out;
+}
+
+/* Applies a swizzle to an ALU source */
+
+static inline unsigned
+vector_alu_apply_swizzle(unsigned src, unsigned swizzle)
+{
+midgard_vector_alu_src s =
+vector_alu_from_unsigned(src);
+
+s.swizzle = pan_compose_swizzle(s.swizzle, swizzle);
+
+return vector_alu_srco_unsigned(s);
+}
+
 #endif
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_ra.c 
b/src/gallium/drivers/panfrost/midgard/midgard_ra.c
index 7fd5e5b49e3..40a73709871 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_ra.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_ra.c
@@ -92,14 +92,7 @@ static unsigned
 compose_swizzle(unsigned swizzle, unsigned mask,
 struct phys_reg reg, struct phys_reg dst)
 {
-unsigned out = 0;
-
-for (unsigned c = 0; c < 4; ++c) {
-unsigned s = (swizzle >> (2*c)) & 0x3;
-unsigned q = (reg.swizzle >> (2*s)) & 0x3;
-
-out |= (q << (2*c));
-}
+unsigned out = pan_compose_swizzle(swizzle, reg.swizzle);
 
 /* Based on the register mask, we need to adjust over. E.g if we're
  * writing to yz, a base swizzle of xy__ becomes _xy_. Save the
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/2] panfrost: Implement rasterizer discard

2019-06-21 Thread Alyssa Rosenzweig
Now that scoreboarding is working, this is trivial.

Alyssa Rosenzweig (2):
  panfrost: Implement rasterizer discard
  panfrost: Conditionally submit fragment job

 src/gallium/drivers/panfrost/pan_context.c | 14 --
 src/gallium/drivers/panfrost/pan_drm.c |  5 -
 2 files changed, 16 insertions(+), 3 deletions(-)

-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/8] radeonsi: set the calling convention for inlined function calls

2019-06-21 Thread Bas Nieuwenhuizen
r-b

On Thu, Jun 20, 2019 at 6:19 AM Marek Olšák  wrote:
>
> From: Marek Olšák 
>
> otherwise the behavior is undefined
> ---
>  src/amd/common/ac_llvm_build.c | 8 
>  src/amd/common/ac_llvm_build.h | 3 +++
>  src/gallium/drivers/radeonsi/si_compute_prim_discard.c | 2 +-
>  src/gallium/drivers/radeonsi/si_shader.c   | 2 +-
>  4 files changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 1e6247ad72e..cdd4c36f5da 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -4427,10 +4427,18 @@ ac_build_ddxy_interp(struct ac_llvm_context *ctx, 
> LLVMValueRef interp_ij)
>
>  LLVMValueRef
>  ac_build_load_helper_invocation(struct ac_llvm_context *ctx)
>  {
> LLVMValueRef result = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live",
>  ctx->i1, NULL, 0,
>  AC_FUNC_ATTR_READNONE);
> result = LLVMBuildNot(ctx->builder, result, "");
> return LLVMBuildSExt(ctx->builder, result, ctx->i32, "");
>  }
> +
> +LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
> +  LLVMValueRef *args, unsigned num_args)
> +{
> +   LLVMValueRef ret = LLVMBuildCall(ctx->builder, func, args, num_args, 
> "");
> +   LLVMSetInstructionCallConv(ret, LLVMGetFunctionCallConv(func));
> +   return ret;
> +}
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index a1654d2b2c4..1928843c78c 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -713,20 +713,23 @@ ac_build_frexp_exp(struct ac_llvm_context *ctx, 
> LLVMValueRef src0,
>  LLVMValueRef
>  ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
> unsigned bitsize);
>
>  LLVMValueRef
>  ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij);
>
>  LLVMValueRef
>  ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
>
> +LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
> +  LLVMValueRef *args, unsigned num_args);
> +
>  LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, 
> LLVMAtomicRMWBinOp op,
>  LLVMValueRef ptr, LLVMValueRef val,
>  const char *sync_scope);
>
>  LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, 
> LLVMValueRef ptr,
>   LLVMValueRef cmp, LLVMValueRef val,
>   const char *sync_scope);
>
>  #ifdef __cplusplus
>  }
> diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c 
> b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
> index 0f2934243a1..28da7b92250 100644
> --- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
> +++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
> @@ -660,21 +660,21 @@ void si_build_prim_discard_compute_shader(struct 
> si_shader_context *ctx)
> index[1] = LLVMBuildSelect(builder, prim_is_odd, 
> index0, index1, "");
> }
> }
>
> /* Execute the vertex shader for each vertex to get vertex positions. 
> */
> LLVMValueRef pos[3][4];
> for (unsigned i = 0; i < vertices_per_prim; i++) {
> vs_params[param_vertex_id] = index[i];
> vs_params[param_instance_id] = instance_id;
>
> -   LLVMValueRef ret = LLVMBuildCall(builder, vs, vs_params, 
> num_vs_params, "");
> +   LLVMValueRef ret = ac_build_call(&ctx->ac, vs, vs_params, 
> num_vs_params);
> for (unsigned chan = 0; chan < 4; chan++)
> pos[i][chan] = LLVMBuildExtractValue(builder, ret, 
> chan, "");
> }
>
> /* Divide XYZ by W. */
> for (unsigned i = 0; i < vertices_per_prim; i++) {
> for (unsigned chan = 0; chan < 3; chan++)
> pos[i][chan] = ac_build_fdiv(&ctx->ac, pos[i][chan], 
> pos[i][3]);
> }
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index eb75e2a77a4..54b29d0ae01 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -6584,21 +6584,21 @@ static void si_build_wrapper_function(struct 
> si_shader_context *ctx,
> }
> } else {
> arg = LLVMBuildBitCast(builder, arg, 
> param_type, "");
> }
> }
>
> in[param_idx] = arg;
> out_idx += param_size;
> }
>
> -   ret = LLVMBuildCall(builder, parts[part], in, num_par

[Mesa-dev] [PATCH 1/3] panfrost/midgard: Share swizzle/mask code

2019-06-21 Thread Alyssa Rosenzweig
Signed-off-by: Alyssa Rosenzweig 
---
 .../drivers/panfrost/midgard/helpers.h| 31 +++
 .../panfrost/midgard/midgard_compile.c| 29 -
 2 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/panfrost/midgard/helpers.h 
b/src/gallium/drivers/panfrost/midgard/helpers.h
index a2cc7e67eaf..ff069d3f8bb 100644
--- a/src/gallium/drivers/panfrost/midgard/helpers.h
+++ b/src/gallium/drivers/panfrost/midgard/helpers.h
@@ -151,6 +151,37 @@ quadword_size(int tag)
 #define COMPONENT_Z 0x2
 #define COMPONENT_W 0x3
 
+#define SWIZZLE_XXXX SWIZZLE(COMPONENT_X, COMPONENT_X, COMPONENT_X, 
COMPONENT_X)
+#define SWIZZLE_XYXX SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_X, 
COMPONENT_X)
+#define SWIZZLE_XYZX SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, 
COMPONENT_X)
+#define SWIZZLE_XYZW SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, 
COMPONENT_W)
+#define SWIZZLE_XYXZ SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_X, 
COMPONENT_Z)
+#define SWIZZLE_WWWW SWIZZLE(COMPONENT_W, COMPONENT_W, COMPONENT_W, 
COMPONENT_W)
+
+static inline unsigned
+swizzle_of(unsigned comp)
+{
+switch (comp) {
+case 1:
+return SWIZZLE_XXXX;
+case 2:
+return SWIZZLE_XYXX;
+case 3:
+return SWIZZLE_XYZX;
+case 4:
+return SWIZZLE_XYZW;
+default:
+unreachable("Invalid component count");
+}
+}
+
+static inline unsigned
+mask_of(unsigned nr_comp)
+{
+return (1 << nr_comp) - 1;
+}
+
+
 /* See ISA notes */
 
 #define LDST_NOP (3)
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c 
b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
index 1cc551b603c..ddcb9a0a092 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
@@ -82,35 +82,6 @@ midgard_block_add_successor(midgard_block *block, 
midgard_block *successor)
  * driver seems to do it that way */
 
 #define EMIT(op, ...) emit_mir_instruction(ctx, v_##op(__VA_ARGS__));
-#define SWIZZLE_XXXX SWIZZLE(COMPONENT_X, COMPONENT_X, COMPONENT_X, 
COMPONENT_X)
-#define SWIZZLE_XYXX SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_X, 
COMPONENT_X)
-#define SWIZZLE_XYZX SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, 
COMPONENT_X)
-#define SWIZZLE_XYZW SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, 
COMPONENT_W)
-#define SWIZZLE_XYXZ SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_X, 
COMPONENT_Z)
-#define SWIZZLE_WWWW SWIZZLE(COMPONENT_W, COMPONENT_W, COMPONENT_W, 
COMPONENT_W)
-
-static inline unsigned
-swizzle_of(unsigned comp)
-{
-switch (comp) {
-case 1:
-return SWIZZLE_XXXX;
-case 2:
-return SWIZZLE_XYXX;
-case 3:
-return SWIZZLE_XYZX;
-case 4:
-return SWIZZLE_XYZW;
-default:
-unreachable("Invalid component count");
-}
-}
-
-static inline unsigned
-mask_of(unsigned nr_comp)
-{
-return (1 << nr_comp) - 1;
-}
 
 #define M_LOAD_STORE(name, rname, uname) \
static midgard_instruction m_##name(unsigned ssa, unsigned address) { \
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] panfrost: Conditionally submit fragment job

2019-06-21 Thread Alyssa Rosenzweig
If there are no tiling jobs and no clears, there is no need to submit a
fragment job (relevant for transform feedback).

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_drm.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/panfrost/pan_drm.c 
b/src/gallium/drivers/panfrost/pan_drm.c
index 77ec419398e..3914fdc09cc 100644
--- a/src/gallium/drivers/panfrost/pan_drm.c
+++ b/src/gallium/drivers/panfrost/pan_drm.c
@@ -265,7 +265,10 @@ panfrost_drm_submit_vs_fs_job(struct panfrost_context 
*ctx, bool has_draws, bool
assert(!ret);
}
 
-   ret = panfrost_drm_submit_job(ctx, panfrost_fragment_job(ctx, 
has_draws), PANFROST_JD_REQ_FS, surf);
+if (job->first_tiler.gpu || job->clear) {
+ret = panfrost_drm_submit_job(ctx, panfrost_fragment_job(ctx, 
has_draws), PANFROST_JD_REQ_FS, surf);
+assert(!ret);
+}
 
 return ret;
 }
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] panfrost/midgard: Merge embedded constants

2019-06-21 Thread Alyssa Rosenzweig
In Midgard, a bundle consists of a few ALU instructions. Within the
bundle, there is room for an optional 128-bit constant; this constant is
shared across all instructions in the bundle.

Unfortunately, many instructions want a 128-bit constant all to
themselves (how selfish!). If we run out of space for constants in a
bundle, the bundle has to be broken up, incurring a performance and
space penalty.

As an optimization, the scheduler now analyzes the constants coming in
per-instruction and attempts to merge shared components, adjusting the
swizzle accessing the bundle's constants appropriately. Concretely,
given the GLSL:

   (a * vec4(1.5, 0.5, 0.5, 1.0)) + vec4(1.0, 2.3, 2.3, 0.5)

instead of compiling to the naive two bundles:

   vmul.fmul [temp], [a], r26
   fconstants 1.5, 0.5, 0.5, 1.0

   vadd.fadd [out], [temp], r26
   fconstants 1.0, 2.3, 2.3, 0.5

The scheduler can now fuse into a single (pipelined!) bundle:

   vmul.fmul [temp], [a], r26.xyyz
   vadd.fadd [out], [temp], r26.zwwy
   fconstants 1.5, 0.5, 1.0, 2.3

Signed-off-by: Alyssa Rosenzweig 
---
 .../panfrost/midgard/midgard_schedule.c   | 85 ++-
 1 file changed, 66 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/panfrost/midgard/midgard_schedule.c 
b/src/gallium/drivers/panfrost/midgard/midgard_schedule.c
index 0bf3502f41c..7059f7bbe2a 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_schedule.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_schedule.c
@@ -147,6 +147,8 @@ schedule_bundle(compiler_context *ctx, midgard_block 
*block, midgard_instruction
 instructions_emitted = -1;
 midgard_instruction *pins = ins;
 
+unsigned constant_count = 0;
+
 for (;;) {
 midgard_instruction *ains = pins;
 
@@ -251,33 +253,78 @@ schedule_bundle(compiler_context *ctx, midgard_block 
*block, midgard_instruction
 
 segment[segment_size++] = ains;
 
-/* Only one set of embedded constants per
- * bundle possible; if we have more, we must
- * break the chain early, unfortunately */
+/* We try to reuse constants if possible, by adjusting
+ * the swizzle */
+
+if (ains->has_blend_constant) {
+bundle.has_blend_constant = 1;
+bundle.has_embedded_constants = 1;
+} else if (ains->has_constants) {
+/* By definition, blend constants conflict with
+ * everything, so if there are already
+ * constants we break the bundle *now* */
+
+if (bundle.has_blend_constant)
+break;
+
+/* For anything but blend constants, we can do
+ * proper analysis, however */
+
+/* TODO: Mask by which are used */
+uint32_t *constants = (uint32_t *) 
ains->constants;
+uint32_t *bundles = (uint32_t *) 
bundle.constants;
 
-if (ains->has_constants) {
-if (bundle.has_embedded_constants) {
-/* The blend constant needs to be
- * alone, since it conflicts with
- * everything by definition */
+uint32_t indices[4] = { 0 };
+bool break_bundle = false;
 
-if (ains->has_blend_constant || 
bundle.has_blend_constant)
+for (unsigned i = 0; i < 4; ++i) {
+uint32_t cons = constants[i];
+bool constant_found = false;
+
+/* Search for the constant */
+for (unsigned j = 0; j < 
constant_count; ++j) {
+if (bundles[j] != cons)
+continue;
+
+/* We found it, reuse */
+indices[i] = j;
+constant_found = true;
 break;
+}
+
+if (constant_found)
+continue;
 
-/* ...but if there are already
- * constants but these are the
-   

[Mesa-dev] [PATCH 1/2] panfrost: Implement rasterizer discard

2019-06-21 Thread Alyssa Rosenzweig
D'aww, look how cute that is now that scoreboarding is set up.

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_context.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 6257ffe2ac4..a803a8b68cb 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1338,12 +1338,22 @@ panfrost_queue_draw(struct panfrost_context *ctx)
 /* Handle dirty flags now */
 panfrost_emit_for_draw(ctx, true);
 
+/* If rasterizer discard is enabled, only submit the vertex */
+
+bool rasterizer_discard = ctx->rasterizer
+&& ctx->rasterizer->base.rasterizer_discard;
+
 struct panfrost_transfer vertex = panfrost_vertex_tiler_job(ctx, 
false);
-struct panfrost_transfer tiler = panfrost_vertex_tiler_job(ctx, true);
+struct panfrost_transfer tiler;
+
+if (!rasterizer_discard)
+tiler = panfrost_vertex_tiler_job(ctx, true);
 
 struct panfrost_job *batch = panfrost_get_job_for_fbo(ctx);
 
-if (ctx->wallpaper_batch)
+if (rasterizer_discard)
+panfrost_scoreboard_queue_vertex_job(batch, vertex, FALSE);
+else if (ctx->wallpaper_batch)
 panfrost_scoreboard_queue_fused_job_prepend(batch, vertex, 
tiler);
 else
 panfrost_scoreboard_queue_fused_job(batch, vertex, tiler);
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 6/8] radeonsi: flatten the switch for DPBB tunables

2019-06-21 Thread Bas Nieuwenhuizen
r-b

On Thu, Jun 20, 2019 at 6:20 AM Marek Olšák  wrote:
>
> From: Marek Olšák 
>
> ---
>  .../drivers/radeonsi/si_state_binning.c| 18 --
>  1 file changed, 4 insertions(+), 14 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c 
> b/src/gallium/drivers/radeonsi/si_state_binning.c
> index 6285ccc28c2..a6b1830b661 100644
> --- a/src/gallium/drivers/radeonsi/si_state_binning.c
> +++ b/src/gallium/drivers/radeonsi/si_state_binning.c
> @@ -395,34 +395,24 @@ void si_emit_dpbb_state(struct si_context *sctx)
> punchout_mode = V_028060_AUTO;
> disable_start_of_prim = (cb_target_enabled_4bit &
>  blend->blend_enable_4bit) != 0;
> }
>
> /* Tunable parameters. Also test with DFSM enabled/disabled. */
> unsigned context_states_per_bin; /* allowed range: [0, 5] */
> unsigned persistent_states_per_bin; /* allowed range: [0, 31] */
> unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
>
> -   switch (sctx->family) {
> -   case CHIP_VEGA10:
> -   case CHIP_VEGA12:
> -   case CHIP_VEGA20:
> -   case CHIP_RAVEN:
> -   case CHIP_RAVEN2:
> -   /* Tuned for Raven. Vega might need different values. */
> -   context_states_per_bin = 5;
> -   persistent_states_per_bin = 31;
> -   fpovs_per_batch = 63;
> -   break;
> -   default:
> -   assert(0);
> -   }
> +   /* Tuned for Raven. Vega might need different values. */
> +   context_states_per_bin = 5;
> +   persistent_states_per_bin = 31;
> +   fpovs_per_batch = 63;
>
> /* Emit registers. */
> struct uvec2 bin_size_extend = {};
> if (bin_size.x >= 32)
> bin_size_extend.x = util_logbase2(bin_size.x) - 5;
> if (bin_size.y >= 32)
> bin_size_extend.y = util_logbase2(bin_size.y) - 5;
>
> unsigned initial_cdw = sctx->gfx_cs->current.cdw;
> radeon_opt_set_context_reg(
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] panfrost: Implement command stream scoreboarding

2019-06-21 Thread Tomeu Vizoso

On 6/20/19 8:20 PM, Alyssa Rosenzweig wrote:

This is a rather complex change, adding a lot of code but ideally
cleaning up quite a bit as we go.

Within a batch (single frame), there are multiple distinct Mali job
types: SET_VALUE, VERTEX, TILER, FRAGMENT for the few that we emit right
now (eventually more for compute and geometry shaders). Each hardware
job has a mali_job_descriptor_header, which contains three fields of
interest: job index, a dependencies list, and a next job pointer.

The next job pointer in each job is used to form a linked list of
submitted jobs. Easy enough.

The job index and dependencies list, however, are used to form a
dependency graph (a DAG, where each hardware job is a node and each
dependency is a directed edge). Internally, this sets up a scoreboarding
data structure for the hardware to dispatch jobs in parallel, enabling
(for example) vertex shaders from different draws to execute in parallel
while there are strict dependencies between tiling the geometry of a
draw and running that vertex shader.

For a while, we got by with an incredible series of total hacks,
manually coding indices, lists, and dependencies. That worked for a
moment, but combinatorial kaboom kicked in and it became an
unmaintainable mess of spaghetti code.

We can do better. This commit explicitly handles the scoreboarding by
providing high-level manipulation for jobs. Rather than a command like
"set dependency #2 to index 17", we can express quite naturally "add a
dependency from job T on job V". Instead of some open-coded logic to
copy a draw pointer into a delicate context array, we now have an
elegant exposed API to simply "queue a job of type XYZ".

The design is influenced by both our current requirements (standard ES2
draws and u_blitter) as well as the need for more complex scheduling in
the future. For instance, blits can be optimized to use only a tiler
job, without a vertex job first (since the screen-space vertices are
known ahead-of-time) -- causing tiler-only jobs. Likewise, when using
transform feedback with rasterizer discard enabled, vertex jobs are
created (to run vertex shaders) with no corresponding tiler job. Both of
these cases break the original model and could not be expressed with the
open-coded logic. More generally, this will make it easier to add
support for compute shaders, geometry shaders, and fused jobs (an
optimization available on Bifrost).

Incidentally, this moves quite a bit of state from the driver context to
the batch, which helps with Rohan's refactor to eventually permit
pipelining across framebuffers (one important outstanding optimization
for FBO-heavy workloads).

Signed-off-by: Alyssa Rosenzweig 
---
  src/gallium/drivers/panfrost/meson.build  |   1 +
  src/gallium/drivers/panfrost/pan_context.c| 132 +
  src/gallium/drivers/panfrost/pan_context.h|  19 +-
  src/gallium/drivers/panfrost/pan_drm.c|   6 +-
  src/gallium/drivers/panfrost/pan_job.c|  18 +-
  src/gallium/drivers/panfrost/pan_job.h|  61 +++
  src/gallium/drivers/panfrost/pan_scoreboard.c | 453 ++
  7 files changed, 548 insertions(+), 142 deletions(-)
  create mode 100644 src/gallium/drivers/panfrost/pan_scoreboard.c

diff --git a/src/gallium/drivers/panfrost/meson.build 
b/src/gallium/drivers/panfrost/meson.build
index 43d73ce2086..4298242f6b9 100644
--- a/src/gallium/drivers/panfrost/meson.build
+++ b/src/gallium/drivers/panfrost/meson.build
@@ -57,6 +57,7 @@ files_panfrost = files(
'pan_blend_shaders.c',
'pan_pretty_print.c',
'pan_fragment.c',
+  'pan_scoreboard.c',
'pan_sfbd.c',
'pan_mfbd.c',
'pan_tiler.c',
diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 41656236b5b..d8c5510a31e 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -517,15 +517,6 @@ panfrost_default_shader_backend(struct panfrost_context 
*ctx)
  memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader));
  }
  
-static void

-panfrost_link_job_pair(struct mali_job_descriptor_header *first, mali_ptr next)
-{
-if (first->job_descriptor_size)
-first->next_job_64 = (u64) (uintptr_t) next;
-else
-first->next_job_32 = (u32) (uintptr_t) next;
-}
-
  /* Generates a vertex/tiler job. This is, in some sense, the heart of the
   * graphics command stream. It should be called once per draw, accordding to
   * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, but in
@@ -535,12 +526,8 @@ panfrost_link_job_pair(struct mali_job_descriptor_header 
*first, mali_ptr next)
  struct panfrost_transfer
  panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler)
  {
-/* Each draw call corresponds to two jobs, and the set-value job is 
first */
-int draw_job_index = 1 + (2 * ctx->draw_count) + 1;
-
  struct mali_job_descriptor_header job = {

Re: [Mesa-dev] [PATCH] panfrost: Implement command stream scoreboarding

2019-06-21 Thread Alyssa Rosenzweig
> Very nice, this will fix a few tests.

Ah, of course :)

> Good riddance!

To be fair, a lot of that just got moved in with panfrost_job.. :)

> Can we remove now the has_draws arg?

Maybe? The has_draws arg controls a lot of things beyond just, you
know, having draws. The whole API there needs to be overhauled quite a
bit. So yes, we can remove it, but -after- this is merged. Enough code
is being changed here as it is; we don't need more things to break.

> What's the primary batch? Might warrant an explanation somewhere.

Meh, I was making up terminology as I went. The primary batch being
everything but the FRAGMENT job -- so, the SET_VALUE/VERTEX/TILER jobs
(and later COMPUTE/GEOMETRY/FUSED). I.e. everything that we actually
handle in panfrost_job and the scoreboarding module, rather than just
what we tag on in pan_fragment (which is sort of special cased? not that
you couldn't also scoreboard those but there's no reason to since
there's only ever one in the chain -- so no deps and manual index=1 --
as far as I know?).

> This is excellent work, thanks!

:D Thank you for the review!

--

has_draws can be addressed later. Do you want me to add a v2 with a
comment explaining what I meant by "primary batch"? Or should we just
merge as-is (while CI is still green :) and add a followup patch later)?


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] panfrost: Implement command stream scoreboarding

2019-06-21 Thread Tomeu Vizoso

On 6/21/19 3:05 PM, Alyssa Rosenzweig wrote:

Very nice, this will fix a few tests.


Ah, of course :)


Good riddance!


To be fair, a lot of that just got moved in with panfrost_job.. :)


Can we remove now the has_draws arg?


Maybe? The has_draws arg controls a lot of things beyond just, you
know, having draws. The whole API there needs to be overhauled quite a
bit. So yes, we can remove it, but -after- this is merged. Enough code
is being changed here as it is; we don't need more things to break.


What's the primary batch? Might warrant an explanation somewhere.


Meh, I was making up terminology as I went. The primary batch being
everything but the FRAGMENT job -- so, the SET_VALUE/VERTEX/TILER jobs
(and later COMPUTE/GEOMETRY/FUSED). I.e. everything that we actually
handle in panfrost_job and the scoreboarding module, rather than just
what we tag on in pan_fragment (which is sort of special cased? not that
you couldn't also scoreboard those but there's no reason to since
there's only ever one in the chain -- so no deps and manual index=1 --
as far as I know?).


This is excellent work, thanks!


:D Thank you for the review!

--

has_draws can be addressed later. Do you want me to add a v2 with a
comment explaining what I meant by "primary batch"? Or should we just
merge as-is (while CI is still green :) and add a followup patch later)?


Both sound good to me.

Thanks,

Tomeu
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 7/9] radv: clear the depth/stencil resolve attachment if necessary

2019-06-21 Thread Bas Nieuwenhuizen
r-b

On Wed, Jun 12, 2019 at 11:44 AM Samuel Pitoiset
 wrote:
>
> The driver might need to clear one aspect of the depth/stencil
> resolve attachment before performing the resolve itself.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_meta_clear.c | 73 
>  1 file changed, 55 insertions(+), 18 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_meta_clear.c 
> b/src/amd/vulkan/radv_meta_clear.c
> index 44aaf92f53d..b5824c68fe2 100644
> --- a/src/amd/vulkan/radv_meta_clear.c
> +++ b/src/amd/vulkan/radv_meta_clear.c
> @@ -715,13 +715,14 @@ static void
>  emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
>  const VkClearAttachment *clear_att,
>  const VkClearRect *clear_rect,
> +   struct radv_subpass_attachment *ds_att,
>  uint32_t view_mask)
>  {
> struct radv_device *device = cmd_buffer->device;
> struct radv_meta_state *meta_state = &device->meta_state;
> const struct radv_subpass *subpass = cmd_buffer->state.subpass;
> const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
> -   const uint32_t pass_att = 
> subpass->depth_stencil_attachment->attachment;
> +   const uint32_t pass_att = ds_att->attachment;
> VkClearDepthStencilValue clear_value = 
> clear_att->clearValue.depthStencil;
> VkImageAspectFlags aspects = clear_att->aspectMask;
> const struct radv_image_view *iview = fb ? 
> fb->attachments[pass_att].attachment : NULL;
> @@ -761,18 +762,25 @@ emit_depthstencil_clear(struct radv_cmd_buffer 
> *cmd_buffer,
>  iview,
>  samples_log2,
>  aspects,
> -
> subpass->depth_stencil_attachment->layout,
> +ds_att->layout,
>  clear_rect,
>  clear_value);
> if (!pipeline)
> return;
>
> +   struct radv_subpass clear_subpass = {
> +   .color_count = 0,
> +   .color_attachments = NULL,
> +   .depth_stencil_attachment = ds_att,
> +   };
> +
> +   radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);
> +
> radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
>  pipeline);
>
> if (depth_view_can_fast_clear(cmd_buffer, iview, aspects,
> - 
> subpass->depth_stencil_attachment->layout,
> - clear_rect, clear_value))
> + ds_att->layout, clear_rect, 
> clear_value))
> radv_update_ds_clear_metadata(cmd_buffer, iview->image,
>   clear_value, aspects);
>
> @@ -799,6 +807,8 @@ emit_depthstencil_clear(struct radv_cmd_buffer 
> *cmd_buffer,
> radv_CmdSetStencilReference(cmd_buffer_h, 
> VK_STENCIL_FACE_FRONT_BIT,
>   prev_reference);
> }
> +
> +   radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
>  }
>
>  static uint32_t
> @@ -1562,7 +1572,8 @@ emit_clear(struct radv_cmd_buffer *cmd_buffer,
> const VkClearRect *clear_rect,
> enum radv_cmd_flush_bits *pre_flush,
> enum radv_cmd_flush_bits *post_flush,
> -   uint32_t view_mask)
> +   uint32_t view_mask,
> +  bool ds_resolve_clear)
>  {
> const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
> const struct radv_subpass *subpass = cmd_buffer->state.subpass;
> @@ -1588,12 +1599,16 @@ emit_clear(struct radv_cmd_buffer *cmd_buffer,
> emit_color_clear(cmd_buffer, clear_att, clear_rect, 
> view_mask);
> }
> } else {
> -   const uint32_t pass_att = 
> subpass->depth_stencil_attachment->attachment;
> -   if (pass_att == VK_ATTACHMENT_UNUSED)
> +   struct radv_subpass_attachment *ds_att = 
> subpass->depth_stencil_attachment;
> +
> +   if (ds_resolve_clear)
> +   ds_att = subpass->ds_resolve_attachment;
> +
> +   if (ds_att->attachment == VK_ATTACHMENT_UNUSED)
> return;
>
> -   VkImageLayout image_layout = 
> subpass->depth_stencil_attachment->layout;
> -   const struct radv_image_view *iview = fb ? 
> fb->attachments[pass_att].attachment : NULL;
> +   VkImageLayout image_layout = ds_att->layout;
> +   const struct radv_image_view *iview = fb ? 
> fb->attachments[ds_att->attachment].attachment : NULL;
> VkClearDept

Re: [Mesa-dev] [PATCH v2 6/9] radv: decompress HTILE if the resolve src image is compressed

2019-06-21 Thread Bas Nieuwenhuizen
We only need this if the HTILE is not tc-compat?

Otherwise the read side is independent of compute/fragment shader.

On Wed, Jun 12, 2019 at 11:44 AM Samuel Pitoiset
 wrote:
>
> It's required to decompress HTILE before resolving with the
> compute path.
>
> v2: - do proper layout transitions
> - account for the framebuffer layers
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_meta_resolve.c | 18 +-
>  1 file changed, 17 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_meta_resolve.c 
> b/src/amd/vulkan/radv_meta_resolve.c
> index 7cadf937ee6..48ebbd1fc10 100644
> --- a/src/amd/vulkan/radv_meta_resolve.c
> +++ b/src/amd/vulkan/radv_meta_resolve.c
> @@ -784,6 +784,22 @@ radv_decompress_resolve_subpass_src(struct 
> radv_cmd_buffer *cmd_buffer)
> radv_decompress_resolve_src(cmd_buffer, src_image,
> src_att.layout, 1, &region);
> }
> +
> +   if (subpass->ds_resolve_attachment) {
> +   struct radv_subpass_attachment src_att = 
> *subpass->depth_stencil_attachment;
> +   struct radv_image_view *src_iview =
> +   fb->attachments[src_att.attachment].attachment;
> +   struct radv_image *src_image = src_iview->image;
> +
> +   VkImageResolve region = {};
> +   region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
> +   region.srcSubresource.mipLevel = 0;
> +   region.srcSubresource.baseArrayLayer = src_iview->base_layer;
> +   region.srcSubresource.layerCount = layer_count;
> +
> +   radv_decompress_resolve_src(cmd_buffer, src_image,
> +   src_att.layout, 1, &region);
> +   }
>  }
>
>  /**
> @@ -809,7 +825,7 @@ radv_decompress_resolve_src(struct radv_cmd_buffer 
> *cmd_buffer,
> barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
> barrier.image = radv_image_to_handle(src_image);
> barrier.subresourceRange = (VkImageSubresourceRange) {
> -   .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
> +   .aspectMask = region->srcSubresource.aspectMask,
> .baseMipLevel = region->srcSubresource.mipLevel,
> .levelCount = 1,
> .baseArrayLayer = src_base_layer,
> --
> 2.22.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] panfrost: Kill the perf counters interface

2019-06-21 Thread Boris Brezillon
The DRM driver has a dummy implementation and the non-drm backend is
gone, so let's kill this perf counter interface.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_context.c | 14 --
 src/gallium/drivers/panfrost/pan_drm.c | 14 --
 src/gallium/drivers/panfrost/pan_screen.c  | 10 --
 src/gallium/drivers/panfrost/pan_screen.h  |  4 
 4 files changed, 42 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index e4a04dd821f5..867b49c55967 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -48,9 +48,6 @@
 #include "pan_util.h"
 #include "pan_tiler.h"
 
-static int performance_counter_number = 0;
-extern const char *pan_counters_base;
-
 /* Do not actually send anything to the GPU; merely generate the cmdstream as 
fast as possible. Disables framebuffer writes */
 //#define DRY_RUN
 
@@ -1442,17 +1439,6 @@ panfrost_submit_frame(struct panfrost_context *ctx, bool 
flush_immediate,
 /* If readback, flush now (hurts the pipelined performance) */
 if (flush_immediate)
 screen->driver->force_flush_fragment(ctx, fence);
-
-if (screen->driver->dump_counters && pan_counters_base) {
-screen->driver->dump_counters(screen);
-
-char filename[128];
-snprintf(filename, sizeof(filename), "%s/frame%d.mdgprf", 
pan_counters_base, ++performance_counter_number);
-FILE *fp = fopen(filename, "wb");
-fwrite(screen->perf_counters.cpu,  4096, sizeof(uint32_t), fp);
-fclose(fp);
-}
-
 #endif
 }
 
diff --git a/src/gallium/drivers/panfrost/pan_drm.c 
b/src/gallium/drivers/panfrost/pan_drm.c
index aed50477ff7d..f4d0721071bb 100644
--- a/src/gallium/drivers/panfrost/pan_drm.c
+++ b/src/gallium/drivers/panfrost/pan_drm.c
@@ -318,18 +318,6 @@ panfrost_drm_force_flush_fragment(struct panfrost_context 
*ctx,
 }
 }
 
-static void
-panfrost_drm_enable_counters(struct panfrost_screen *screen)
-{
-   fprintf(stderr, "unimplemented: %s\n", __func__);
-}
-
-static void
-panfrost_drm_dump_counters(struct panfrost_screen *screen)
-{
-   fprintf(stderr, "unimplemented: %s\n", __func__);
-}
-
 static unsigned
 panfrost_drm_query_gpu_version(struct panfrost_screen *screen)
 {
@@ -420,12 +408,10 @@ panfrost_create_drm_driver(int fd)
driver->base.force_flush_fragment = panfrost_drm_force_flush_fragment;
driver->base.allocate_slab = panfrost_drm_allocate_slab;
driver->base.free_slab = panfrost_drm_free_slab;
-   driver->base.enable_counters = panfrost_drm_enable_counters;
driver->base.query_gpu_version = panfrost_drm_query_gpu_version;
driver->base.init_context = panfrost_drm_init_context;
driver->base.fence_reference = panfrost_drm_fence_reference;
driver->base.fence_finish = panfrost_drm_fence_finish;
-   driver->base.dump_counters = panfrost_drm_dump_counters;
 
 return &driver->base;
 }
diff --git a/src/gallium/drivers/panfrost/pan_screen.c 
b/src/gallium/drivers/panfrost/pan_screen.c
index 5d3acc0a0dd5..8d43e0d9f4ca 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -66,8 +66,6 @@ int pan_debug = 0;
 
 struct panfrost_driver *panfrost_create_drm_driver(int fd);
 
-const char *pan_counters_base = NULL;
-
 static const char *
 panfrost_get_name(struct pipe_screen *screen)
 {
@@ -585,14 +583,6 @@ panfrost_create_screen(int fd, struct renderonly *ro)
 
 screen->driver = panfrost_create_drm_driver(fd);
 
-/* Dump performance counters iff asked for in the environment */
-pan_counters_base = getenv("PANCOUNTERS_BASE");
-
-if (pan_counters_base) {
-screen->driver->allocate_slab(screen, &screen->perf_counters, 
64, true, 0, 0, 0);
-screen->driver->enable_counters(screen);
-}
-
 if (pan_debug & PAN_DBG_TRACE)
 pandecode_initialize();
 
diff --git a/src/gallium/drivers/panfrost/pan_screen.h 
b/src/gallium/drivers/panfrost/pan_screen.h
index 0660be5128c7..c7504f3221d4 100644
--- a/src/gallium/drivers/panfrost/pan_screen.h
+++ b/src/gallium/drivers/panfrost/pan_screen.h
@@ -64,8 +64,6 @@ struct panfrost_driver {
struct panfrost_memory *mem);
 void (*free_imported_bo) (struct panfrost_screen *screen,
  struct panfrost_bo *bo);
-void (*enable_counters) (struct panfrost_screen *screen);
-void (*dump_counters) (struct panfrost_screen *screen);
unsigned (*query_gpu_version) (struct panfrost_screen *screen);
int (*init_context) (struct panfrost_context *ctx);
void (*fence_reference) (struct pipe_screen *screen,
@@ -83,8 +81,6 @@ struct panfrost_screen {
 struct renderonly *ro;
 struct panfrost_dri

[Mesa-dev] [PATCH 2/2] panfrost: Kill the panfrost_driver abstraction

2019-06-21 Thread Boris Brezillon
The non-drm backend is gone and there's no plan to bring it back to
life. Let's get rid of the panfrost_driver abstraction and call the
panfrost_drm_xxx() functions directly.

Signed-off-by: Boris Brezillon 
---
 src/gallium/drivers/panfrost/pan_afbc.c |   2 +-
 src/gallium/drivers/panfrost/pan_context.c  |  32 +++---
 src/gallium/drivers/panfrost/pan_drm.c  | 102 ++--
 src/gallium/drivers/panfrost/pan_job.c  |   2 +-
 src/gallium/drivers/panfrost/pan_resource.c |  18 ++--
 src/gallium/drivers/panfrost/pan_screen.c   |   8 +-
 src/gallium/drivers/panfrost/pan_screen.h   |  76 +--
 7 files changed, 109 insertions(+), 131 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_afbc.c 
b/src/gallium/drivers/panfrost/pan_afbc.c
index 4bef833f1820..5621d1f333a3 100644
--- a/src/gallium/drivers/panfrost/pan_afbc.c
+++ b/src/gallium/drivers/panfrost/pan_afbc.c
@@ -138,7 +138,7 @@ panfrost_enable_afbc(struct panfrost_context *ctx, struct 
panfrost_resource *rsr
 unsigned buffer_size = header_size + body_size;
 
 /* Allocate the AFBC slab itself, large enough to hold the above */
-screen->driver->allocate_slab(screen, &rsrc->bo->afbc_slab,
+panfrost_drm_allocate_slab(screen, &rsrc->bo->afbc_slab,
ALIGN(buffer_size, 4096) / 4096,
true, 0, 0, 0);
 
diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 867b49c55967..14d29f5d20f1 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -87,7 +87,7 @@ panfrost_enable_checksum(struct panfrost_context *ctx, struct 
panfrost_resource
 /* 8 byte checksum per tile */
 rsrc->bo->checksum_stride = tile_w * 8;
 int pages = (((rsrc->bo->checksum_stride * tile_h) + 4095) / 4096);
-screen->driver->allocate_slab(screen, &rsrc->bo->checksum_slab, pages, 
false, 0, 0, 0);
+panfrost_drm_allocate_slab(screen, &rsrc->bo->checksum_slab, pages, 
false, 0, 0, 0);
 
 rsrc->bo->has_checksum = true;
 }
@@ -1431,14 +1431,14 @@ panfrost_submit_frame(struct panfrost_context *ctx, 
bool flush_immediate,
 /* If visual, we can stall a frame */
 
 if (!flush_immediate)
-screen->driver->force_flush_fragment(ctx, fence);
+panfrost_drm_force_flush_fragment(ctx, fence);
 
 screen->last_fragment_flushed = false;
 screen->last_job = job;
 
 /* If readback, flush now (hurts the pipelined performance) */
 if (flush_immediate)
-screen->driver->force_flush_fragment(ctx, fence);
+panfrost_drm_force_flush_fragment(ctx, fence);
 #endif
 }
 
@@ -2503,11 +2503,11 @@ panfrost_destroy(struct pipe_context *pipe)
 if (panfrost->blitter)
 util_blitter_destroy(panfrost->blitter);
 
-screen->driver->free_slab(screen, &panfrost->scratchpad);
-screen->driver->free_slab(screen, &panfrost->varying_mem);
-screen->driver->free_slab(screen, &panfrost->shaders);
-screen->driver->free_slab(screen, &panfrost->tiler_heap);
-screen->driver->free_slab(screen, &panfrost->tiler_polygon_list);
+panfrost_drm_free_slab(screen, &panfrost->scratchpad);
+panfrost_drm_free_slab(screen, &panfrost->varying_mem);
+panfrost_drm_free_slab(screen, &panfrost->shaders);
+panfrost_drm_free_slab(screen, &panfrost->tiler_heap);
+panfrost_drm_free_slab(screen, &panfrost->tiler_polygon_list);
 
 ralloc_free(pipe);
 }
@@ -2660,12 +2660,12 @@ panfrost_setup_hardware(struct panfrost_context *ctx)
 ctx->transient_pools[i].entries[0] = (struct 
panfrost_memory_entry *) pb_slab_alloc(&screen->slabs, entry_size, 
HEAP_TRANSIENT);
 }
 
-screen->driver->allocate_slab(screen, &ctx->scratchpad, 64, false, 0, 
0, 0);
-screen->driver->allocate_slab(screen, &ctx->varying_mem, 16384, false, 
PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_COHERENT_LOCAL, 0, 0);
-screen->driver->allocate_slab(screen, &ctx->shaders, 4096, true, 
PAN_ALLOCATE_EXECUTE, 0, 0);
-screen->driver->allocate_slab(screen, &ctx->tiler_heap, 32768, false, 
PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
-screen->driver->allocate_slab(screen, &ctx->tiler_polygon_list, 
128*128, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
-screen->driver->allocate_slab(screen, &ctx->tiler_dummy, 1, false, 
PAN_ALLOCATE_INVISIBLE, 0, 0);
+panfrost_drm_allocate_slab(screen, &ctx->scratchpad, 64, false, 0, 0, 
0);
+panfrost_drm_allocate_slab(screen, &ctx->varying_mem, 16384, false, 
PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_COHERENT_LOCAL, 0, 0);
+panfrost_drm_allocate_slab(screen, &ctx->shaders, 4096, true, 
PAN_ALLOCATE_EXECUTE, 0, 0);
+panfrost_drm_allocate_slab(screen, &ctx->tiler_heap, 32768, 

[Mesa-dev] Proposal for the future of www.mesa3d.org

2019-06-21 Thread Erik Faye-Lund
A while back, Laura and Jean were working on a Sphinx-conversion of the
mesa-documentation. Sadly this work stranded due to it also trying to
move to using GitLab Pages for hosting www.mesa3d.org, and because the
documentation and the website are the same thing, this led to problems
with hosting the release-archive (www.mesa3d.org/archive/).

Since then, I've taken a look at trying to revive this work. So far,
I've taken most of the changes Laura did to the website post-RST
conversion, and performed them before instead. I've also automated more
of the conversion process, so we can more easily get an up-to-date
conversion. The result can be viewed here:

https://kusma.pages.freedesktop.org/mesa/
https://gitlab.freedesktop.org/kusma/mesa/tree/docs-sphinx-v2

Please note that there's some differences:
- I don't do any "mesa-specific styling". This can be done on top if
needed, simply by cherry-picking Laura's commits for this. But I'm not
sure we need it, more about this further down.
- Some of the commit history might be incorrectly attributed to me
instead of Laura. I intend to fix this up before upstreaming anything
here.
- The conversion isn't entirely up-to-date, but it's *fairly* recent.

So yeah, the big elephant in the room is what to do with 
www.mesa3d.org/archive. This is where I have an alternative suggestion:

How about we split the documentation and the website into two sites, 
www.mesa3d.org and docs.mesa3d.org, and maintain the website in a
separate repository? We would of course have to set up some redirects
to make old URLs point to the latest version (at least for a transition
period).

This has some additional benefits:
- We don't need to push things to master to update things like news,
that aren't really related to the code.
- We can separate information that is technical documentation from
information that is "project marketing".
- ...And because we don't need for the docs to appeal as "project
marketing", we can keep the neutral readthedocs theme as-is, as it's a
bit more easy on the eye IMO.
- It makes the article index a bit more logical, as there are a few
articles that don't really make sense to read after you already have
the source tree. Why would you wonder who the webmaster is
(docs/webmaster.html) or where to download mesa (docs/download.html)
when reading the source?
- We can host docs.mesa3d.org using GitLab pages (or point it to
something like readthedocs.org) without having to change the hosting
for www.mesa3d.org.

In addition to this, I've also had a look at modernizing www.mesa.org 
as well, and I've made a proposal for a new, responsive website:

https://kusma.pages.freedesktop.org/
https://gitlab.freedesktop.org/kusma/kusma.pages.freedesktop.org/

Quite a few things to notice:
- Many links here forward to docs.mesa3d.org, which doesn't exist yet.
- The redirects are done using meta-refresh tags instead of HTTP
redirects, so they will only be redirected by an actual user-agent, not
by curl or wget.
- The site is using logos of Khronos APIs which might not be OK without
approval. The legality of this needs to be researched.
- Most of the content here is "usable placeholder" text, but by no
means final. For instance, the descriptions of the APIs and drivers
probably need work. Especially the driver-description should probably
be written by the driver-teams rather than me.
- Some drivers are missing. I just didn't bother writing more
placeholder.
- What content goes in which site is by no means decided on.
- Some content isn't yet in either site; in particular, non-html files,
like for instance the contents of www.mesa3d.org/specs. And since
GitLab pages doesn't do directory listings, that folder (regardless of
where it'd be residing) would need an index added.
- The site is made using Jekyll, but any static-site generator would
do, really.

The redirect-issue is due to the prototype currently being hosted in
GitLab pages, and is a GitLab pages limitation. See 
https://gitlab.com/gitlab-org/gitlab-pages/issues/24 for more details.
I doubt this would be a problem for documentation, but the same
approach won't work for www.mesa3d.org/archive. Without solving that
problem, we can't really go live with this while hosting it on GitLab
pages.

But we could go forward *without* hosting www.mesa3d.org in GitLab
pages in the short term. I don't know how we currently deploy the
website, I guess that's done manually by someone at some points? If so,
we'd just update the manual recipe, I guess.

I think the long-term goal should be to also move www.mesa3d.org to
GitLab pages as well, and I have some ideas for how to deal with the 
www.mesa3d.org/archive-problem, but this is a much longer discussion,
and this email is already too long. So if someone wants to discuss
that, feel free to reply, and I'll happily tell you about it!

Anyway, thoughts? Objections?



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop

Re: [Mesa-dev] [PATCH] panfrost: Implement command stream scoreboarding

2019-06-21 Thread Rohan Garg
On Thursday, 20 June 2019 20:20:38 CEST Alyssa Rosenzweig wrote:
> This is a rather complex change, adding a lot of code but ideally
> cleaning up quite a bit as we go.
> 
> Within a batch (single frame), there are multiple distinct Mali job
> types: SET_VALUE, VERTEX, TILER, FRAGMENT for the few that we emit right
> now (eventually more for compute and geometry shaders). Each hardware
> job has a mali_job_descriptor_header, which contains three fields of
> interest: job index, a dependencies list, and a next job pointer.
> 
> The next job pointer in each job is used to form a linked list of
> submitted jobs. Easy enough.
> 
> The job index and dependencies list, however, are used to form a
> dependency graph (a DAG, where each hardware job is a node and each
> dependency is a directed edge). Internally, this sets up a scoreboarding
> data structure for the hardware to dispatch jobs in parallel, enabling
> (for example) vertex shaders from different draws to execute in parallel
> while there are strict dependencies between tiling the geometry of a
> draw and running that vertex shader.
> 
> For a while, we got by with an incredible series of total hacks,
> manually coding indices, lists, and dependencies. That worked for a
> moment, but combinatorial kaboom kicked in and it became an
> unmaintainable mess of spaghetti code.
> 
> We can do better. This commit explicitly handles the scoreboarding by
> providing high-level manipulation for jobs. Rather than a command like
> "set dependency #2 to index 17", we can express quite naturally "add a
> dependency from job T on job V". Instead of some open-coded logic to
> copy a draw pointer into a delicate context array, we now have an
> elegant exposed API to simply "queue a job of type XYZ".
> 
> The design is influenced by both our current requirements (standard ES2
> draws and u_blitter) as well as the need for more complex scheduling in
> the future. For instance, blits can be optimized to use only a tiler
> job, without a vertex job first (since the screen-space vertices are
> known ahead-of-time) -- causing tiler-only jobs. Likewise, when using
> transform feedback with rasterizer discard enabled, vertex jobs are
> created (to run vertex shaders) with no corresponding tiler job. Both of
> these cases break the original model and could not be expressed with the
> open-coded logic. More generally, this will make it easier to add
> support for compute shaders, geometry shaders, and fused jobs (an
> optimization available on Bifrost).
> 
> Incidentally, this moves quite a bit of state from the driver context to
> the batch, which helps with Rohan's refactor to eventually permit
> pipelining across framebuffers (one important outstanding optimization
> for FBO-heavy workloads).
> 
> Signed-off-by: Alyssa Rosenzweig 
> ---
>  src/gallium/drivers/panfrost/meson.build  |   1 +
>  src/gallium/drivers/panfrost/pan_context.c| 132 +
>  src/gallium/drivers/panfrost/pan_context.h|  19 +-
>  src/gallium/drivers/panfrost/pan_drm.c|   6 +-
>  src/gallium/drivers/panfrost/pan_job.c|  18 +-
>  src/gallium/drivers/panfrost/pan_job.h|  61 +++
>  src/gallium/drivers/panfrost/pan_scoreboard.c | 453 ++
>  7 files changed, 548 insertions(+), 142 deletions(-)
>  create mode 100644 src/gallium/drivers/panfrost/pan_scoreboard.c
> 
> diff --git a/src/gallium/drivers/panfrost/meson.build
> b/src/gallium/drivers/panfrost/meson.build index 43d73ce2086..4298242f6b9
> 100644
> --- a/src/gallium/drivers/panfrost/meson.build
> +++ b/src/gallium/drivers/panfrost/meson.build
> @@ -57,6 +57,7 @@ files_panfrost = files(
>'pan_blend_shaders.c',
>'pan_pretty_print.c',
>'pan_fragment.c',
> +  'pan_scoreboard.c',
>'pan_sfbd.c',
>'pan_mfbd.c',
>'pan_tiler.c',
> diff --git a/src/gallium/drivers/panfrost/pan_context.c
> b/src/gallium/drivers/panfrost/pan_context.c index 41656236b5b..d8c5510a31e
> 100644
> --- a/src/gallium/drivers/panfrost/pan_context.c
> +++ b/src/gallium/drivers/panfrost/pan_context.c
> @@ -517,15 +517,6 @@ panfrost_default_shader_backend(struct panfrost_context
> *ctx) memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader)); }
> 
> -static void
> -panfrost_link_job_pair(struct mali_job_descriptor_header *first, mali_ptr
> next) -{
> -if (first->job_descriptor_size)
> -first->next_job_64 = (u64) (uintptr_t) next;
> -else
> -first->next_job_32 = (u32) (uintptr_t) next;
> -}
> -
>  /* Generates a vertex/tiler job. This is, in some sense, the heart of the
>   * graphics command stream. It should be called once per draw, accordding
> to * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs,
> but in @@ -535,12 +526,8 @@ panfrost_link_job_pair(struct
> mali_job_descriptor_header *first, mali_ptr next) struct panfrost_transfer
>  panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler)
>  {
> -/* Each draw call corresponds to

[Mesa-dev] [PATCH] radv: add support for VK_AMD_buffer_marker

2019-06-21 Thread Samuel Pitoiset
This simple extension might be useful for debugging purposes.
GAPID has support for it.

Signed-off-by: Samuel Pitoiset 
---

A simple crucible test:
https://gitlab.freedesktop.org/mesa/crucible/merge_requests/47

 src/amd/vulkan/radv_cmd_buffer.c  | 35 +++
 src/amd/vulkan/radv_extensions.py |  1 +
 2 files changed, 36 insertions(+)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 9b4d4528028..215ccced144 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -5716,3 +5716,38 @@ void radv_CmdDrawIndirectByteCountEXT(
 
radv_draw(cmd_buffer, &info);
 }
+
+/* VK_AMD_buffer_marker */
+void radv_CmdWriteBufferMarkerAMD(
+VkCommandBuffer commandBuffer,
+VkPipelineStageFlagBits pipelineStage,
+VkBufferdstBuffer,
+VkDeviceSizedstOffset,
+uint32_tmarker)
+{
+   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+   RADV_FROM_HANDLE(radv_buffer, buffer, dstBuffer);
+   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   uint64_t va = radv_buffer_get_va(buffer->bo) + dstOffset;
+
+   si_emit_cache_flush(cmd_buffer);
+
+   if (!(pipelineStage & ~VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
+   radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+   radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
+   COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+   COPY_DATA_WR_CONFIRM);
+   radeon_emit(cs, marker);
+   radeon_emit(cs, 0);
+   radeon_emit(cs, va);
+   radeon_emit(cs, va >> 32);
+   } else {
+   si_cs_emit_write_event_eop(cs,
+  
cmd_buffer->device->physical_device->rad_info.chip_class,
+  radv_cmd_buffer_uses_mec(cmd_buffer),
+  V_028A90_BOTTOM_OF_PIPE_TS, 0,
+  EOP_DATA_SEL_VALUE_32BIT,
+  va, marker,
+  cmd_buffer->gfx9_eop_bug_va);
+   }
+}
diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index b676cb8b8e2..1708173817b 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -130,6 +130,7 @@ EXTENSIONS = [
 Extension('VK_EXT_transform_feedback',1, True),
 Extension('VK_EXT_vertex_attribute_divisor',  3, True),
 Extension('VK_EXT_ycbcr_image_arrays',1, True),
+Extension('VK_AMD_buffer_marker', 1, True),
 Extension('VK_AMD_draw_indirect_count',   1, True),
 Extension('VK_AMD_gcn_shader',1, True),
 Extension('VK_AMD_gpu_shader_half_float', 1, 
'device->rad_info.chip_class >= GFX8 && HAVE_LLVM >= 0x0800'),
-- 
2.22.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [ANNOUNCE] Mesa 19.1.1 release candidate

2019-06-21 Thread Juan A. Suarez Romero
Hello list,

The candidate for the Mesa 19.1.1 is now available. Currently we have:
 - 27 queued
 - 0 nominated (outstanding)
 - and 0 rejected patches


The current queue consists mostly of fixes for different drivers (RADV, ANV,
Nouveau, Virgl, V3D, R300g, ...)

The queue also contains different fixes for different parts (Meson build, GLX,
etc).

Take a look at section "Mesa stable queue" for more information


Testing reports/general approval

Any testing reports (or general approval of the state of the branch) will be
greatly appreciated.

The plan is to have 19.1.1 this Tuesday (25th June), around or shortly after
10:00 GMT.

If you have any questions or suggestions - be that about the current patch queue
or otherwise, please go ahead.


Trivial merge conflicts
---
commit 25a34df61439b25645d03510d6354cb1f5e8a185
Author: Kenneth Graunke 

iris: Fix iris_flush_and_dirty_history to actually dirty history.

(cherry picked from commit 64fb20ed326fa0e524582225faaa4bb28f6e4349)


Cheers,
J.A.


Mesa stable queue
-

Nominated (0)
==

Queued (27)
===
Alejandro Piñeiro (1):
  v3d: fix checking twice auf flag

Bas Nieuwenhuizen (5):
  radv: Skip transitions coming from external queue.
  radv: Decompress DCC when the image format is not allowed for buffers.
  radv: Fix vulkan build in meson.
  anv: Fix vulkan build in meson.
  meson: Allow building radeonsi with just the android platform.

Dave Airlie (1):
  nouveau: fix frees in unsupported IR error paths.

Eduardo Lima Mitev (1):
  freedreno/a5xx: Fix indirect draw max_indices calculation

Eric Engestrom (3):
  util/futex: fix dangling pointer use
  glx: fix glvnd pointer types
  util/os_file: resize buffer to what was actually needed

Gert Wollny (1):
  virgl: Assume sRGB write control for older guest kernels or virglrenderer 
hosts

Haihao Xiang (1):
  i965: support UYVY for external import only

Jason Ekstrand (1):
  anv: Set STATE_BASE_ADDRESS upper bounds on gen7

Kenneth Graunke (2):
  glsl: Fix out of bounds read in shader_cache_read_program_metadata
  iris: Fix iris_flush_and_dirty_history to actually dirty history.

Kevin Strasser (2):
  gallium/winsys/kms: Fix dumb buffer bpp
  st/mesa: Add rgbx handling for fp formats

Lionel Landwerlin (2):
  anv: do not parse genxml data without INTEL_DEBUG=bat
  intel/dump: fix segfault when the app hasn't accessed the device

Mathias Fröhlich (1):
  egl: Don't add hardware device if there is no render node v2.

Richard Thier (1):
  r300g: restore performance after RADEON_FLAG_NO_INTERPROCESS_SHARING was 
added

Rob Clark (1):
  freedreno/a6xx: un-swap X24S8_UINT

Samuel Pitoiset (4):
  radv: fix occlusion queries on VegaM
  radv: fix VK_EXT_memory_budget if one heap isn't available
  radv: fix FMASK expand with SRGB formats
  radv: disable viewport clamping even if FS doesn't write Z


Rejected (0)
=


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] panfrost: Kill the perf counters interface

2019-06-21 Thread Alyssa Rosenzweig
R-b, that's long overdue seeing as the DRM driver has a totally
different (better? :) ) interface anyway (Thank you!)



signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] panfrost: Kill the panfrost_driver abstraction

2019-06-21 Thread Alyssa Rosenzweig
Oh, nice! I'm going to give this a tentative Reviewed-by. The changes
themselves look good (and there shouldn't be functional changes). I do
want this to happen; I just need to give some thought to ensuring that
yes, this is really what we want to do.

I'm having trouble thinking of when we might not want this. Two issues
come to mind: Bifrost bring-up and device-free testing. That said, I
think Bifrost is a non-issue since the DRM driver will probably support
that before we do. As for device-free testing, it should not be too hard
to include a mock version of pan_drm.c that's compile-time selected (or
even a mock kernel module?). It's a useful thing to desire since often
you're just looking for issues that can be tracked via
PAN_MESA_DEBUG=trace and MIDGARD_MESA_DEBUG=shaders without needing a
physical board. Then again, boards are so cheap that it's probably not
worth our while to bother with that level of abstraction, so maybe this
is a no-op as well.

On Fri, Jun 21, 2019 at 03:57:27PM +0200, Boris Brezillon wrote:
> The non-drm backend is gone and there's no plan to bring it back to
> life. Let's get rid of the panfrost_driver abstraction and call the
> panfrost_drm_xxx() functions directly.
> 
> Signed-off-by: Boris Brezillon 
> ---
>  src/gallium/drivers/panfrost/pan_afbc.c |   2 +-
>  src/gallium/drivers/panfrost/pan_context.c  |  32 +++---
>  src/gallium/drivers/panfrost/pan_drm.c  | 102 ++--
>  src/gallium/drivers/panfrost/pan_job.c  |   2 +-
>  src/gallium/drivers/panfrost/pan_resource.c |  18 ++--
>  src/gallium/drivers/panfrost/pan_screen.c   |   8 +-
>  src/gallium/drivers/panfrost/pan_screen.h   |  76 +--
>  7 files changed, 109 insertions(+), 131 deletions(-)
> 
> diff --git a/src/gallium/drivers/panfrost/pan_afbc.c 
> b/src/gallium/drivers/panfrost/pan_afbc.c
> index 4bef833f1820..5621d1f333a3 100644
> --- a/src/gallium/drivers/panfrost/pan_afbc.c
> +++ b/src/gallium/drivers/panfrost/pan_afbc.c
> @@ -138,7 +138,7 @@ panfrost_enable_afbc(struct panfrost_context *ctx, struct 
> panfrost_resource *rsr
>  unsigned buffer_size = header_size + body_size;
>  
>  /* Allocate the AFBC slab itself, large enough to hold the above */
> -screen->driver->allocate_slab(screen, &rsrc->bo->afbc_slab,
> +panfrost_drm_allocate_slab(screen, &rsrc->bo->afbc_slab,
> ALIGN(buffer_size, 4096) / 4096,
> true, 0, 0, 0);
>  
> diff --git a/src/gallium/drivers/panfrost/pan_context.c 
> b/src/gallium/drivers/panfrost/pan_context.c
> index 867b49c55967..14d29f5d20f1 100644
> --- a/src/gallium/drivers/panfrost/pan_context.c
> +++ b/src/gallium/drivers/panfrost/pan_context.c
> @@ -87,7 +87,7 @@ panfrost_enable_checksum(struct panfrost_context *ctx, 
> struct panfrost_resource
>  /* 8 byte checksum per tile */
>  rsrc->bo->checksum_stride = tile_w * 8;
>  int pages = (((rsrc->bo->checksum_stride * tile_h) + 4095) / 4096);
> -screen->driver->allocate_slab(screen, &rsrc->bo->checksum_slab, 
> pages, false, 0, 0, 0);
> +panfrost_drm_allocate_slab(screen, &rsrc->bo->checksum_slab, pages, 
> false, 0, 0, 0);
>  
>  rsrc->bo->has_checksum = true;
>  }
> @@ -1431,14 +1431,14 @@ panfrost_submit_frame(struct panfrost_context *ctx, 
> bool flush_immediate,
>  /* If visual, we can stall a frame */
>  
>  if (!flush_immediate)
> -screen->driver->force_flush_fragment(ctx, fence);
> +panfrost_drm_force_flush_fragment(ctx, fence);
>  
>  screen->last_fragment_flushed = false;
>  screen->last_job = job;
>  
>  /* If readback, flush now (hurts the pipelined performance) */
>  if (flush_immediate)
> -screen->driver->force_flush_fragment(ctx, fence);
> +panfrost_drm_force_flush_fragment(ctx, fence);
>  #endif
>  }
>  
> @@ -2503,11 +2503,11 @@ panfrost_destroy(struct pipe_context *pipe)
>  if (panfrost->blitter)
>  util_blitter_destroy(panfrost->blitter);
>  
> -screen->driver->free_slab(screen, &panfrost->scratchpad);
> -screen->driver->free_slab(screen, &panfrost->varying_mem);
> -screen->driver->free_slab(screen, &panfrost->shaders);
> -screen->driver->free_slab(screen, &panfrost->tiler_heap);
> -screen->driver->free_slab(screen, &panfrost->tiler_polygon_list);
> +panfrost_drm_free_slab(screen, &panfrost->scratchpad);
> +panfrost_drm_free_slab(screen, &panfrost->varying_mem);
> +panfrost_drm_free_slab(screen, &panfrost->shaders);
> +panfrost_drm_free_slab(screen, &panfrost->tiler_heap);
> +panfrost_drm_free_slab(screen, &panfrost->tiler_polygon_list);
>  
>  ralloc_free(pipe);
>  }
> @@ -2660,12 +2660,12 @@ panfrost_setup_hardware(struct panfrost_context *ctx)
>  ctx->transient_pools[i].entries[0] = (struct 
> panfrost_m

[Mesa-dev] [PATCH 1/3] gallium/util: rewrite depth-stencil blit shaders

2019-06-21 Thread Marek Olšák
From: Marek Olšák 

- merge all 3 functions (Z, S, ZS)
- don't write the color output
- read the value from texel.x, then write it to position.z or stencil.y
  (don't use the value from texel.y or texel.z)
---
 src/gallium/auxiliary/util/u_blitter.c|  19 +-
 src/gallium/auxiliary/util/u_simple_shaders.c | 185 --
 src/gallium/auxiliary/util/u_simple_shaders.h |  25 +--
 3 files changed, 46 insertions(+), 183 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_blitter.c 
b/src/gallium/auxiliary/util/u_blitter.c
index 3dc49cd0958..8e4807ec670 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -1010,24 +1010,22 @@ void *blitter_get_fs_texfetch_depth(struct 
blitter_context_priv *ctx,
   if (use_txf)
  shader = &ctx->fs_texfetch_depth[target][1];
   else
  shader = &ctx->fs_texfetch_depth[target][0];
 
   /* Create the fragment shader on-demand. */
   if (!*shader) {
  enum tgsi_texture_type tgsi_tex;
  assert(!ctx->cached_all_shaders);
  tgsi_tex = util_pipe_tex_to_tgsi_tex(target, 0);
- *shader =
-util_make_fragment_tex_shader_writedepth(pipe, tgsi_tex,
- TGSI_INTERPOLATE_LINEAR,
- ctx->has_tex_lz, use_txf);
+ *shader = util_make_fs_blit_zs(pipe, PIPE_MASK_Z, tgsi_tex,
+ctx->has_tex_lz, use_txf);
   }
 
   return *shader;
}
 }
 
 static inline
 void *blitter_get_fs_texfetch_depthstencil(struct blitter_context_priv *ctx,
enum pipe_texture_target target,
unsigned nr_samples,
@@ -1055,25 +1053,22 @@ void *blitter_get_fs_texfetch_depthstencil(struct 
blitter_context_priv *ctx,
   if (use_txf)
  shader = &ctx->fs_texfetch_depthstencil[target][1];
   else
  shader = &ctx->fs_texfetch_depthstencil[target][0];
 
   /* Create the fragment shader on-demand. */
   if (!*shader) {
  enum tgsi_texture_type tgsi_tex;
  assert(!ctx->cached_all_shaders);
  tgsi_tex = util_pipe_tex_to_tgsi_tex(target, 0);
- *shader =
-util_make_fragment_tex_shader_writedepthstencil(pipe, tgsi_tex,
-
TGSI_INTERPOLATE_LINEAR,
-ctx->has_tex_lz,
-use_txf);
+ *shader = util_make_fs_blit_zs(pipe, PIPE_MASK_ZS, tgsi_tex,
+ctx->has_tex_lz, use_txf);
   }
 
   return *shader;
}
 }
 
 static inline
 void *blitter_get_fs_texfetch_stencil(struct blitter_context_priv *ctx,
   enum pipe_texture_target target,
   unsigned nr_samples,
@@ -1101,24 +1096,22 @@ void *blitter_get_fs_texfetch_stencil(struct 
blitter_context_priv *ctx,
   if (use_txf)
  shader = &ctx->fs_texfetch_stencil[target][1];
   else
  shader = &ctx->fs_texfetch_stencil[target][0];
 
   /* Create the fragment shader on-demand. */
   if (!*shader) {
  enum tgsi_texture_type tgsi_tex;
  assert(!ctx->cached_all_shaders);
  tgsi_tex = util_pipe_tex_to_tgsi_tex(target, 0);
- *shader =
-util_make_fragment_tex_shader_writestencil(pipe, tgsi_tex,
-   TGSI_INTERPOLATE_LINEAR,
-   ctx->has_tex_lz, 
use_txf);
+ *shader = util_make_fs_blit_zs(pipe, PIPE_MASK_S, tgsi_tex,
+ctx->has_tex_lz, use_txf);
   }
 
   return *shader;
}
 }
 
 
 /**
  * Generate and save all fragment shaders that we will ever need for
  * blitting.  Drivers which use the 'draw' fallbacks will typically use
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c 
b/src/gallium/auxiliary/util/u_simple_shaders.c
index d62a65579ae..c111eaf1db5 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -371,187 +371,76 @@ util_make_fragment_tex_shader(struct pipe_context *pipe,
 {
return util_make_fragment_tex_shader_writemask( pipe,
tex_target,
interp_mode,
TGSI_WRITEMASK_XYZW,
stype, dtype, 
load_level_zero,
use_txf);
 }
 
 
-/**
- * Make a simple fragment texture shader which reads an X component from
- * a texture and writes it as depth.
- */
-void *
-util_make_fragment_tex_shader_writedepth(struct pipe

[Mesa-dev] [PATCH 2/3] gallium/u_blitter: implement copying from ZS to color and vice versa

2019-06-21 Thread Marek Olšák
From: Marek Olšák 

This is for drivers that can't map depth and stencil and need to blit
them to a color texture for CPU access.

This is also useful for drivers using separate depth and stencil.
---
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h  |   6 +
 src/gallium/auxiliary/util/u_blitter.c| 147 
 src/gallium/auxiliary/util/u_blitter.h|  29 
 src/gallium/auxiliary/util/u_simple_shaders.c | 161 ++
 src/gallium/auxiliary/util/u_simple_shaders.h |   6 +
 5 files changed, 314 insertions(+), 35 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h 
b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index 54a1ee15b68..2ade618db00 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -158,20 +158,26 @@ OP11(UARL)
 
 OP13(UCMP)
 OP11(IABS)
 OP11(ISSG)
 
 OP11(IMG2HND)
 OP11(SAMP2HND)
 
 OP12(IMUL_HI)
 OP12(UMUL_HI)
+OP13(UBFE)
+OP11(F2D)
+OP11(D2F)
+OP11(U2D)
+OP11(D2U)
+OP12(DMUL)
 
 #undef OP00
 #undef OP01
 #undef OP10
 #undef OP11
 #undef OP12
 #undef OP13
 
 #undef OP00_LBL
 #undef OP01_LBL
diff --git a/src/gallium/auxiliary/util/u_blitter.c 
b/src/gallium/auxiliary/util/u_blitter.c
index 8e4807ec670..18a5c272454 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -95,20 +95,25 @@ struct blitter_context_priv
 
/* FS which outputs one sample from a multisample texture. */
void *fs_texfetch_col_msaa[5][PIPE_MAX_TEXTURE_TYPES];
void *fs_texfetch_depth_msaa[PIPE_MAX_TEXTURE_TYPES];
void *fs_texfetch_depthstencil_msaa[PIPE_MAX_TEXTURE_TYPES];
void *fs_texfetch_stencil_msaa[PIPE_MAX_TEXTURE_TYPES];
 
/* FS which outputs an average of all samples. */
void *fs_resolve[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2];
 
+   /* FS which unpacks color to ZS or packs ZS to color, matching
+* the ZS format. See util_blitter_get_color_format_for_zs().
+*/
+   void *fs_pack_color_zs[TGSI_TEXTURE_COUNT][10];
+
/* Blend state. */
void *blend[PIPE_MASK_RGBA+1][2]; /**< blend state with writemask */
void *blend_clear[GET_CLEAR_BLEND_STATE_IDX(PIPE_CLEAR_COLOR)+1];
 
/* Depth stencil alpha state. */
void *dsa_write_depth_stencil;
void *dsa_write_depth_keep_stencil;
void *dsa_keep_depth_stencil;
void *dsa_keep_depth_write_stencil;
 
@@ -504,20 +509,27 @@ void util_blitter_destroy(struct blitter_context *blitter)
  ctx->delete_fs_state(pipe, ctx->fs_texfetch_depthstencil_msaa[i]);
   if (ctx->fs_texfetch_stencil_msaa[i])
  ctx->delete_fs_state(pipe, ctx->fs_texfetch_stencil_msaa[i]);
 
   for (j = 0; j< ARRAY_SIZE(ctx->fs_resolve[i]); j++)
  for (f = 0; f < 2; f++)
 if (ctx->fs_resolve[i][j][f])
ctx->delete_fs_state(pipe, ctx->fs_resolve[i][j][f]);
}
 
+   for (i = 0; i < ARRAY_SIZE(ctx->fs_pack_color_zs); i++) {
+  for (j = 0; j < ARRAY_SIZE(ctx->fs_pack_color_zs[0]); j++) {
+ if (ctx->fs_pack_color_zs[i][j])
+ctx->delete_fs_state(pipe, ctx->fs_pack_color_zs[i][j]);
+  }
+   }
+
if (ctx->fs_empty)
   ctx->delete_fs_state(pipe, ctx->fs_empty);
if (ctx->fs_write_one_cbuf)
   ctx->delete_fs_state(pipe, ctx->fs_write_one_cbuf);
if (ctx->fs_write_all_cbufs)
   ctx->delete_fs_state(pipe, ctx->fs_write_all_cbufs);
 
pipe->delete_sampler_state(pipe, ctx->sampler_state_rect_linear);
pipe->delete_sampler_state(pipe, ctx->sampler_state_rect);
pipe->delete_sampler_state(pipe, ctx->sampler_state_linear);
@@ -975,20 +987,58 @@ static void *blitter_get_fs_texfetch_col(struct 
blitter_context_priv *ctx,
  *shader = util_make_fragment_tex_shader(pipe, tgsi_tex,
  TGSI_INTERPOLATE_LINEAR,
  stype, dtype,
  ctx->has_tex_lz, use_txf);
   }
 
   return *shader;
}
 }
 
+static inline
+void *blitter_get_fs_pack_color_zs(struct blitter_context_priv *ctx,
+   enum pipe_texture_target target,
+   unsigned nr_samples,
+   enum pipe_format zs_format,
+   bool dst_is_color)
+{
+   struct pipe_context *pipe = ctx->base.pipe;
+   enum tgsi_texture_type tgsi_tex =
+  util_pipe_tex_to_tgsi_tex(target, nr_samples);
+   int format_index = zs_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ? 0 :
+  zs_format == PIPE_FORMAT_S8_UINT_Z24_UNORM ? 1 :
+  zs_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ? 2 :
+  zs_format == PIPE_FORMAT_Z24X8_UNORM ? 3 :
+  zs_format == PIPE_FORMAT_X8Z24_UNORM ? 4 : -1;
+
+   if (format_index == -1) {
+  assert(0);
+  return NULL;
+   }
+
+   /* The first 5 shaders pack ZS to color, the last 5 shaders unpa

[Mesa-dev] [PATCH 3/3] radeonsi: use a fragment shader blit instead of DB->CB copy for ZS CPU mappings

2019-06-21 Thread Marek Olšák
From: Marek Olšák 

This mainly removes and simplifies code that is no longer needed.

There were some issues with the DB->CB stencil copy on gfx10, so let's
just use a fragment shader blit for all ZS mappings. It's more reliable.
---
 src/gallium/drivers/radeonsi/si_blit.c|  29 +---
 src/gallium/drivers/radeonsi/si_pipe.h|   9 +-
 src/gallium/drivers/radeonsi/si_state.c   |   2 +-
 src/gallium/drivers/radeonsi/si_texture.c | 166 +++---
 4 files changed, 52 insertions(+), 154 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index 5806342cca9..638f2ee4d24 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -173,45 +173,20 @@ si_blit_dbcb_copy(struct si_context *sctx,
}
 
sctx->decompression_enabled = false;
sctx->dbcb_depth_copy_enabled = false;
sctx->dbcb_stencil_copy_enabled = false;
si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 
return fully_copied_levels;
 }
 
-void si_blit_decompress_depth(struct pipe_context *ctx,
- struct si_texture *texture,
- struct si_texture *staging,
- unsigned first_level, unsigned last_level,
- unsigned first_layer, unsigned last_layer,
- unsigned first_sample, unsigned last_sample)
-{
-   const struct util_format_description *desc;
-   unsigned planes = 0;
-
-   assert(staging != NULL && "use si_blit_decompress_zs_in_place instead");
-
-   desc = util_format_description(staging->buffer.b.b.format);
-
-   if (util_format_has_depth(desc))
-   planes |= PIPE_MASK_Z;
-   if (util_format_has_stencil(desc))
-   planes |= PIPE_MASK_S;
-
-   si_blit_dbcb_copy(
-   (struct si_context *)ctx, texture, staging, planes,
-   u_bit_consecutive(first_level, last_level - first_level + 1),
-   first_layer, last_layer, first_sample, last_sample);
-}
-
 /* Helper function for si_blit_decompress_zs_in_place.
  */
 static void
 si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
  struct si_texture *texture,
  unsigned planes, unsigned level_mask,
  unsigned first_layer, unsigned last_layer)
 {
struct pipe_surface *zsurf, surf_tmpl = {{0}};
unsigned layer, max_layer, checked_last_layer;
@@ -348,21 +323,21 @@ si_decompress_depth(struct si_context *sctx,
u_log_printf(sctx->log,
 
"\n\n"
 "Decompress Depth (levels %u - %u, levels Z: 0x%x 
S: 0x%x)\n\n",
 first_level, last_level, levels_z, levels_s);
 
/* We may have to allocate the flushed texture here when called from
 * si_decompress_subresource.
 */
if (copy_planes &&
(tex->flushed_depth_texture ||
-si_init_flushed_depth_texture(&sctx->b, &tex->buffer.b.b, NULL))) {
+si_init_flushed_depth_texture(&sctx->b, &tex->buffer.b.b))) {
struct si_texture *dst = tex->flushed_depth_texture;
unsigned fully_copied_levels;
unsigned levels = 0;
 
assert(tex->flushed_depth_texture);
 
if (util_format_is_depth_and_stencil(dst->buffer.b.b.format))
copy_planes = PIPE_MASK_Z | PIPE_MASK_S;
 
if (copy_planes & PIPE_MASK_Z) {
@@ -1242,21 +1217,21 @@ static void si_blit(struct pipe_context *ctx,
assert(util_blitter_is_blit_supported(sctx->blitter, info));
 
/* The driver doesn't decompress resources automatically while
 * u_blitter is rendering. */
vi_disable_dcc_if_incompatible_format(sctx, info->src.resource,
  info->src.level,
  info->src.format);
vi_disable_dcc_if_incompatible_format(sctx, info->dst.resource,
  info->dst.level,
  info->dst.format);
-   si_decompress_subresource(ctx, info->src.resource, info->mask,
+   si_decompress_subresource(ctx, info->src.resource, PIPE_MASK_RGBAZS,
  info->src.level,
  info->src.box.z,
  info->src.box.z + info->src.box.depth - 1);
 
if (sctx->screen->debug_flags & DBG(FORCE_DMA) &&
util_try_blit_via_copy_region(ctx, info))
return;
 
si_blitter_begin(sctx, SI_BLIT |
 (info->render_condition_enable ? 0 : 
SI_DISABLE_RENDER_COND));
diff --git a/src/gallium/drivers/radeonsi/si_p

[Mesa-dev] [PATCH] ac: change ac_query_gpu_info() signatures

2019-06-21 Thread Emil Velikov
From: Emil Velikov 

Currently libdrm_amdgpu provides a typedef of the various handles. While
the goal was to make those opaque, it effectively became part of the API.

To the best of my knowledge there are two ways to have opaque handles:
 - "typedef void *foo;" - rather messy IMHO
 - "struct foo;" and use "struct foo *" through the API

In our case amdgpu_device_handle is used only internally, plus the
respective code is not used or applicable for r300 and r600. Hence we
copied the typedef.

Seemingly this will be a problem since libdrm_amdgpu wants to change
the API, while not updating the code(?).

Either way, we can safely s/amdgpu_device_handle/void */ and carry on.

Cc: Michel Dänzer 
Signed-off-by: Emil Velikov 
---
 src/amd/common/ac_gpu_info.c | 3 ++-
 src/amd/common/ac_gpu_info.h | 4 +---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index db7f9e47ce1..8bd47cc26b3 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -92,7 +92,7 @@ static bool has_syncobj(int fd)
return value ? true : false;
 }
 
-bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
+bool ac_query_gpu_info(int fd, void *dev_p,
   struct radeon_info *info,
   struct amdgpu_gpu_info *amdinfo)
 {
@@ -104,6 +104,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
struct amdgpu_gds_resource_info gds = {};
uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature 
= 0;
int r, i, j;
+   amdgpu_device_handle dev = dev_p;
drmDevicePtr devinfo;
 
/* Get PCI info. */
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index 11fb77eee87..ba4940af142 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -35,8 +35,6 @@
 extern "C" {
 #endif
 
-/* Prior to C11 the following may trigger a typedef redeclaration warning */
-typedef struct amdgpu_device *amdgpu_device_handle;
 struct amdgpu_gpu_info;
 
 struct radeon_info {
@@ -147,7 +145,7 @@ struct radeon_info {
uint32_tcik_macrotile_mode_array[16];
 };
 
-bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
+bool ac_query_gpu_info(int fd, void *dev_p,
   struct radeon_info *info,
   struct amdgpu_gpu_info *amdinfo);
 
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev