From: Nicolai Hähnle <[email protected]>

This is only needed for r600 which doesn't have ARB_query_buffer_object and
therefore wouldn't really need the fences, but let's be optimistic about
filling in this feature gap eventually.

Cc: Dieter Nützel <[email protected]>
---
 src/gallium/drivers/radeon/r600_pipe_common.c |  7 ++++++-
 src/gallium/drivers/radeon/r600_pipe_common.h |  2 +-
 src/gallium/drivers/radeon/r600_query.c       | 13 +++++++------
 src/gallium/drivers/radeonsi/si_perfcounter.c |  2 +-
 4 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index e9377b2..6dc92cb 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -73,21 +73,21 @@ void radeon_shader_binary_clean(struct radeon_shader_binary 
*b)
        FREE(b->global_symbol_offsets);
        FREE(b->relocs);
        FREE(b->disasm_string);
        FREE(b->llvm_ir_string);
 }
 
 /*
  * pipe_context
  */
 
-void r600_gfx_write_fence(struct r600_common_context *ctx,
+void r600_gfx_write_fence(struct r600_common_context *ctx, struct 
r600_resource *buf,
                          uint64_t va, uint32_t old_value, uint32_t new_value)
 {
        struct radeon_winsys_cs *cs = ctx->gfx.cs;
 
        if (ctx->chip_class == CIK) {
                /* Two EOP events are required to make all engines go idle
                 * (and optional cache flushes executed) before the timestamp
                 * is written.
                 */
                radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
@@ -99,29 +99,34 @@ void r600_gfx_write_fence(struct r600_common_context *ctx,
                radeon_emit(cs, 0); /* unused */
        }
 
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
        radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
                        EVENT_INDEX(5));
        radeon_emit(cs, va);
        radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1));
        radeon_emit(cs, new_value); /* immediate data */
        radeon_emit(cs, 0); /* unused */
+
+       r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE, 
RADEON_PRIO_QUERY);
 }
 
 unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen)
 {
        unsigned dwords = 6;
 
        if (screen->chip_class == CIK)
                dwords *= 2;
 
+       if (!screen->info.has_virtual_memory)
+               dwords += 2;
+
        return dwords;
 }
 
 void r600_gfx_wait_fence(struct r600_common_context *ctx,
                         uint64_t va, uint32_t ref, uint32_t mask)
 {
        struct radeon_winsys_cs *cs = ctx->gfx.cs;
 
        radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
        radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index ec7f7c0..cdcc80b 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -696,21 +696,21 @@ struct pipe_resource * r600_aligned_buffer_create(struct 
pipe_screen *screen,
                                                  unsigned alignment);
 struct pipe_resource *
 r600_buffer_from_user_memory(struct pipe_screen *screen,
                             const struct pipe_resource *templ,
                             void *user_memory);
 void
 r600_invalidate_resource(struct pipe_context *ctx,
                         struct pipe_resource *resource);
 
 /* r600_common_pipe.c */
-void r600_gfx_write_fence(struct r600_common_context *ctx,
+void r600_gfx_write_fence(struct r600_common_context *ctx, struct 
r600_resource *buf,
                          uint64_t va, uint32_t old_value, uint32_t new_value);
 unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
 void r600_gfx_wait_fence(struct r600_common_context *ctx,
                         uint64_t va, uint32_t ref, uint32_t mask);
 void r600_draw_rectangle(struct blitter_context *blitter,
                         int x1, int y1, int x2, int y2, float depth,
                         enum blitter_attrib_type type,
                         const union pipe_color_union *attrib);
 bool r600_common_screen_init(struct r600_common_screen *rscreen,
                             struct radeon_winsys *ws);
diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 0908a6c..925c950 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -609,32 +609,32 @@ static void r600_query_hw_emit_start(struct 
r600_common_context *ctx,
 
        ctx->num_cs_dw_queries_suspend += query->num_cs_dw_end;
 }
 
 static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
                                       struct r600_query_hw *query,
                                       struct r600_resource *buffer,
                                       uint64_t va)
 {
        struct radeon_winsys_cs *cs = ctx->gfx.cs;
+       uint64_t fence_va = 0;
 
        switch (query->b.type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
        case PIPE_QUERY_OCCLUSION_PREDICATE:
                va += 8;
                radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
                radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | 
EVENT_INDEX(1));
                radeon_emit(cs, va);
                radeon_emit(cs, (va >> 32) & 0xFFFF);
 
-               va += ctx->max_db * 16 - 8;
-               r600_gfx_write_fence(ctx, va, 0, 0x80000000);
+               fence_va = va + ctx->max_db * 16 - 8;
                break;
        case PIPE_QUERY_PRIMITIVES_EMITTED:
        case PIPE_QUERY_PRIMITIVES_GENERATED:
        case PIPE_QUERY_SO_STATISTICS:
        case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
                va += query->result_size/2;
                radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
                radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | 
EVENT_INDEX(3));
                radeon_emit(cs, va);
                radeon_emit(cs, (va >> 32) & 0xFFFF);
@@ -643,41 +643,42 @@ static void r600_query_hw_do_emit_stop(struct 
r600_common_context *ctx,
                va += 8;
                /* fall through */
        case PIPE_QUERY_TIMESTAMP:
                radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
                radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) | 
EVENT_INDEX(5));
                radeon_emit(cs, va);
                radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
                radeon_emit(cs, 0);
                radeon_emit(cs, 0);
 
-               va += 8;
-               r600_gfx_write_fence(ctx, va, 0, 0x80000000);
+               fence_va = va + 8;
                break;
        case PIPE_QUERY_PIPELINE_STATISTICS: {
                unsigned sample_size = (query->result_size - 8) / 2;
 
                va += sample_size;
                radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
                radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | 
EVENT_INDEX(2));
                radeon_emit(cs, va);
                radeon_emit(cs, (va >> 32) & 0xFFFF);
 
-               va += sample_size;
-               r600_gfx_write_fence(ctx, va, 0, 0x80000000);
+               fence_va = va + sample_size;
                break;
        }
        default:
                assert(0);
        }
        r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
                        RADEON_PRIO_QUERY);
+
+       if (fence_va)
+               r600_gfx_write_fence(ctx, query->buffer.buf, fence_va, 0, 
0x80000000);
 }
 
 static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
                                    struct r600_query_hw *query)
 {
        uint64_t va;
 
        if (!query->buffer.buf)
                return; // previous buffer allocation failure
 
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c 
b/src/gallium/drivers/radeonsi/si_perfcounter.c
index d0c5392..0f5c28c 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -584,21 +584,21 @@ static void si_pc_emit_start(struct r600_common_context 
*ctx,
                               S_036020_PERFMON_STATE(V_036020_START_COUNTING));
 }
 
 /* Note: The buffer was already added in si_pc_emit_start, so we don't have to
  * do it again in here. */
 static void si_pc_emit_stop(struct r600_common_context *ctx,
                            struct r600_resource *buffer, uint64_t va)
 {
        struct radeon_winsys_cs *cs = ctx->gfx.cs;
 
-       r600_gfx_write_fence(ctx, va, 1, 0);
+       r600_gfx_write_fence(ctx, buffer, va, 1, 0);
        r600_gfx_wait_fence(ctx, va, 0, 0xffffffff);
 
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
        radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_SAMPLE) | 
EVENT_INDEX(0));
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
        radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_STOP) | 
EVENT_INDEX(0));
        radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
                               S_036020_PERFMON_STATE(V_036020_STOP_COUNTING) |
                               S_036020_PERFMON_SAMPLE_ENABLE(1));
 }
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to