From: Marek Olšák <[email protected]>

The enabled render backend mask only needs to be fixed up for r600g, and
that can be done once in the screen instead of in every context.
We don't need an IB submission for every new context created for GCN.
---
 src/gallium/drivers/r600/r600_pipe.c              |  2 +-
 src/gallium/drivers/radeon/r600_pipe_common.c     |  1 +
 src/gallium/drivers/radeon/r600_pipe_common.h     |  3 +--
 src/gallium/drivers/radeon/r600_query.c           | 33 +++++++++++------------
 src/gallium/drivers/radeonsi/si_pipe.c            |  1 -
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |  3 +++
 6 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 98ceebf..f126af0 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -200,21 +200,20 @@ static struct pipe_context *r600_create_context(struct 
pipe_screen *screen,
        if (rscreen->b.debug_flags & DBG_FORCE_DMA)
                rctx->b.b.resource_copy_region = rctx->b.dma_copy;
 
        rctx->blitter = util_blitter_create(&rctx->b.b);
        if (rctx->blitter == NULL)
                goto fail;
        util_blitter_set_texture_multisample(rctx->blitter, rscreen->has_msaa);
        rctx->blitter->draw_rectangle = r600_draw_rectangle;
 
        r600_begin_new_cs(rctx);
-       r600_query_init_backend_mask(&rctx->b); /* this emits commands and must 
be last */
 
        rctx->dummy_pixel_shader =
                util_make_fragment_cloneinput_shader(&rctx->b.b, 0,
                                                     TGSI_SEMANTIC_GENERIC,
                                                     TGSI_INTERPOLATE_CONSTANT);
        rctx->b.b.bind_fs_state(&rctx->b.b, rctx->dummy_pixel_shader);
 
        return &rctx->b.b;
 
 fail:
@@ -728,12 +727,13 @@ struct pipe_screen *r600_screen_create(struct 
radeon_winsys *ws)
        for (i = 0; i < 256; i++) {
                printf("%02X", map[i]);
                if (i % 16 == 15)
                        printf("\n");
        }
 #endif
 
        if (rscreen->b.debug_flags & DBG_TEST_DMA)
                r600_test_dma(&rscreen->b);
 
+       r600_query_fix_enabled_rb_mask(rscreen);
        return &rscreen->b.b;
 }
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 5113765..5be21b4 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -1300,20 +1300,21 @@ bool r600_common_screen_init(struct r600_common_screen 
*rscreen,
                printf("num_good_compute_units = %i\n", 
rscreen->info.num_good_compute_units);
                printf("max_se = %i\n", rscreen->info.max_se);
                printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
 
                printf("r600_gb_backend_map = %i\n", 
rscreen->info.r600_gb_backend_map);
                printf("r600_gb_backend_map_valid = %i\n", 
rscreen->info.r600_gb_backend_map_valid);
                printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
                printf("num_render_backends = %i\n", 
rscreen->info.num_render_backends);
                printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
                printf("pipe_interleave_bytes = %i\n", 
rscreen->info.pipe_interleave_bytes);
+               printf("enabled_rb_mask = 0x%x\n", 
rscreen->info.enabled_rb_mask);
        }
        return true;
 }
 
 void r600_destroy_common_screen(struct r600_common_screen *rscreen)
 {
        r600_perfcounters_destroy(rscreen);
        r600_gpu_load_kill_thread(rscreen);
 
        pipe_mutex_destroy(rscreen->gpu_load_mutex);
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index f3e42ee..bc88fab 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -574,21 +574,20 @@ struct r600_common_context {
        /* Additional context states. */
        unsigned flags; /* flush flags */
 
        /* Queries. */
        /* Maintain the list of active queries for pausing between IBs. */
        int                             num_occlusion_queries;
        int                             num_perfect_occlusion_queries;
        struct list_head                active_queries;
        unsigned                        num_cs_dw_queries_suspend;
        /* Additional hardware info. */
-       unsigned                        backend_mask;
        unsigned                        max_db; /* for OQ */
        /* Misc stats. */
        unsigned                        num_draw_calls;
        unsigned                        num_spill_draw_calls;
        unsigned                        num_compute_calls;
        unsigned                        num_spill_compute_calls;
        unsigned                        num_dma_calls;
        unsigned                        num_cp_dma_calls;
        unsigned                        num_vs_flushes;
        unsigned                        num_ps_flushes;
@@ -763,21 +762,21 @@ unsigned r600_end_counter(struct r600_common_screen 
*rscreen, unsigned type,
                          uint64_t begin);
 
 /* r600_perfcounters.c */
 void r600_perfcounters_destroy(struct r600_common_screen *rscreen);
 
 /* r600_query.c */
 void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
 void r600_query_init(struct r600_common_context *rctx);
 void r600_suspend_queries(struct r600_common_context *ctx);
 void r600_resume_queries(struct r600_common_context *ctx);
-void r600_query_init_backend_mask(struct r600_common_context *ctx);
+void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen);
 
 /* r600_streamout.c */
 void r600_streamout_buffers_dirty(struct r600_common_context *rctx);
 void r600_set_streamout_targets(struct pipe_context *ctx,
                                unsigned num_targets,
                                struct pipe_stream_output_target **targets,
                                const unsigned *offset);
 void r600_emit_streamout_end(struct r600_common_context *rctx);
 void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
                                             unsigned type, int diff);
diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 25e7f5b..564f59e 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -422,28 +422,29 @@ static bool r600_query_hw_prepare_buffer(struct 
r600_common_context *ctx,
        uint32_t *results = ctx->ws->buffer_map(buffer->buf, NULL,
                                                PIPE_TRANSFER_WRITE |
                                                PIPE_TRANSFER_UNSYNCHRONIZED);
        if (!results)
                return false;
 
        memset(results, 0, buffer->b.b.width0);
 
        if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
            query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
+               unsigned enabled_rb_mask = ctx->screen->info.enabled_rb_mask;
                unsigned num_results;
                unsigned i, j;
 
                /* Set top bits for unused backends. */
                num_results = buffer->b.b.width0 / query->result_size;
                for (j = 0; j < num_results; j++) {
                        for (i = 0; i < ctx->max_db; i++) {
-                               if (!(ctx->backend_mask & (1<<i))) {
+                               if (!(enabled_rb_mask & (1<<i))) {
                                        results[(i * 4)+1] = 0x80000000;
                                        results[(i * 4)+3] = 0x80000000;
                                }
                        }
                        results += 4 * ctx->max_db;
                }
        }
 
        return true;
 }
@@ -1598,62 +1599,65 @@ void r600_resume_queries(struct r600_common_context 
*ctx)
        assert(ctx->num_cs_dw_queries_suspend == 0);
 
        /* Check CS space here. Resuming must not be interrupted by flushes. */
        ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, true);
 
        LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) {
                r600_query_hw_emit_start(ctx, query);
        }
 }
 
-/* Get backends mask */
-void r600_query_init_backend_mask(struct r600_common_context *ctx)
+/* Fix radeon_info::enabled_rb_mask for R600, R700, EVERGREEN, NI. */
+void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen)
 {
+       struct r600_common_context *ctx =
+               (struct r600_common_context*)rscreen->aux_context;
        struct radeon_winsys_cs *cs = ctx->gfx.cs;
        struct r600_resource *buffer;
        uint32_t *results;
-       unsigned num_backends = ctx->screen->info.num_render_backends;
        unsigned i, mask = 0;
 
+       assert(rscreen->chip_class <= CAYMAN);
+
        /* if backend_map query is supported by the kernel */
-       if (ctx->screen->info.r600_gb_backend_map_valid) {
-               unsigned num_tile_pipes = ctx->screen->info.num_tile_pipes;
-               unsigned backend_map = ctx->screen->info.r600_gb_backend_map;
+       if (rscreen->info.r600_gb_backend_map_valid) {
+               unsigned num_tile_pipes = rscreen->info.num_tile_pipes;
+               unsigned backend_map = rscreen->info.r600_gb_backend_map;
                unsigned item_width, item_mask;
 
                if (ctx->chip_class >= EVERGREEN) {
                        item_width = 4;
                        item_mask = 0x7;
                } else {
                        item_width = 2;
                        item_mask = 0x3;
                }
 
                while (num_tile_pipes--) {
                        i = backend_map & item_mask;
                        mask |= (1<<i);
                        backend_map >>= item_width;
                }
                if (mask != 0) {
-                       ctx->backend_mask = mask;
+                       rscreen->info.enabled_rb_mask = mask;
                        return;
                }
        }
 
        /* otherwise backup path for older kernels */
 
        /* create buffer for event data */
        buffer = (struct r600_resource*)
                pipe_buffer_create(ctx->b.screen, 0,
                                   PIPE_USAGE_STAGING, ctx->max_db*16);
        if (!buffer)
-               goto err;
+               return;
 
        /* initialize buffer with zeroes */
        results = r600_buffer_map_sync_with_rings(ctx, buffer, 
PIPE_TRANSFER_WRITE);
        if (results) {
                memset(results, 0, ctx->max_db * 4 * 4);
 
                /* emit EVENT_WRITE for ZPASS_DONE */
                radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
                radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | 
EVENT_INDEX(1));
                radeon_emit(cs, buffer->gpu_address);
@@ -1668,29 +1672,22 @@ void r600_query_init_backend_mask(struct 
r600_common_context *ctx)
                        for(i = 0; i < ctx->max_db; i++) {
                                /* at least highest bit will be set if backend 
is used */
                                if (results[i*4 + 1])
                                        mask |= (1<<i);
                        }
                }
        }
 
        r600_resource_reference(&buffer, NULL);
 
-       if (mask != 0) {
-               ctx->backend_mask = mask;
-               return;
-       }
-
-err:
-       /* fallback to old method - set num_backends lower bits to 1 */
-       ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends);
-       return;
+       if (mask)
+               rscreen->info.enabled_rb_mask = mask;
 }
 
 #define XFULL(name_, query_type_, type_, result_type_, group_id_) \
        { \
                .name = name_, \
                .query_type = R600_QUERY_##query_type_, \
                .type = PIPE_DRIVER_QUERY_TYPE_##type_, \
                .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
                .group_id = group_id_ \
        }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 8c54997..cb31113 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -246,21 +246,20 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
 
        sctx->blitter = util_blitter_create(&sctx->b.b);
        if (sctx->blitter == NULL)
                goto fail;
        sctx->blitter->draw_rectangle = r600_draw_rectangle;
 
        sctx->sample_mask.sample_mask = 0xffff;
 
        /* these must be last */
        si_begin_new_cs(sctx);
-       r600_query_init_backend_mask(&sctx->b); /* this emits commands and must 
be last */
 
        /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
         * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
        if (sctx->b.chip_class == CIK) {
                sctx->null_const_buf.buffer = pipe_buffer_create(screen, 
PIPE_BIND_CONSTANT_BUFFER,
                                                                 
PIPE_USAGE_DEFAULT, 16);
                if (!sctx->null_const_buf.buffer)
                        goto fail;
                sctx->null_const_buf.buffer_size = 
sctx->null_const_buf.buffer->width0;
 
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index c85e427..2a34d51 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -377,20 +377,23 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
      * unlikely to succeed. */
     ws->info.max_alloc_size = MAX2(ws->info.vram_size, ws->info.gart_size) * 
0.7;
     if (ws->info.drm_minor < 40)
         ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 256*1024*1024);
 
     /* Get max clock frequency info and convert it to MHz */
     radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL,
                          &ws->info.max_shader_clock);
     ws->info.max_shader_clock /= 1000;
 
+    /* Default value. */
+    ws->info.enabled_rb_mask = u_bit_consecutive(0, 
ws->info.num_render_backends);
+    /* This fails on non-GCN or older kernels: */
     radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL,
                          &ws->info.enabled_rb_mask);
 
     ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 
     /* Generation-specific queries. */
     if (ws->gen == DRV_R300) {
         if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES,
                                   "GB pipe count",
                                   &ws->info.r300_num_gb_pipes))
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to