From: Nicolai Hähnle <nicolai.haeh...@amd.com>

---
 src/amd/common/ac_rtld.c                        | 9 +++++++++
 src/amd/common/ac_rtld.h                        | 9 +++++++++
 src/gallium/drivers/radeonsi/si_debug_options.h | 1 +
 src/gallium/drivers/radeonsi/si_shader.c        | 3 +++
 4 files changed, 22 insertions(+)

diff --git a/src/amd/common/ac_rtld.c b/src/amd/common/ac_rtld.c
index 92020c5f0dd..c750dbfa9cb 100644
--- a/src/amd/common/ac_rtld.c
+++ b/src/amd/common/ac_rtld.c
@@ -236,20 +236,21 @@ static bool read_private_lds_symbols(struct ac_rtld_binary *binary,
 bool ac_rtld_open(struct ac_rtld_binary *binary,
                  struct ac_rtld_open_info i)
 {
        /* One of the libelf implementations
         * (http://www.mr511.de/software/english.htm) requires calling
         * elf_version() before elf_memory().
         */
        elf_version(EV_CURRENT);
 
        memset(binary, 0, sizeof(*binary));
+       memcpy(&binary->options, &i.options, sizeof(binary->options));
        binary->num_parts = i.num_parts;
        binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
        if (!binary->parts)
                return false;
 
        uint64_t pasted_text_size = 0;
        uint64_t rx_align = 1;
        uint64_t rx_size = 0;
 
 #define report_if(cond) \
@@ -283,20 +284,23 @@ bool ac_rtld_open(struct ac_rtld_binary *binary,
        uint64_t shared_lds_size = 0;
        if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
                goto fail;
        report_if(shared_lds_size > max_lds_size);
        binary->lds_size = shared_lds_size;
 
        /* First pass over all parts: open ELFs, pre-determine the placement of
         * sections in the memory image, and collect and layout private LDS symbols. */
        uint32_t lds_end_align = 0;
 
+       if (binary->options.halt_at_entry)
+               pasted_text_size += 4;
+
        for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
                struct ac_rtld_part *part = &binary->parts[part_idx];
                unsigned part_lds_symbols_begin =
                        util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
 
                part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
                report_elf_if(!part->elf);
 
                const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
                report_elf_if(!ehdr);
@@ -685,20 +689,25 @@ bool ac_rtld_upload(struct ac_rtld_upload_info *u)
                } \
        } while (false)
 #define report_elf_if(cond) \
        do { \
                if ((cond)) { \
                        report_errorf(#cond); \
                        return false; \
                } \
        } while (false)
 
+       if (u->binary->options.halt_at_entry) {
+               /* s_sethalt 1 */
+               *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
+       }
+
        /* First pass: upload raw section data and lay out private LDS symbols. */
        for (unsigned i = 0; i < u->binary->num_parts; ++i) {
                struct ac_rtld_part *part = &u->binary->parts[i];
 
                Elf_Scn *section = NULL;
                while ((section = elf_nextscn(part->elf, section))) {
                        Elf64_Shdr *shdr = elf64_getshdr(section);
                        struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
 
                        if (!s->is_rx)
diff --git a/src/amd/common/ac_rtld.h b/src/amd/common/ac_rtld.h
index 01c29b50817..b13270b181d 100644
--- a/src/amd/common/ac_rtld.h
+++ b/src/amd/common/ac_rtld.h
@@ -35,22 +35,30 @@ struct ac_shader_config;
 struct radeon_info;
 
 struct ac_rtld_symbol {
        const char *name;
        uint32_t size;
        uint32_t align;
        uint64_t offset; /* filled in by ac_rtld_open */
        unsigned part_idx; /* shader part in which this symbol appears */
 };
 
+struct ac_rtld_options {
+       /* When set, ac_rtld_open reserves 4 extra bytes of text and
+        * ac_rtld_upload writes s_sethalt 1 at the start of rx_ptr. */
+       bool halt_at_entry:1;
+};
+
 /* Lightweight wrapper around underlying ELF objects. */
 struct ac_rtld_binary {
+       struct ac_rtld_options options;
+
        /* Required buffer sizes, currently read/executable only. */
        uint64_t rx_size;
 
        uint64_t rx_end_markers;
 
        unsigned num_parts;
        struct ac_rtld_part *parts;
 
        struct util_dynarray lds_symbols;
        uint32_t lds_size;
@@ -68,20 +76,21 @@ struct ac_rtld_binary {
 typedef bool (*ac_rtld_get_external_symbol_cb)(
        void *cb_data, const char *symbol, uint64_t *value);
 
 /**
  * Lifetimes of \ref info, in-memory ELF objects, and the names of
  * \ref shared_lds_symbols must extend until \ref ac_rtld_close is called on
  * the opened binary.
  */
 struct ac_rtld_open_info {
        const struct radeon_info *info;
+       struct ac_rtld_options options;
 
        unsigned num_parts;
        const char * const *elf_ptrs; /* in-memory ELF objects of each part */
        const size_t *elf_sizes; /* sizes of corresponding in-memory ELF objects in bytes */
 
        /* Shared LDS symbols are layouted such that they are accessible from
         * all shader parts. Non-shared (private) LDS symbols of one part may
         * overlap private LDS symbols of another shader part.
         */
        unsigned num_shared_lds_symbols;
diff --git a/src/gallium/drivers/radeonsi/si_debug_options.h b/src/gallium/drivers/radeonsi/si_debug_options.h
index aa8d64e1b88..d6cb3157632 100644
--- a/src/gallium/drivers/radeonsi/si_debug_options.h
+++ b/src/gallium/drivers/radeonsi/si_debug_options.h
@@ -1,10 +1,11 @@
 OPT_BOOL(clear_db_cache_before_clear, false, "Clear DB cache before fast depth clear")
 OPT_BOOL(enable_nir, false, "Enable NIR")
 OPT_BOOL(aux_debug, false, "Generate ddebug_dumps for the auxiliary context")
 OPT_BOOL(sync_compile, false, "Always compile synchronously (will cause stalls)")
 OPT_BOOL(dump_shader_binary, false, "Dump shader binary as part of ddebug_dumps")
 OPT_BOOL(debug_disassembly, false, "Report shader disassembly as part of driver debug messages (for shader db)")
+OPT_BOOL(halt_shaders, false, "Halt shaders at the start (will hang)")
 OPT_BOOL(vs_fetch_always_opencode, false, "Always open code vertex fetches (less efficient, purely for testing)")
 OPT_BOOL(prim_restart_tri_strips_only, false, "Only enable primitive restart for triangle strips")
 
 #undef OPT_BOOL
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 92c68f21459..3c3d74ce7af 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5126,20 +5126,23 @@ static bool si_shader_binary_open(struct si_screen *screen,
                 * shader->config.lds_size is set correctly below.
                 */
                struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
                sym->name = "esgs_ring";
                sym->size = shader->gs_info.esgs_ring_size;
                sym->align = 64 * 1024;
        }
 
        bool ok = ac_rtld_open(rtld, (struct ac_rtld_open_info){
                        .info = &screen->info,
+                       .options = {
+                               .halt_at_entry = screen->options.halt_shaders,
+                       },
                        .num_parts = num_parts,
                        .elf_ptrs = part_elfs,
                        .elf_sizes = part_sizes,
                        .num_shared_lds_symbols = num_lds_symbols,
                        .shared_lds_symbols = lds_symbols });
 
        if (rtld->lds_size > 0) {
                unsigned alloc_granularity = screen->info.chip_class >= GFX7 ? 512 : 256;
                shader->config.lds_size =
                        align(rtld->lds_size, alloc_granularity) / alloc_granularity;
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to