From: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/amd/common/ac_rtld.c | 9 +++++++++ src/amd/common/ac_rtld.h | 9 +++++++++ src/gallium/drivers/radeonsi/si_debug_options.h | 1 + src/gallium/drivers/radeonsi/si_shader.c | 3 +++ 4 files changed, 22 insertions(+)
diff --git a/src/amd/common/ac_rtld.c b/src/amd/common/ac_rtld.c index 92020c5f0dd..c750dbfa9cb 100644 --- a/src/amd/common/ac_rtld.c +++ b/src/amd/common/ac_rtld.c @@ -236,20 +236,21 @@ static bool read_private_lds_symbols(struct ac_rtld_binary *binary, bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i) { /* One of the libelf implementations * (http://www.mr511.de/software/english.htm) requires calling * elf_version() before elf_memory(). */ elf_version(EV_CURRENT); memset(binary, 0, sizeof(*binary)); + memcpy(&binary->options, &i.options, sizeof(binary->options)); binary->num_parts = i.num_parts; binary->parts = calloc(sizeof(*binary->parts), i.num_parts); if (!binary->parts) return false; uint64_t pasted_text_size = 0; uint64_t rx_align = 1; uint64_t rx_size = 0; #define report_if(cond) \ @@ -283,20 +284,23 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, uint64_t shared_lds_size = 0; if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size)) goto fail; report_if(shared_lds_size > max_lds_size); binary->lds_size = shared_lds_size; /* First pass over all parts: open ELFs, pre-determine the placement of * sections in the memory image, and collect and layout private LDS symbols. */ uint32_t lds_end_align = 0; + if (binary->options.halt_at_entry) + pasted_text_size += 4; + for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) { struct ac_rtld_part *part = &binary->parts[part_idx]; unsigned part_lds_symbols_begin = util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol); part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]); report_elf_if(!part->elf); const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf); report_elf_if(!ehdr); @@ -685,20 +689,25 @@ bool ac_rtld_upload(struct ac_rtld_upload_info *u) } \ } while (false) #define report_elf_if(cond) \ do { \ if ((cond)) { \ report_errorf(#cond); \ return false; \ } \ } while (false) + if (u->binary->options.halt_at_entry) { + /* s_sethalt 1 */ + *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001); + } + /* First pass: upload raw section data and lay out private LDS symbols. */ for (unsigned i = 0; i < u->binary->num_parts; ++i) { struct ac_rtld_part *part = &u->binary->parts[i]; Elf_Scn *section = NULL; while ((section = elf_nextscn(part->elf, section))) { Elf64_Shdr *shdr = elf64_getshdr(section); struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)]; if (!s->is_rx) diff --git a/src/amd/common/ac_rtld.h b/src/amd/common/ac_rtld.h index 01c29b50817..b13270b181d 100644 --- a/src/amd/common/ac_rtld.h +++ b/src/amd/common/ac_rtld.h @@ -35,22 +35,30 @@ struct ac_shader_config; struct radeon_info; struct ac_rtld_symbol { const char *name; uint32_t size; uint32_t align; uint64_t offset; /* filled in by ac_rtld_open */ unsigned part_idx; /* shader part in which this symbol appears */ }; +struct ac_rtld_options { + /* Loader will insert an s_sethalt 1 instruction as the + * first instruction. */ + bool halt_at_entry:1; +}; + /* Lightweight wrapper around underlying ELF objects. */ struct ac_rtld_binary { + struct ac_rtld_options options; + /* Required buffer sizes, currently read/executable only. */ uint64_t rx_size; uint64_t rx_end_markers; unsigned num_parts; struct ac_rtld_part *parts; struct util_dynarray lds_symbols; uint32_t lds_size; @@ -68,20 +76,21 @@ struct ac_rtld_binary { typedef bool (*ac_rtld_get_external_symbol_cb)( void *cb_data, const char *symbol, uint64_t *value); /** * Lifetimes of \ref info, in-memory ELF objects, and the names of * \ref shared_lds_symbols must extend until \ref ac_rtld_close is called on * the opened binary. */ struct ac_rtld_open_info { const struct radeon_info *info; + struct ac_rtld_options options; unsigned num_parts; const char * const *elf_ptrs; /* in-memory ELF objects of each part */ const size_t *elf_sizes; /* sizes of corresponding in-memory ELF objects in bytes */ /* Shared LDS symbols are layouted such that they are accessible from * all shader parts. Non-shared (private) LDS symbols of one part may * overlap private LDS symbols of another shader part. */ unsigned num_shared_lds_symbols; diff --git a/src/gallium/drivers/radeonsi/si_debug_options.h b/src/gallium/drivers/radeonsi/si_debug_options.h index aa8d64e1b88..d6cb3157632 100644 --- a/src/gallium/drivers/radeonsi/si_debug_options.h +++ b/src/gallium/drivers/radeonsi/si_debug_options.h @@ -1,10 +1,11 @@ OPT_BOOL(clear_db_cache_before_clear, false, "Clear DB cache before fast depth clear") OPT_BOOL(enable_nir, false, "Enable NIR") OPT_BOOL(aux_debug, false, "Generate ddebug_dumps for the auxiliary context") OPT_BOOL(sync_compile, false, "Always compile synchronously (will cause stalls)") OPT_BOOL(dump_shader_binary, false, "Dump shader binary as part of ddebug_dumps") OPT_BOOL(debug_disassembly, false, "Report shader disassembly as part of driver debug messages (for shader db)") +OPT_BOOL(halt_shaders, false, "Halt shaders at the start (will hang)") OPT_BOOL(vs_fetch_always_opencode, false, "Always open code vertex fetches (less efficient, purely for testing)") OPT_BOOL(prim_restart_tri_strips_only, false, "Only enable primitive restart for triangle strips") #undef OPT_BOOL diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 92c68f21459..3c3d74ce7af 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5126,20 +5126,23 @@ static bool si_shader_binary_open(struct si_screen *screen, * shader->config.lds_size is set correctly below. */ struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++]; sym->name = "esgs_ring"; sym->size = shader->gs_info.esgs_ring_size; sym->align = 64 * 1024; } bool ok = ac_rtld_open(rtld, (struct ac_rtld_open_info){ .info = &screen->info, + .options = { + .halt_at_entry = screen->options.halt_shaders, + }, .num_parts = num_parts, .elf_ptrs = part_elfs, .elf_sizes = part_sizes, .num_shared_lds_symbols = num_lds_symbols, .shared_lds_symbols = lds_symbols }); if (rtld->lds_size > 0) { unsigned alloc_granularity = screen->info.chip_class >= GFX7 ? 512 : 256; shader->config.lds_size = align(rtld->lds_size, alloc_granularity) / alloc_granularity; -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev