Module: Mesa Branch: main Commit: 67bb8e8165db3717a8515fa931dd3776610713e8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=67bb8e8165db3717a8515fa931dd3776610713e8
Author: Faith Ekstrand <[email protected]> Date: Tue Nov 14 12:08:38 2023 -0600 nvk: Move the guts of nvk_compile_nir() to nvk_codegen.c Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26197> --- src/nouveau/vulkan/nvk_codegen.c | 655 +++++++++++++++++++++++++++++++++++++++ src/nouveau/vulkan/nvk_shader.c | 647 +------------------------------------- src/nouveau/vulkan/nvk_shader.h | 4 + 3 files changed, 660 insertions(+), 646 deletions(-) diff --git a/src/nouveau/vulkan/nvk_codegen.c b/src/nouveau/vulkan/nvk_codegen.c index 6b416514152..8acd67fc85b 100644 --- a/src/nouveau/vulkan/nvk_codegen.c +++ b/src/nouveau/vulkan/nvk_codegen.c @@ -2,13 +2,19 @@ * Copyright © 2022 Collabora Ltd. and Red Hat Inc. * SPDX-License-Identifier: MIT */ +#include "nvk_cmd_buffer.h" #include "nvk_physical_device.h" #include "nvk_shader.h" #include "nir.h" #include "nir_builder.h" +#include "nir_xfb_info.h" #include "nv50_ir_driver.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "nvk_cl9097.h" uint64_t nvk_cg_get_prog_debug(void) @@ -194,3 +200,652 @@ nvk_cg_optimize_nir(nir_shader *nir) NIR_PASS(progress, nir, nir_remove_dead_variables, nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL); } + +/* NOTE: Using a[0x270] in FP may cause an error even if we're using less than + * 124 scalar varying values. + */ +static uint32_t +nvc0_shader_input_address(unsigned sn, unsigned si) +{ + switch (sn) { + case TGSI_SEMANTIC_TESSOUTER: return 0x000 + si * 0x4; + case TGSI_SEMANTIC_TESSINNER: return 0x010 + si * 0x4; + case TGSI_SEMANTIC_PATCH: return 0x020 + si * 0x10; + case TGSI_SEMANTIC_PRIMID: return 0x060; + case TGSI_SEMANTIC_LAYER: return 0x064; + case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068; + case TGSI_SEMANTIC_PSIZE: return 0x06c; + case TGSI_SEMANTIC_POSITION: return 0x070; + case TGSI_SEMANTIC_GENERIC: return 0x080 + si * 0x10; + case TGSI_SEMANTIC_FOG: return 0x2e8; + case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; + case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; + case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10; + case TGSI_SEMANTIC_CLIPVERTEX: return 0x270; + case TGSI_SEMANTIC_PCOORD: return 0x2e0; + case TGSI_SEMANTIC_TESSCOORD: return 0x2f0; + case TGSI_SEMANTIC_INSTANCEID: return 0x2f8; + case TGSI_SEMANTIC_VERTEXID: return 0x2fc; + case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; + default: + assert(!"invalid TGSI input semantic"); + return ~0; + } +} + +static uint32_t +nvc0_shader_output_address(unsigned sn, unsigned si) +{ + switch (sn) { + case TGSI_SEMANTIC_TESSOUTER: return 0x000 + si * 0x4; + case TGSI_SEMANTIC_TESSINNER: return 0x010 + si * 0x4; + case TGSI_SEMANTIC_PATCH: return 0x020 + si * 0x10; + case TGSI_SEMANTIC_PRIMID: return 0x060; + case TGSI_SEMANTIC_LAYER: return 0x064; + case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068; + case TGSI_SEMANTIC_PSIZE: return 0x06c; + case TGSI_SEMANTIC_POSITION: return 0x070; + case TGSI_SEMANTIC_GENERIC: return 0x080 + si * 0x10; + case TGSI_SEMANTIC_FOG: return 0x2e8; + case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; + case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; + case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10; + case TGSI_SEMANTIC_CLIPVERTEX: return 0x270; + case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; + case TGSI_SEMANTIC_VIEWPORT_MASK: return 0x3a0; + case TGSI_SEMANTIC_EDGEFLAG: return ~0; + default: + assert(!"invalid TGSI output semantic"); + return ~0; + } +} + +static int +nvc0_vp_assign_input_slots(struct nv50_ir_prog_info_out *info) +{ + unsigned i, c, n; + + for (n = 0, i = 0; i < info->numInputs; ++i) { + switch (info->in[i].sn) { + case TGSI_SEMANTIC_INSTANCEID: /* for SM4 only, in TGSI they're SVs */ + case TGSI_SEMANTIC_VERTEXID: + info->in[i].mask = 0x1; + info->in[i].slot[0] = + nvc0_shader_input_address(info->in[i].sn, 0) / 4; + continue; + default: + break; + } + for (c = 0; c < 4; ++c) + info->in[i].slot[c] = (0x80 + n * 0x10 + c * 0x4) / 4; + ++n; + } + + return 0; +} + +static int +nvc0_sp_assign_input_slots(struct nv50_ir_prog_info_out *info) +{ + unsigned offset; + unsigned i, c; + + for (i = 0; i < info->numInputs; ++i) { + offset = nvc0_shader_input_address(info->in[i].sn, info->in[i].si); + + for (c = 0; c < 4; ++c) + info->in[i].slot[c] = (offset + c * 0x4) / 4; + } + + return 0; +} + +static int +nvc0_fp_assign_output_slots(struct nv50_ir_prog_info_out *info) +{ + unsigned count = info->prop.fp.numColourResults * 4; + unsigned i, c; + + /* Compute the relative position of each color output, since skipped MRT + * positions will not have registers allocated to them. + */ + unsigned colors[8] = {0}; + for (i = 0; i < info->numOutputs; ++i) + if (info->out[i].sn == TGSI_SEMANTIC_COLOR) + colors[info->out[i].si] = 1; + for (i = 0, c = 0; i < 8; i++) + if (colors[i]) + colors[i] = c++; + for (i = 0; i < info->numOutputs; ++i) + if (info->out[i].sn == TGSI_SEMANTIC_COLOR) + for (c = 0; c < 4; ++c) + info->out[i].slot[c] = colors[info->out[i].si] * 4 + c; + + if (info->io.sampleMask < NV50_CODEGEN_MAX_VARYINGS) + info->out[info->io.sampleMask].slot[0] = count++; + else + if (info->target >= 0xe0) + count++; /* on Kepler, depth is always last colour reg + 2 */ + + if (info->io.fragDepth < NV50_CODEGEN_MAX_VARYINGS) + info->out[info->io.fragDepth].slot[2] = count; + + return 0; +} + +static int +nvc0_sp_assign_output_slots(struct nv50_ir_prog_info_out *info) +{ + unsigned offset; + unsigned i, c; + + for (i = 0; i < info->numOutputs; ++i) { + offset = nvc0_shader_output_address(info->out[i].sn, info->out[i].si); + + for (c = 0; c < 4; ++c) + info->out[i].slot[c] = (offset + c * 0x4) / 4; + } + + return 0; +} + +static int +nvc0_program_assign_varying_slots(struct nv50_ir_prog_info_out *info) +{ + int ret; + + if (info->type == PIPE_SHADER_VERTEX) + ret = nvc0_vp_assign_input_slots(info); + else + ret = nvc0_sp_assign_input_slots(info); + if (ret) + return ret; + + if (info->type == PIPE_SHADER_FRAGMENT) + ret = nvc0_fp_assign_output_slots(info); + else + ret = nvc0_sp_assign_output_slots(info); + return ret; +} + +static inline void +nvk_vtgs_hdr_update_oread(struct nvk_shader *vs, uint8_t slot) +{ + uint8_t min = (vs->hdr[4] >> 12) & 0xff; + uint8_t max = (vs->hdr[4] >> 24); + + min = MIN2(min, slot); + max = MAX2(max, slot); + + vs->hdr[4] = (max << 24) | (min << 12); +} + +static int +nvk_vtgp_gen_header(struct nvk_shader *vs, struct nv50_ir_prog_info_out *info) +{ + unsigned i, c, a; + + for (i = 0; i < info->numInputs; ++i) { + if (info->in[i].patch) + continue; + for (c = 0; c < 4; ++c) { + a = info->in[i].slot[c]; + if (info->in[i].mask & (1 << c)) + vs->hdr[5 + a / 32] |= 1 << (a % 32); + } + } + + for (i = 0; i < info->numOutputs; ++i) { + if (info->out[i].patch) + continue; + for (c = 0; c < 4; ++c) { + if (!(info->out[i].mask & (1 << c))) + continue; + assert(info->out[i].slot[c] >= 0x40 / 4); + a = info->out[i].slot[c] - 0x40 / 4; + vs->hdr[13 + a / 32] |= 1 << (a % 32); + if (info->out[i].oread) + nvk_vtgs_hdr_update_oread(vs, info->out[i].slot[c]); + } + } + + for (i = 0; i < info->numSysVals; ++i) { + switch (info->sv[i].sn) { + case SYSTEM_VALUE_PRIMITIVE_ID: + vs->hdr[5] |= 1 << 24; + break; + case SYSTEM_VALUE_INSTANCE_ID: + vs->hdr[10] |= 1 << 30; + break; + case SYSTEM_VALUE_VERTEX_ID: + vs->hdr[10] |= 1 << 31; + break; + case SYSTEM_VALUE_TESS_COORD: + /* We don't have the mask, nor the slots populated. While this could + * be achieved, the vast majority of the time if either of the coords + * are read, then both will be read. + */ + nvk_vtgs_hdr_update_oread(vs, 0x2f0 / 4); + nvk_vtgs_hdr_update_oread(vs, 0x2f4 / 4); + break; + default: + break; + } + } + + vs->vs.clip_enable = (1 << info->io.clipDistances) - 1; + vs->vs.cull_enable = + ((1 << info->io.cullDistances) - 1) << info->io.clipDistances; + for (i = 0; i < info->io.cullDistances; ++i) + vs->vs.clip_mode |= 1 << ((info->io.clipDistances + i) * 4); + + vs->vs.layer_viewport_relative = info->io.layer_viewport_relative; + + return 0; +} + +static int +nvk_vs_gen_header(struct nvk_shader *vs, struct nv50_ir_prog_info_out *info) +{ + vs->hdr[0] = 0x20061 | (1 << 10); + vs->hdr[4] = 0xff000; + + return nvk_vtgp_gen_header(vs, info); +} + +static int +nvk_gs_gen_header(struct nvk_shader *gs, + const struct nir_shader *nir, + struct nv50_ir_prog_info_out *info) +{ + gs->hdr[0] = 0x20061 | (4 << 10); + + gs->hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24; + + switch (info->prop.gp.outputPrim) { + case MESA_PRIM_POINTS: + gs->hdr[3] = 0x01000000; + break; + case MESA_PRIM_LINE_STRIP: + gs->hdr[3] = 0x06000000; + break; + case MESA_PRIM_TRIANGLE_STRIP: + gs->hdr[3] = 0x07000000; + break; + default: + assert(0); + break; + } + + gs->hdr[4] = CLAMP(info->prop.gp.maxVertices, 1, 1024); + + gs->hdr[0] |= nir->info.gs.active_stream_mask << 28; + + return nvk_vtgp_gen_header(gs, info); +} + +static void +nvk_generate_tessellation_parameters(const struct nv50_ir_prog_info_out *info, + struct nvk_shader *shader) +{ + // TODO: this is a little confusing because nouveau codegen uses + // MESA_PRIM_POINTS for unspecified domain and + // MESA_PRIM_POINTS = 0, the same as NV9097 ISOLINE enum + uint32_t domain_type; + switch (info->prop.tp.domain) { + case MESA_PRIM_LINES: + domain_type = NV9097_SET_TESSELLATION_PARAMETERS_DOMAIN_TYPE_ISOLINE; + break; + case MESA_PRIM_TRIANGLES: + domain_type = NV9097_SET_TESSELLATION_PARAMETERS_DOMAIN_TYPE_TRIANGLE; + break; + case MESA_PRIM_QUADS: + domain_type = NV9097_SET_TESSELLATION_PARAMETERS_DOMAIN_TYPE_QUAD; + break; + default: + domain_type = ~0; + break; + } + shader->tp.domain_type = domain_type; + if (domain_type == ~0) { + return; + } + + uint32_t spacing; + switch (info->prop.tp.partitioning) { + case PIPE_TESS_SPACING_EQUAL: + spacing = NV9097_SET_TESSELLATION_PARAMETERS_SPACING_INTEGER; + break; + case PIPE_TESS_SPACING_FRACTIONAL_ODD: + spacing = NV9097_SET_TESSELLATION_PARAMETERS_SPACING_FRACTIONAL_ODD; + break; + case PIPE_TESS_SPACING_FRACTIONAL_EVEN: + spacing = NV9097_SET_TESSELLATION_PARAMETERS_SPACING_FRACTIONAL_EVEN; + break; + default: + assert(!"invalid tessellator partitioning"); + break; + } + shader->tp.spacing = spacing; + + uint32_t output_prims; + if (info->prop.tp.outputPrim == MESA_PRIM_POINTS) { // point_mode + output_prims = NV9097_SET_TESSELLATION_PARAMETERS_OUTPUT_PRIMITIVES_POINTS; + } else if (info->prop.tp.domain == MESA_PRIM_LINES) { // isoline domain + output_prims = NV9097_SET_TESSELLATION_PARAMETERS_OUTPUT_PRIMITIVES_LINES; + } else { // triangle/quad domain + if (info->prop.tp.winding > 0) { + output_prims = NV9097_SET_TESSELLATION_PARAMETERS_OUTPUT_PRIMITIVES_TRIANGLES_CW; + } else { + output_prims = NV9097_SET_TESSELLATION_PARAMETERS_OUTPUT_PRIMITIVES_TRIANGLES_CCW; + } + } + shader->tp.output_prims = output_prims; +} + +static int +nvk_tcs_gen_header(struct nvk_shader *tcs, struct nv50_ir_prog_info_out *info) +{ + unsigned opcs = 6; /* output patch constants (at least the TessFactors) */ + + if (info->numPatchConstants) + opcs = 8 + info->numPatchConstants * 4; + + tcs->hdr[0] = 0x20061 | (2 << 10); + + tcs->hdr[1] = opcs << 24; + tcs->hdr[2] = info->prop.tp.outputPatchSize << 24; + + tcs->hdr[4] = 0xff000; /* initial min/max parallel output read address */ + + nvk_vtgp_gen_header(tcs, info); + + if (info->target >= NVISA_GM107_CHIPSET) { + /* On GM107+, the number of output patch components has moved in the TCP + * header, but it seems like blob still also uses the old position. + * Also, the high 8-bits are located in between the min/max parallel + * field and has to be set after updating the outputs. */ + tcs->hdr[3] = (opcs & 0x0f) << 28; + tcs->hdr[4] |= (opcs & 0xf0) << 16; + } + + nvk_generate_tessellation_parameters(info, tcs); + + return 0; +} + +static int +nvk_tes_gen_header(struct nvk_shader *tes, struct nv50_ir_prog_info_out *info) +{ + tes->hdr[0] = 0x20061 | (3 << 10); + tes->hdr[4] = 0xff000; + + nvk_vtgp_gen_header(tes, info); + + nvk_generate_tessellation_parameters(info, tes); + + tes->hdr[18] |= 0x3 << 12; /* ? */ + + return 0; +} + +#define NVC0_INTERP_FLAT (1 << 0) +#define NVC0_INTERP_PERSPECTIVE (2 << 0) +#define NVC0_INTERP_LINEAR (3 << 0) +#define NVC0_INTERP_CENTROID (1 << 2) + +static uint8_t +nvk_hdr_interp_mode(const struct nv50_ir_varying *var) +{ + if (var->linear) + return NVC0_INTERP_LINEAR; + if (var->flat) + return NVC0_INTERP_FLAT; + return NVC0_INTERP_PERSPECTIVE; +} + + +static int +nvk_fs_gen_header(struct nvk_shader *fs, const struct nvk_fs_key *key, + struct nv50_ir_prog_info_out *info) +{ + unsigned i, c, a, m; + + /* just 00062 on Kepler */ + fs->hdr[0] = 0x20062 | (5 << 10); + fs->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ + + if (info->prop.fp.usesDiscard || key->zs_self_dep) + fs->hdr[0] |= 0x8000; + if (!info->prop.fp.separateFragData) + fs->hdr[0] |= 0x4000; + if (info->io.sampleMask < 80 /* PIPE_MAX_SHADER_OUTPUTS */) + fs->hdr[19] |= 0x1; + if (info->prop.fp.writesDepth) { + fs->hdr[19] |= 0x2; + fs->flags[0] = 0x11; /* deactivate ZCULL */ + } + + for (i = 0; i < info->numInputs; ++i) { + m = nvk_hdr_interp_mode(&info->in[i]); + if (info->in[i].sn == TGSI_SEMANTIC_COLOR) { + fs->fs.colors |= 1 << info->in[i].si; + if (info->in[i].sc) + fs->fs.color_interp[info->in[i].si] = m | (info->in[i].mask << 4); + } + for (c = 0; c < 4; ++c) { + if (!(info->in[i].mask & (1 << c))) + continue; + a = info->in[i].slot[c]; + if (info->in[i].slot[0] >= (0x060 / 4) && + info->in[i].slot[0] <= (0x07c / 4)) { + fs->hdr[5] |= 1 << (24 + (a - 0x060 / 4)); + } else + if (info->in[i].slot[0] >= (0x2c0 / 4) && + info->in[i].slot[0] <= (0x2fc / 4)) { + fs->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000; + } else { + if (info->in[i].slot[c] < (0x040 / 4) || + info->in[i].slot[c] > (0x380 / 4)) + continue; + a *= 2; + if (info->in[i].slot[0] >= (0x300 / 4)) + a -= 32; + fs->hdr[4 + a / 32] |= m << (a % 32); + } + } + } + /* GM20x+ needs TGSI_SEMANTIC_POSITION to access sample locations */ + if (info->prop.fp.readsSampleLocations && info->target >= NVISA_GM200_CHIPSET) + fs->hdr[5] |= 0x30000000; + + for (i = 0; i < info->numOutputs; ++i) { + if (info->out[i].sn == TGSI_SEMANTIC_COLOR) + fs->hdr[18] |= 0xf << (4 * info->out[i].si); + } + + /* There are no "regular" attachments, but the shader still needs to be + * executed. It seems like it wants to think that it has some color + * outputs in order to actually run. + */ + if (info->prop.fp.numColourResults == 0 && + !info->prop.fp.writesDepth && + info->io.sampleMask >= 80 /* PIPE_MAX_SHADER_OUTPUTS */) + fs->hdr[18] |= 0xf; + + fs->fs.early_z = info->prop.fp.earlyFragTests; + fs->fs.sample_mask_in = info->prop.fp.usesSampleMaskIn; + fs->fs.reads_framebuffer = info->prop.fp.readsFramebuffer; + fs->fs.post_depth_coverage = info->prop.fp.postDepthCoverage; + + /* Mark position xy and layer as read */ + if (fs->fs.reads_framebuffer) + fs->hdr[5] |= 0x32000000; + + return 0; +} + +static uint8_t find_register_index_for_xfb_output(const struct nir_shader *nir, + nir_xfb_output_info output) +{ + nir_foreach_shader_out_variable(var, nir) { + uint32_t slots = glsl_count_vec4_slots(var->type, false, false); + for (uint32_t i = 0; i < slots; ++i) { + if (output.location == (var->data.location+i)) { + return var->data.driver_location+i; + } + } + } + // should not be reached + return 0; +} + +static struct nvk_transform_feedback_state * +nvk_fill_transform_feedback_state(struct nir_shader *nir, + const struct nv50_ir_prog_info_out *info) +{ + const uint8_t max_buffers = 4; + const uint8_t dw_bytes = 4; + const struct nir_xfb_info *nx = nir->xfb_info; + //nir_print_xfb_info(nx, stdout); + + struct nvk_transform_feedback_state *xfb = + malloc(sizeof(struct nvk_transform_feedback_state)); + + if (!xfb) + return NULL; + + for (uint8_t b = 0; b < max_buffers; ++b) { + xfb->stride[b] = b < nx->buffers_written ? nx->buffers[b].stride : 0; + xfb->varying_count[b] = 0; + xfb->stream[b] = nx->buffer_to_stream[b]; + } + memset(xfb->varying_index, 0xff, sizeof(xfb->varying_index)); /* = skip */ + + if (info->numOutputs == 0) + return xfb; + + for (uint32_t i = 0; i < nx->output_count; ++i) { + const nir_xfb_output_info output = nx->outputs[i]; + const uint8_t b = output.buffer; + const uint8_t r = find_register_index_for_xfb_output(nir, output); + uint32_t p = output.offset / dw_bytes; + + assert(r < info->numOutputs && p < ARRAY_SIZE(xfb->varying_index[b])); + + u_foreach_bit(c, nx->outputs[i].component_mask) + xfb->varying_index[b][p++] = info->out[r].slot[c]; + + xfb->varying_count[b] = MAX2(xfb->varying_count[b], p); + } + + /* zero unused indices */ + for (uint8_t b = 0; b < 4; ++b) + for (uint32_t c = xfb->varying_count[b]; c & 3; ++c) + xfb->varying_index[b][c] = 0; + + return xfb; +} + + +VkResult +nvk_cg_compile_nir(struct nvk_physical_device *pdev, nir_shader *nir, + const struct nvk_fs_key *fs_key, + struct nvk_shader *shader) +{ + struct nv50_ir_prog_info *info; + struct nv50_ir_prog_info_out info_out = {}; + int ret; + + info = CALLOC_STRUCT(nv50_ir_prog_info); + if (!info) + return false; + + info->type = pipe_shader_type_from_mesa(nir->info.stage); + info->target = pdev->info.chipset; + info->bin.nir = nir; + + for (unsigned i = 0; i < 3; i++) + shader->cp.block_size[i] = nir->info.workgroup_size[i]; + + info->bin.smemSize = shader->cp.smem_size; + info->dbgFlags = nvk_cg_get_prog_debug(); + info->optLevel = nvk_cg_get_prog_optimize(); + info->io.auxCBSlot = 1; + info->io.uboInfoBase = 0; + info->io.drawInfoBase = nvk_root_descriptor_offset(draw.base_vertex); + if (nir->info.stage == MESA_SHADER_COMPUTE) { + info->prop.cp.gridInfoBase = 0; + } else { + info->assignSlots = nvc0_program_assign_varying_slots; + } + ret = nv50_ir_generate_code(info, &info_out); + if (ret) + return VK_ERROR_UNKNOWN; + + if (info_out.bin.fixupData) { + nv50_ir_apply_fixups(info_out.bin.fixupData, info_out.bin.code, + fs_key && fs_key->force_per_sample, + false /* flatshade */, false /* alphatest */, + fs_key && fs_key->msaa); + } + + shader->stage = nir->info.stage; + shader->code_ptr = (uint8_t *)info_out.bin.code; + shader->code_size = info_out.bin.codeSize; + + if (info_out.target >= NVISA_GV100_CHIPSET) + shader->num_gprs = MAX2(4, info_out.bin.maxGPR + 3); + else + shader->num_gprs = MAX2(4, info_out.bin.maxGPR + 1); + shader->cp.smem_size = info_out.bin.smemSize; + shader->num_barriers = info_out.numBarriers; + + switch (info->type) { + case PIPE_SHADER_VERTEX: + ret = nvk_vs_gen_header(shader, &info_out); + break; + case PIPE_SHADER_FRAGMENT: + ret = nvk_fs_gen_header(shader, fs_key, &info_out); + shader->fs.uses_sample_shading = nir->info.fs.uses_sample_shading; + break; + case PIPE_SHADER_GEOMETRY: + ret = nvk_gs_gen_header(shader, nir, &info_out); + break; + case PIPE_SHADER_TESS_CTRL: + ret = nvk_tcs_gen_header(shader, &info_out); + break; + case PIPE_SHADER_TESS_EVAL: + ret = nvk_tes_gen_header(shader, &info_out); + break; + case PIPE_SHADER_COMPUTE: + break; + default: + unreachable("Invalid shader stage"); + break; + } + assert(ret == 0); + + if (info_out.bin.tlsSpace) { + assert(info_out.bin.tlsSpace < (1 << 24)); + shader->hdr[0] |= 1 << 26; + shader->hdr[1] |= align(info_out.bin.tlsSpace, 0x10); /* l[] size */ + shader->slm_size = info_out.bin.tlsSpace; + } + + if (info_out.io.globalAccess) + shader->hdr[0] |= 1 << 26; + if (info_out.io.globalAccess & 0x2) + shader->hdr[0] |= 1 << 16; + if (info_out.io.fp64) + shader->hdr[0] |= 1 << 27; + + if (nir->xfb_info) { + shader->xfb = nvk_fill_transform_feedback_state(nir, &info_out); + if (shader->xfb == NULL) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + } + + return VK_SUCCESS; +} diff --git a/src/nouveau/vulkan/nvk_shader.c b/src/nouveau/vulkan/nvk_shader.c index 99551587e4e..86e58236040 100644 --- a/src/nouveau/vulkan/nvk_shader.c +++ b/src/nouveau/vulkan/nvk_shader.c @@ -4,7 +4,6 @@ */ #include "nvk_shader.h" -#include "nvk_cmd_buffer.h" #include "nvk_descriptor_set_layout.h" #include "nvk_device.h" #include "nvk_physical_device.h" @@ -21,7 +20,6 @@ #include "nak.h" #include "nir.h" #include "nir_builder.h" -#include "nir_xfb_info.h" #include "compiler/spirv/nir_spirv.h" #include "nv50_ir_driver.h" @@ -33,7 +31,6 @@ #include "clb097.h" #include "clc397.h" #include "clc597.h" -#include "nvk_cl9097.h" static void shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align) @@ -488,554 +485,6 @@ nvk_shader_dump(struct nvk_shader *shader) } #endif -#include "tgsi/tgsi_ureg.h" - -/* NOTE: Using a[0x270] in FP may cause an error even if we're using less than - * 124 scalar varying values. - */ -static uint32_t -nvc0_shader_input_address(unsigned sn, unsigned si) -{ - switch (sn) { - case TGSI_SEMANTIC_TESSOUTER: return 0x000 + si * 0x4; - case TGSI_SEMANTIC_TESSINNER: return 0x010 + si * 0x4; - case TGSI_SEMANTIC_PATCH: return 0x020 + si * 0x10; - case TGSI_SEMANTIC_PRIMID: return 0x060; - case TGSI_SEMANTIC_LAYER: return 0x064; - case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068; - case TGSI_SEMANTIC_PSIZE: return 0x06c; - case TGSI_SEMANTIC_POSITION: return 0x070; - case TGSI_SEMANTIC_GENERIC: return 0x080 + si * 0x10; - case TGSI_SEMANTIC_FOG: return 0x2e8; - case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; - case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; - case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10; - case TGSI_SEMANTIC_CLIPVERTEX: return 0x270; - case TGSI_SEMANTIC_PCOORD: return 0x2e0; - case TGSI_SEMANTIC_TESSCOORD: return 0x2f0; - case TGSI_SEMANTIC_INSTANCEID: return 0x2f8; - case TGSI_SEMANTIC_VERTEXID: return 0x2fc; - case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; - default: - assert(!"invalid TGSI input semantic"); - return ~0; - } -} - -static uint32_t -nvc0_shader_output_address(unsigned sn, unsigned si) -{ - switch (sn) { - case TGSI_SEMANTIC_TESSOUTER: return 0x000 + si * 0x4; - case TGSI_SEMANTIC_TESSINNER: return 0x010 + si * 0x4; - case TGSI_SEMANTIC_PATCH: return 0x020 + si * 0x10; - case TGSI_SEMANTIC_PRIMID: return 0x060; - case TGSI_SEMANTIC_LAYER: return 0x064; - case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068; - case TGSI_SEMANTIC_PSIZE: return 0x06c; - case TGSI_SEMANTIC_POSITION: return 0x070; - case TGSI_SEMANTIC_GENERIC: return 0x080 + si * 0x10; - case TGSI_SEMANTIC_FOG: return 0x2e8; - case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; - case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; - case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10; - case TGSI_SEMANTIC_CLIPVERTEX: return 0x270; - case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; - case TGSI_SEMANTIC_VIEWPORT_MASK: return 0x3a0; - case TGSI_SEMANTIC_EDGEFLAG: return ~0; - default: - assert(!"invalid TGSI output semantic"); - return ~0; - } -} - -static int -nvc0_vp_assign_input_slots(struct nv50_ir_prog_info_out *info) -{ - unsigned i, c, n; - - for (n = 0, i = 0; i < info->numInputs; ++i) { - switch (info->in[i].sn) { - case TGSI_SEMANTIC_INSTANCEID: /* for SM4 only, in TGSI they're SVs */ - case TGSI_SEMANTIC_VERTEXID: - info->in[i].mask = 0x1; - info->in[i].slot[0] = - nvc0_shader_input_address(info->in[i].sn, 0) / 4; - continue; - default: - break; - } - for (c = 0; c < 4; ++c) - info->in[i].slot[c] = (0x80 + n * 0x10 + c * 0x4) / 4; - ++n; - } - - return 0; -} - -static int -nvc0_sp_assign_input_slots(struct nv50_ir_prog_info_out *info) -{ - unsigned offset; - unsigned i, c; - - for (i = 0; i < info->numInputs; ++i) { - offset = nvc0_shader_input_address(info->in[i].sn, info->in[i].si); - - for (c = 0; c < 4; ++c) - info->in[i].slot[c] = (offset + c * 0x4) / 4; - } - - return 0; -} - -static int -nvc0_fp_assign_output_slots(struct nv50_ir_prog_info_out *info) -{ - unsigned count = info->prop.fp.numColourResults * 4; - unsigned i, c; - - /* Compute the relative position of each color output, since skipped MRT - * positions will not have registers allocated to them. - */ - unsigned colors[8] = {0}; - for (i = 0; i < info->numOutputs; ++i) - if (info->out[i].sn == TGSI_SEMANTIC_COLOR) - colors[info->out[i].si] = 1; - for (i = 0, c = 0; i < 8; i++) - if (colors[i]) - colors[i] = c++; - for (i = 0; i < info->numOutputs; ++i) - if (info->out[i].sn == TGSI_SEMANTIC_COLOR) - for (c = 0; c < 4; ++c) - info->out[i].slot[c] = colors[info->out[i].si] * 4 + c; - - if (info->io.sampleMask < NV50_CODEGEN_MAX_VARYINGS) - info->out[info->io.sampleMask].slot[0] = count++; - else - if (info->target >= 0xe0) - count++; /* on Kepler, depth is always last colour reg + 2 */ - - if (info->io.fragDepth < NV50_CODEGEN_MAX_VARYINGS) - info->out[info->io.fragDepth].slot[2] = count; - - return 0; -} - -static int -nvc0_sp_assign_output_slots(struct nv50_ir_prog_info_out *info) -{ - unsigned offset; - unsigned i, c; - - for (i = 0; i < info->numOutputs; ++i) { - offset = nvc0_shader_output_address(info->out[i].sn, info->out[i].si); - - for (c = 0; c < 4; ++c) - info->out[i].slot[c] = (offset + c * 0x4) / 4; - } - - return 0; -} - -static int -nvc0_program_assign_varying_slots(struct nv50_ir_prog_info_out *info) -{ - int ret; - - if (info->type == PIPE_SHADER_VERTEX) - ret = nvc0_vp_assign_input_slots(info); - else - ret = nvc0_sp_assign_input_slots(info); - if (ret) - return ret; - - if (info->type == PIPE_SHADER_FRAGMENT) - ret = nvc0_fp_assign_output_slots(info); - else - ret = nvc0_sp_assign_output_slots(info); - return ret; -} - -static inline void -nvk_vtgs_hdr_update_oread(struct nvk_shader *vs, uint8_t slot) -{ - uint8_t min = (vs->hdr[4] >> 12) & 0xff; - uint8_t max = (vs->hdr[4] >> 24); - - min = MIN2(min, slot); - max = MAX2(max, slot); - - vs->hdr[4] = (max << 24) | (min << 12); -} - -static int -nvk_vtgp_gen_header(struct nvk_shader *vs, struct nv50_ir_prog_info_out *info) -{ - unsigned i, c, a; - - for (i = 0; i < info->numInputs; ++i) { - if (info->in[i].patch) - continue; - for (c = 0; c < 4; ++c) { - a = info->in[i].slot[c]; - if (info->in[i].mask & (1 << c)) - vs->hdr[5 + a / 32] |= 1 << (a % 32); - } - } - - for (i = 0; i < info->numOutputs; ++i) { - if (info->out[i].patch) - continue; - for (c = 0; c < 4; ++c) { - if (!(info->out[i].mask & (1 << c))) - continue; - assert(info->out[i].slot[c] >= 0x40 / 4); - a = info->out[i].slot[c] - 0x40 / 4; - vs->hdr[13 + a / 32] |= 1 << (a % 32); - if (info->out[i].oread) - nvk_vtgs_hdr_update_oread(vs, info->out[i].slot[c]); - } - } - - for (i = 0; i < info->numSysVals; ++i) { - switch (info->sv[i].sn) { - case SYSTEM_VALUE_PRIMITIVE_ID: - vs->hdr[5] |= 1 << 24; - break; - case SYSTEM_VALUE_INSTANCE_ID: - vs->hdr[10] |= 1 << 30; - break; - case SYSTEM_VALUE_VERTEX_ID: - vs->hdr[10] |= 1 << 31; - break; - case SYSTEM_VALUE_TESS_COORD: - /* We don't have the mask, nor the slots populated. While this could - * be achieved, the vast majority of the time if either of the coords - * are read, then both will be read. - */ - nvk_vtgs_hdr_update_oread(vs, 0x2f0 / 4); - nvk_vtgs_hdr_update_oread(vs, 0x2f4 / 4); - break; - default: - break; - } - } - - vs->vs.clip_enable = (1 << info->io.clipDistances) - 1; - vs->vs.cull_enable = - ((1 << info->io.cullDistances) - 1) << info->io.clipDistances; - for (i = 0; i < info->io.cullDistances; ++i) - vs->vs.clip_mode |= 1 << ((info->io.clipDistances + i) * 4); - - vs->vs.layer_viewport_relative = info->io.layer_viewport_relative; - - return 0; -} - -static int -nvk_vs_gen_header(struct nvk_shader *vs, struct nv50_ir_prog_info_out *info) -{ - vs->hdr[0] = 0x20061 | (1 << 10); - vs->hdr[4] = 0xff000; - - return nvk_vtgp_gen_header(vs, info); -} - -static int -nvk_gs_gen_header(struct nvk_shader *gs, - const struct nir_shader *nir, - struct nv50_ir_prog_info_out *info) -{ - gs->hdr[0] = 0x20061 | (4 << 10); - - gs->hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24; - - switch (info->prop.gp.outputPrim) { - case MESA_PRIM_POINTS: - gs->hdr[3] = 0x01000000; - break; - case MESA_PRIM_LINE_STRIP: - gs->hdr[3] = 0x06000000; - break; - case MESA_PRIM_TRIANGLE_STRIP: - gs->hdr[3] = 0x07000000; - break; - default: - assert(0); - break; - } - - gs->hdr[4] = CLAMP(info->prop.gp.maxVertices, 1, 1024); - - gs->hdr[0] |= nir->info.gs.active_stream_mask << 28; - - return nvk_vtgp_gen_header(gs, info); -} - -static void -nvk_generate_tessellation_parameters(const struct nv50_ir_prog_info_out *info, - struct nvk_shader *shader) -{ - // TODO: this is a little confusing because nouveau codegen uses - // MESA_PRIM_POINTS for unspecified domain and - // MESA_PRIM_POINTS = 0, the same as NV9097 ISOLINE enum - uint32_t domain_type; - switch (info->prop.tp.domain) { - case MESA_PRIM_LINES: - domain_type = NV9097_SET_TESSELLATION_PARAMETERS_DOMAIN_TYPE_ISOLINE; - break; - case MESA_PRIM_TRIANGLES: - domain_type = NV9097_SET_TESSELLATION_PARAMETERS_DOMAIN_TYPE_TRIANGLE; - break; - case MESA_PRIM_QUADS: - domain_type = NV9097_SET_TESSELLATION_PARAMETERS_DOMAIN_TYPE_QUAD; - break; - default: - domain_type = ~0; - break; - } - shader->tp.domain_type = domain_type; - if (domain_type == ~0) { - return; - } - - uint32_t spacing; - switch (info->prop.tp.partitioning) { - case PIPE_TESS_SPACING_EQUAL: - spacing = NV9097_SET_TESSELLATION_PARAMETERS_SPACING_INTEGER; - break; - case PIPE_TESS_SPACING_FRACTIONAL_ODD: - spacing = NV9097_SET_TESSELLATION_PARAMETERS_SPACING_FRACTIONAL_ODD; - break; - case PIPE_TESS_SPACING_FRACTIONAL_EVEN: - spacing = NV9097_SET_TESSELLATION_PARAMETERS_SPACING_FRACTIONAL_EVEN; - break; - default: - assert(!"invalid tessellator partitioning"); - break; - } - shader->tp.spacing = spacing; - - uint32_t output_prims; - if (info->prop.tp.outputPrim == MESA_PRIM_POINTS) { // point_mode - output_prims = NV9097_SET_TESSELLATION_PARAMETERS_OUTPUT_PRIMITIVES_POINTS; - } else if (info->prop.tp.domain == MESA_PRIM_LINES) { // isoline domain - output_prims = NV9097_SET_TESSELLATION_PARAMETERS_OUTPUT_PRIMITIVES_LINES; - } else { // triangle/quad domain - if (info->prop.tp.winding > 0) { - output_prims = NV9097_SET_TESSELLATION_PARAMETERS_OUTPUT_PRIMITIVES_TRIANGLES_CW; - } else { - output_prims = NV9097_SET_TESSELLATION_PARAMETERS_OUTPUT_PRIMITIVES_TRIANGLES_CCW; - } - } - shader->tp.output_prims = output_prims; -} - -static int -nvk_tcs_gen_header(struct nvk_shader *tcs, struct nv50_ir_prog_info_out *info) -{ - unsigned opcs = 6; /* output patch constants (at least the TessFactors) */ - - if (info->numPatchConstants) - opcs = 8 + info->numPatchConstants * 4; - - tcs->hdr[0] = 0x20061 | (2 << 10); - - tcs->hdr[1] = opcs << 24; - tcs->hdr[2] = info->prop.tp.outputPatchSize << 24; - - tcs->hdr[4] = 0xff000; /* initial min/max parallel output read address */ - - nvk_vtgp_gen_header(tcs, info); - - if (info->target >= NVISA_GM107_CHIPSET) { - /* On GM107+, the number of output patch components has moved in the TCP - * header, but it seems like blob still also uses the old position. - * Also, the high 8-bits are located in between the min/max parallel - * field and has to be set after updating the outputs. */ - tcs->hdr[3] = (opcs & 0x0f) << 28; - tcs->hdr[4] |= (opcs & 0xf0) << 16; - } - - nvk_generate_tessellation_parameters(info, tcs); - - return 0; -} - -static int -nvk_tes_gen_header(struct nvk_shader *tes, struct nv50_ir_prog_info_out *info) -{ - tes->hdr[0] = 0x20061 | (3 << 10); - tes->hdr[4] = 0xff000; - - nvk_vtgp_gen_header(tes, info); - - nvk_generate_tessellation_parameters(info, tes); - - tes->hdr[18] |= 0x3 << 12; /* ? */ - - return 0; -} - -#define NVC0_INTERP_FLAT (1 << 0) -#define NVC0_INTERP_PERSPECTIVE (2 << 0) -#define NVC0_INTERP_LINEAR (3 << 0) -#define NVC0_INTERP_CENTROID (1 << 2) - -static uint8_t -nvk_hdr_interp_mode(const struct nv50_ir_varying *var) -{ - if (var->linear) - return NVC0_INTERP_LINEAR; - if (var->flat) - return NVC0_INTERP_FLAT; - return NVC0_INTERP_PERSPECTIVE; -} - - -static int -nvk_fs_gen_header(struct nvk_shader *fs, const struct nvk_fs_key *key, - struct nv50_ir_prog_info_out *info) -{ - unsigned i, c, a, m; - - /* just 00062 on Kepler */ - fs->hdr[0] = 0x20062 | (5 << 10); - fs->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ - - if (info->prop.fp.usesDiscard || key->zs_self_dep) - fs->hdr[0] |= 0x8000; - if (!info->prop.fp.separateFragData) - fs->hdr[0] |= 0x4000; - if (info->io.sampleMask < 80 /* PIPE_MAX_SHADER_OUTPUTS */) - fs->hdr[19] |= 0x1; - if (info->prop.fp.writesDepth) { - fs->hdr[19] |= 0x2; - fs->flags[0] = 0x11; /* deactivate ZCULL */ - } - - for (i = 0; i < info->numInputs; ++i) { - m = nvk_hdr_interp_mode(&info->in[i]); - if (info->in[i].sn == TGSI_SEMANTIC_COLOR) { - fs->fs.colors |= 1 << info->in[i].si; - if (info->in[i].sc) - fs->fs.color_interp[info->in[i].si] = m | (info->in[i].mask << 4); - } - for (c = 0; c < 4; ++c) { - if (!(info->in[i].mask & (1 << c))) - continue; - a = info->in[i].slot[c]; - if (info->in[i].slot[0] >= (0x060 / 4) && - info->in[i].slot[0] <= (0x07c / 4)) { - fs->hdr[5] |= 1 << (24 + (a - 0x060 / 4)); - } else - if (info->in[i].slot[0] >= (0x2c0 / 4) && - info->in[i].slot[0] <= (0x2fc / 4)) { - fs->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000; - } else { - if (info->in[i].slot[c] < (0x040 / 4) || - info->in[i].slot[c] > (0x380 / 4)) - continue; - a *= 2; - if (info->in[i].slot[0] >= (0x300 / 4)) - a -= 32; - fs->hdr[4 + a / 32] |= m << (a % 32); - } - } - } - /* GM20x+ needs TGSI_SEMANTIC_POSITION to access sample locations */ - if (info->prop.fp.readsSampleLocations && info->target >= NVISA_GM200_CHIPSET) - fs->hdr[5] |= 0x30000000; - - for (i = 0; i < info->numOutputs; ++i) { - if (info->out[i].sn == TGSI_SEMANTIC_COLOR) - fs->hdr[18] |= 0xf << (4 * info->out[i].si); - } - - /* There are no "regular" attachments, but the shader still needs to be - * executed. It seems like it wants to think that it has some color - * outputs in order to actually run. - */ - if (info->prop.fp.numColourResults == 0 && - !info->prop.fp.writesDepth && - info->io.sampleMask >= 80 /* PIPE_MAX_SHADER_OUTPUTS */) - fs->hdr[18] |= 0xf; - - fs->fs.early_z = info->prop.fp.earlyFragTests; - fs->fs.sample_mask_in = info->prop.fp.usesSampleMaskIn; - fs->fs.reads_framebuffer = info->prop.fp.readsFramebuffer; - fs->fs.post_depth_coverage = info->prop.fp.postDepthCoverage; - - /* Mark position xy and layer as read */ - if (fs->fs.reads_framebuffer) - fs->hdr[5] |= 0x32000000; - - return 0; -} - -static uint8_t find_register_index_for_xfb_output(const struct nir_shader *nir, - nir_xfb_output_info output) -{ - nir_foreach_shader_out_variable(var, nir) { - uint32_t slots = glsl_count_vec4_slots(var->type, false, false); - for (uint32_t i = 0; i < slots; ++i) { - if (output.location == (var->data.location+i)) { - return var->data.driver_location+i; - } - } - } - // should not be reached - return 0; -} - -static struct nvk_transform_feedback_state * -nvk_fill_transform_feedback_state(struct nir_shader *nir, - const struct nv50_ir_prog_info_out *info) -{ - const uint8_t max_buffers = 4; - const uint8_t dw_bytes = 4; - const struct nir_xfb_info *nx = nir->xfb_info; - //nir_print_xfb_info(nx, stdout); - - struct nvk_transform_feedback_state *xfb = - malloc(sizeof(struct nvk_transform_feedback_state)); - - if (!xfb) - return NULL; - - for (uint8_t b = 0; b < max_buffers; ++b) { - xfb->stride[b] = b < nx->buffers_written ? nx->buffers[b].stride : 0; - xfb->varying_count[b] = 0; - xfb->stream[b] = nx->buffer_to_stream[b]; - } - memset(xfb->varying_index, 0xff, sizeof(xfb->varying_index)); /* = skip */ - - if (info->numOutputs == 0) - return xfb; - - for (uint32_t i = 0; i < nx->output_count; ++i) { - const nir_xfb_output_info output = nx->outputs[i]; - const uint8_t b = output.buffer; - const uint8_t r = find_register_index_for_xfb_output(nir, output); - uint32_t p = output.offset / dw_bytes; - - assert(r < info->numOutputs && p < ARRAY_SIZE(xfb->varying_index[b])); - - u_foreach_bit(c, nx->outputs[i].component_mask) - xfb->varying_index[b][p++] = info->out[r].slot[c]; - - xfb->varying_count[b] = MAX2(xfb->varying_count[b], p); - } - - /* zero unused indices */ - for (uint8_t b = 0; b < 4; ++b) - for (uint32_t c = xfb->varying_count[b]; c & 3; ++c) - xfb->varying_index[b][c] = 0; - - return xfb; -} - static VkResult nvk_compile_nir_with_nak(struct nvk_physical_device *pdev, nir_shader *nir, @@ -1128,104 +577,10 @@ nvk_compile_nir(struct nvk_physical_device *pdev, nir_shader *nir, const struct nvk_fs_key *fs_key, struct nvk_shader *shader) { - struct nv50_ir_prog_info *info; - struct nv50_ir_prog_info_out info_out = {}; - int ret; - if (use_nak(pdev, nir->info.stage)) return nvk_compile_nir_with_nak(pdev, nir, fs_key, shader); - - info = CALLOC_STRUCT(nv50_ir_prog_info); - if (!info) - return false; - - info->type = pipe_shader_type_from_mesa(nir->info.stage); - info->target = pdev->info.chipset; - info->bin.nir = nir; - - for (unsigned i = 0; i < 3; i++) - shader->cp.block_size[i] = nir->info.workgroup_size[i]; - - info->bin.smemSize = shader->cp.smem_size; - info->dbgFlags = nvk_cg_get_prog_debug(); - info->optLevel = nvk_cg_get_prog_optimize(); - info->io.auxCBSlot = 1; - info->io.uboInfoBase = 0; - info->io.drawInfoBase = nvk_root_descriptor_offset(draw.base_vertex); - if (nir->info.stage == MESA_SHADER_COMPUTE) { - info->prop.cp.gridInfoBase = 0; - } else { - info->assignSlots = nvc0_program_assign_varying_slots; - } - ret = nv50_ir_generate_code(info, &info_out); - if (ret) - return VK_ERROR_UNKNOWN; - - if (info_out.bin.fixupData) { - nv50_ir_apply_fixups(info_out.bin.fixupData, info_out.bin.code, - fs_key && fs_key->force_per_sample, - false /* flatshade */, false /* alphatest */, - fs_key && fs_key->msaa); - } - - shader->stage = nir->info.stage; - shader->code_ptr = (uint8_t *)info_out.bin.code; - shader->code_size = info_out.bin.codeSize; - - if (info_out.target >= NVISA_GV100_CHIPSET) - shader->num_gprs = MAX2(4, info_out.bin.maxGPR + 3); else - shader->num_gprs = MAX2(4, info_out.bin.maxGPR + 1); - shader->cp.smem_size = info_out.bin.smemSize; - shader->num_barriers = info_out.numBarriers; - - switch (info->type) { - case PIPE_SHADER_VERTEX: - ret = nvk_vs_gen_header(shader, &info_out); - break; - case PIPE_SHADER_FRAGMENT: - ret = nvk_fs_gen_header(shader, fs_key, &info_out); - shader->fs.uses_sample_shading = nir->info.fs.uses_sample_shading; - break; - case PIPE_SHADER_GEOMETRY: - ret = nvk_gs_gen_header(shader, nir, &info_out); - break; - case PIPE_SHADER_TESS_CTRL: - ret = nvk_tcs_gen_header(shader, &info_out); - break; - case PIPE_SHADER_TESS_EVAL: - ret = nvk_tes_gen_header(shader, &info_out); - break; - case PIPE_SHADER_COMPUTE: - break; - default: - unreachable("Invalid shader stage"); - break; - } - assert(ret == 0); - - if (info_out.bin.tlsSpace) { - assert(info_out.bin.tlsSpace < (1 << 24)); - shader->hdr[0] |= 1 << 26; - shader->hdr[1] |= align(info_out.bin.tlsSpace, 0x10); /* l[] size */ - shader->slm_size = info_out.bin.tlsSpace; - } - - if (info_out.io.globalAccess) - shader->hdr[0] |= 1 << 26; - if (info_out.io.globalAccess & 0x2) - shader->hdr[0] |= 1 << 16; - if (info_out.io.fp64) - shader->hdr[0] |= 1 << 27; - - if (nir->xfb_info) { - shader->xfb = nvk_fill_transform_feedback_state(nir, &info_out); - if (shader->xfb == NULL) { - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - } - - return VK_SUCCESS; + return nvk_cg_compile_nir(pdev, nir, fs_key, shader); } VkResult diff --git a/src/nouveau/vulkan/nvk_shader.h b/src/nouveau/vulkan/nvk_shader.h index 29bf3d2ae17..d8a174a266d 100644 --- a/src/nouveau/vulkan/nvk_shader.h +++ b/src/nouveau/vulkan/nvk_shader.h @@ -168,4 +168,8 @@ nvk_cg_nir_options(const struct nvk_physical_device *pdev, void nvk_cg_preprocess_nir(nir_shader *nir); void nvk_cg_optimize_nir(nir_shader *nir); +VkResult nvk_cg_compile_nir(struct nvk_physical_device *pdev, nir_shader *nir, + const struct nvk_fs_key *fs_key, + struct nvk_shader *shader); + #endif
