Module: Mesa Branch: main Commit: 23e1f3c373224ecc31c703657af1356debac9710 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=23e1f3c373224ecc31c703657af1356debac9710
Author: Faith Ekstrand <[email protected]> Date: Tue Nov 14 13:32:37 2023 -0600 nvk: Use nak_shader_info natively Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26197> --- src/nouveau/vulkan/nvk_cmd_buffer.c | 4 +- src/nouveau/vulkan/nvk_cmd_dispatch.c | 6 +- src/nouveau/vulkan/nvk_codegen.c | 205 ++++++++++++----------------- src/nouveau/vulkan/nvk_compute_pipeline.c | 30 ++--- src/nouveau/vulkan/nvk_graphics_pipeline.c | 85 ++++++------ src/nouveau/vulkan/nvk_shader.c | 85 ++---------- src/nouveau/vulkan/nvk_shader.h | 54 +------- 7 files changed, 161 insertions(+), 308 deletions(-) diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.c b/src/nouveau/vulkan/nvk_cmd_buffer.c index b6d7ee3979a..80abfdd0dd8 100644 --- a/src/nouveau/vulkan/nvk_cmd_buffer.c +++ b/src/nouveau/vulkan/nvk_cmd_buffer.c @@ -357,8 +357,8 @@ nvk_CmdBindPipeline(VkCommandBuffer commandBuffer, struct nvk_device *dev = nvk_cmd_buffer_device(cmd); for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) { - if (pipeline->shaders[s].slm_size) - nvk_device_ensure_slm(dev, pipeline->shaders[s].slm_size); + if (pipeline->shaders[s].info.slm_size) + nvk_device_ensure_slm(dev, pipeline->shaders[s].info.slm_size); } switch (pipelineBindPoint) { diff --git a/src/nouveau/vulkan/nvk_cmd_dispatch.c b/src/nouveau/vulkan/nvk_cmd_dispatch.c index 05a2c734a92..96eca35bfbe 100644 --- a/src/nouveau/vulkan/nvk_cmd_dispatch.c +++ b/src/nouveau/vulkan/nvk_cmd_dispatch.c @@ -151,9 +151,9 @@ nvk_compute_local_size(struct nvk_cmd_buffer *cmd) const struct nvk_shader *shader = &pipeline->base.shaders[MESA_SHADER_COMPUTE]; - return shader->cp.block_size[0] * - shader->cp.block_size[1] * - shader->cp.block_size[2]; + return shader->info.cs.local_size[0] * + shader->info.cs.local_size[1] * + shader->info.cs.local_size[2]; } static uint64_t diff --git a/src/nouveau/vulkan/nvk_codegen.c b/src/nouveau/vulkan/nvk_codegen.c index e888fe5412a..39da05be49d 100644 --- a/src/nouveau/vulkan/nvk_codegen.c +++ b/src/nouveau/vulkan/nvk_codegen.c @@ -465,13 +465,13 @@ nvc0_program_assign_varying_slots(struct nv50_ir_prog_info_out *info) static inline void nvk_vtgs_hdr_update_oread(struct nvk_shader *vs, uint8_t slot) { - uint8_t min = (vs->hdr[4] >> 12) & 0xff; - uint8_t max = (vs->hdr[4] >> 24); + uint8_t min = (vs->info.hdr[4] >> 12) & 0xff; + uint8_t max = (vs->info.hdr[4] >> 24); min = MIN2(min, slot); max = MAX2(max, slot); - vs->hdr[4] = (max << 24) | (min << 12); + vs->info.hdr[4] = (max << 24) | (min << 12); } static int @@ -485,7 +485,7 @@ nvk_vtgp_gen_header(struct nvk_shader *vs, struct nv50_ir_prog_info_out *info) for (c = 0; c < 4; ++c) { a = info->in[i].slot[c]; if (info->in[i].mask & (1 << c)) - vs->hdr[5 + a / 32] |= 1 << (a % 32); + vs->info.hdr[5 + a / 32] |= 1 << (a % 32); } } @@ -497,7 +497,7 @@ nvk_vtgp_gen_header(struct nvk_shader *vs, struct nv50_ir_prog_info_out *info) continue; assert(info->out[i].slot[c] >= 0x40 / 4); a = info->out[i].slot[c] - 0x40 / 4; - vs->hdr[13 + a / 32] |= 1 << (a % 32); + vs->info.hdr[13 + a / 32] |= 1 << (a % 32); if (info->out[i].oread) nvk_vtgs_hdr_update_oread(vs, info->out[i].slot[c]); } @@ -506,13 +506,13 @@ nvk_vtgp_gen_header(struct nvk_shader *vs, struct nv50_ir_prog_info_out *info) for (i = 0; i < info->numSysVals; ++i) { switch (info->sv[i].sn) { case SYSTEM_VALUE_PRIMITIVE_ID: - vs->hdr[5] |= 1 << 24; + vs->info.hdr[5] |= 1 << 24; break; case SYSTEM_VALUE_INSTANCE_ID: - vs->hdr[10] |= 1 << 30; + vs->info.hdr[10] |= 1 << 30; break; case SYSTEM_VALUE_VERTEX_ID: - vs->hdr[10] |= 1 << 31; + vs->info.hdr[10] |= 1 << 31; break; case SYSTEM_VALUE_TESS_COORD: /* We don't have the mask, nor the slots populated. While this could @@ -527,13 +527,10 @@ nvk_vtgp_gen_header(struct nvk_shader *vs, struct nv50_ir_prog_info_out *info) } } - vs->vs.clip_enable = (1 << info->io.clipDistances) - 1; - vs->vs.cull_enable = + vs->info.vtg.writes_layer = (vs->info.hdr[13] & (1 << 9)) != 0; + vs->info.vtg.clip_enable = (1 << info->io.clipDistances) - 1; + vs->info.vtg.cull_enable = ((1 << info->io.cullDistances) - 1) << info->io.clipDistances; - for (i = 0; i < info->io.cullDistances; ++i) - vs->vs.clip_mode |= 1 << ((info->io.clipDistances + i) * 4); - - vs->vs.layer_viewport_relative = info->io.layer_viewport_relative; return 0; } @@ -541,8 +538,8 @@ nvk_vtgp_gen_header(struct nvk_shader *vs, struct nv50_ir_prog_info_out *info) static int nvk_vs_gen_header(struct nvk_shader *vs, struct nv50_ir_prog_info_out *info) { - vs->hdr[0] = 0x20061 | (1 << 10); - vs->hdr[4] = 0xff000; + vs->info.hdr[0] = 0x20061 | (1 << 10); + vs->info.hdr[4] = 0xff000; return nvk_vtgp_gen_header(vs, info); } @@ -552,28 +549,28 @@ nvk_gs_gen_header(struct nvk_shader *gs, const struct nir_shader *nir, struct nv50_ir_prog_info_out *info) { - gs->hdr[0] = 0x20061 | (4 << 10); + gs->info.hdr[0] = 0x20061 | (4 << 10); - gs->hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24; + gs->info.hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24; switch (info->prop.gp.outputPrim) { case MESA_PRIM_POINTS: - gs->hdr[3] = 0x01000000; + gs->info.hdr[3] = 0x01000000; break; case MESA_PRIM_LINE_STRIP: - gs->hdr[3] = 0x06000000; + gs->info.hdr[3] = 0x06000000; break; case MESA_PRIM_TRIANGLE_STRIP: - gs->hdr[3] = 0x07000000; + gs->info.hdr[3] = 0x07000000; break; default: assert(0); break; } - gs->hdr[4] = CLAMP(info->prop.gp.maxVertices, 1, 1024); + gs->info.hdr[4] = CLAMP(info->prop.gp.maxVertices, 1, 1024); - gs->hdr[0] |= nir->info.gs.active_stream_mask << 28; + gs->info.hdr[0] |= nir->info.gs.active_stream_mask << 28; return nvk_vtgp_gen_header(gs, info); } @@ -585,56 +582,46 @@ nvk_generate_tessellation_parameters(const struct nv50_ir_prog_info_out *info, // TODO: this is a little confusing because nouveau codegen uses // MESA_PRIM_POINTS for unspecified domain and // MESA_PRIM_POINTS = 0, the same as NV9097 ISOLINE enum - uint32_t domain_type; switch (info->prop.tp.domain) { case MESA_PRIM_LINES: - domain_type = NV9097_SET_TESSELLATION_PARAMETERS_DOMAIN_TYPE_ISOLINE; + shader->info.ts.domain = NAK_TS_DOMAIN_ISOLINE; break; case MESA_PRIM_TRIANGLES: - domain_type = NV9097_SET_TESSELLATION_PARAMETERS_DOMAIN_TYPE_TRIANGLE; + shader->info.ts.domain = NAK_TS_DOMAIN_TRIANGLE; break; case MESA_PRIM_QUADS: - domain_type = NV9097_SET_TESSELLATION_PARAMETERS_DOMAIN_TYPE_QUAD; + shader->info.ts.domain = NAK_TS_DOMAIN_QUAD; break; default: - domain_type = ~0; - break; - } - shader->tp.domain_type = domain_type; - if (domain_type == ~0) { return; } - uint32_t spacing; switch (info->prop.tp.partitioning) { case PIPE_TESS_SPACING_EQUAL: - spacing = NV9097_SET_TESSELLATION_PARAMETERS_SPACING_INTEGER; + shader->info.ts.spacing = NAK_TS_SPACING_INTEGER; break; case PIPE_TESS_SPACING_FRACTIONAL_ODD: - spacing = NV9097_SET_TESSELLATION_PARAMETERS_SPACING_FRACTIONAL_ODD; + shader->info.ts.spacing = NAK_TS_SPACING_FRACT_ODD; break; case PIPE_TESS_SPACING_FRACTIONAL_EVEN: - spacing = NV9097_SET_TESSELLATION_PARAMETERS_SPACING_FRACTIONAL_EVEN; + shader->info.ts.spacing = NAK_TS_SPACING_FRACT_EVEN; break; default: assert(!"invalid tessellator partitioning"); break; } - shader->tp.spacing = spacing; - uint32_t output_prims; if (info->prop.tp.outputPrim == MESA_PRIM_POINTS) { // point_mode - output_prims = NV9097_SET_TESSELLATION_PARAMETERS_OUTPUT_PRIMITIVES_POINTS; + shader->info.ts.prims = NAK_TS_PRIMS_POINTS; } else if (info->prop.tp.domain == MESA_PRIM_LINES) { // isoline domain - output_prims = NV9097_SET_TESSELLATION_PARAMETERS_OUTPUT_PRIMITIVES_LINES; + shader->info.ts.prims = NAK_TS_PRIMS_LINES; } else { // triangle/quad domain if (info->prop.tp.winding > 0) { - output_prims = NV9097_SET_TESSELLATION_PARAMETERS_OUTPUT_PRIMITIVES_TRIANGLES_CW; + shader->info.ts.prims = NAK_TS_PRIMS_TRIANGLES_CW; } else { - output_prims = NV9097_SET_TESSELLATION_PARAMETERS_OUTPUT_PRIMITIVES_TRIANGLES_CCW; + shader->info.ts.prims = NAK_TS_PRIMS_TRIANGLES_CCW; } } - shader->tp.output_prims = output_prims; } static int @@ -645,12 +632,12 @@ nvk_tcs_gen_header(struct nvk_shader *tcs, struct nv50_ir_prog_info_out *info) if (info->numPatchConstants) opcs = 8 + info->numPatchConstants * 4; - tcs->hdr[0] = 0x20061 | (2 << 10); + tcs->info.hdr[0] = 0x20061 | (2 << 10); - tcs->hdr[1] = opcs << 24; - tcs->hdr[2] = info->prop.tp.outputPatchSize << 24; + tcs->info.hdr[1] = opcs << 24; + tcs->info.hdr[2] = info->prop.tp.outputPatchSize << 24; - tcs->hdr[4] = 0xff000; /* initial min/max parallel output read address */ + tcs->info.hdr[4] = 0xff000; /* initial min/max parallel output read address */ nvk_vtgp_gen_header(tcs, info); @@ -659,8 +646,8 @@ nvk_tcs_gen_header(struct nvk_shader *tcs, struct nv50_ir_prog_info_out *info) * header, but it seems like blob still also uses the old position. * Also, the high 8-bits are located in between the min/max parallel * field and has to be set after updating the outputs. */ - tcs->hdr[3] = (opcs & 0x0f) << 28; - tcs->hdr[4] |= (opcs & 0xf0) << 16; + tcs->info.hdr[3] = (opcs & 0x0f) << 28; + tcs->info.hdr[4] |= (opcs & 0xf0) << 16; } nvk_generate_tessellation_parameters(info, tcs); @@ -671,14 +658,14 @@ nvk_tcs_gen_header(struct nvk_shader *tcs, struct nv50_ir_prog_info_out *info) static int nvk_tes_gen_header(struct nvk_shader *tes, struct nv50_ir_prog_info_out *info) { - tes->hdr[0] = 0x20061 | (3 << 10); - tes->hdr[4] = 0xff000; + tes->info.hdr[0] = 0x20061 | (3 << 10); + tes->info.hdr[4] = 0xff000; nvk_vtgp_gen_header(tes, info); nvk_generate_tessellation_parameters(info, tes); - tes->hdr[18] |= 0x3 << 12; /* ? */ + tes->info.hdr[18] |= 0x3 << 12; /* ? */ return 0; } @@ -706,38 +693,33 @@ nvk_fs_gen_header(struct nvk_shader *fs, const struct nak_fs_key *key, unsigned i, c, a, m; /* just 00062 on Kepler */ - fs->hdr[0] = 0x20062 | (5 << 10); - fs->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ + fs->info.hdr[0] = 0x20062 | (5 << 10); + fs->info.hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ if (info->prop.fp.usesDiscard || key->zs_self_dep) - fs->hdr[0] |= 0x8000; + fs->info.hdr[0] |= 0x8000; if (!info->prop.fp.separateFragData) - fs->hdr[0] |= 0x4000; + fs->info.hdr[0] |= 0x4000; if (info->io.sampleMask < 80 /* PIPE_MAX_SHADER_OUTPUTS */) - fs->hdr[19] |= 0x1; + fs->info.hdr[19] |= 0x1; if (info->prop.fp.writesDepth) { - fs->hdr[19] |= 0x2; - fs->flags[0] = 0x11; /* deactivate ZCULL */ + fs->info.hdr[19] |= 0x2; + fs->info.fs.writes_depth = true; } for (i = 0; i < info->numInputs; ++i) { m = nvk_hdr_interp_mode(&info->in[i]); - if (info->in[i].sn == TGSI_SEMANTIC_COLOR) { - fs->fs.colors |= 1 << info->in[i].si; - if (info->in[i].sc) - fs->fs.color_interp[info->in[i].si] = m | (info->in[i].mask << 4); - } for (c = 0; c < 4; ++c) { if (!(info->in[i].mask & (1 << c))) continue; a = info->in[i].slot[c]; if (info->in[i].slot[0] >= (0x060 / 4) && info->in[i].slot[0] <= (0x07c / 4)) { - fs->hdr[5] |= 1 << (24 + (a - 0x060 / 4)); + fs->info.hdr[5] |= 1 << (24 + (a - 0x060 / 4)); } else if (info->in[i].slot[0] >= (0x2c0 / 4) && info->in[i].slot[0] <= (0x2fc / 4)) { - fs->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000; + fs->info.hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000; } else { if (info->in[i].slot[c] < (0x040 / 4) || info->in[i].slot[c] > (0x380 / 4)) @@ -745,17 +727,17 @@ nvk_fs_gen_header(struct nvk_shader *fs, const struct nak_fs_key *key, a *= 2; if (info->in[i].slot[0] >= (0x300 / 4)) a -= 32; - fs->hdr[4 + a / 32] |= m << (a % 32); + fs->info.hdr[4 + a / 32] |= m << (a % 32); } } } /* GM20x+ needs TGSI_SEMANTIC_POSITION to access sample locations */ if (info->prop.fp.readsSampleLocations && info->target >= NVISA_GM200_CHIPSET) - fs->hdr[5] |= 0x30000000; + fs->info.hdr[5] |= 0x30000000; for (i = 0; i < info->numOutputs; ++i) { if (info->out[i].sn == TGSI_SEMANTIC_COLOR) - fs->hdr[18] |= 0xf << (4 * info->out[i].si); + fs->info.hdr[18] |= 0xf << (4 * info->out[i].si); } /* There are no "regular" attachments, but the shader still needs to be @@ -765,16 +747,11 @@ nvk_fs_gen_header(struct nvk_shader *fs, const struct nak_fs_key *key, if (info->prop.fp.numColourResults == 0 && !info->prop.fp.writesDepth && info->io.sampleMask >= 80 /* PIPE_MAX_SHADER_OUTPUTS */) - fs->hdr[18] |= 0xf; - - fs->fs.early_z = info->prop.fp.earlyFragTests; - fs->fs.sample_mask_in = info->prop.fp.usesSampleMaskIn; - fs->fs.reads_framebuffer = info->prop.fp.readsFramebuffer; - fs->fs.post_depth_coverage = info->prop.fp.postDepthCoverage; + fs->info.hdr[18] |= 0xf; - /* Mark position xy and layer as read */ - if (fs->fs.reads_framebuffer) - fs->hdr[5] |= 0x32000000; + fs->info.fs.early_fragment_tests = info->prop.fp.earlyFragTests; + fs->info.fs.reads_sample_mask = info->prop.fp.usesSampleMaskIn; + fs->info.fs.post_depth_coverage = info->prop.fp.postDepthCoverage; return 0; } @@ -794,8 +771,9 @@ static uint8_t find_register_index_for_xfb_output(const struct nir_shader *nir, return 0; } -static struct nvk_transform_feedback_state * -nvk_fill_transform_feedback_state(struct nir_shader *nir, +static void +nvk_fill_transform_feedback_state(struct nak_xfb_info *xfb, + struct nir_shader *nir, const struct nv50_ir_prog_info_out *info) { const uint8_t max_buffers = 4; @@ -803,21 +781,17 @@ nvk_fill_transform_feedback_state(struct nir_shader *nir, const struct nir_xfb_info *nx = nir->xfb_info; //nir_print_xfb_info(nx, stdout); - struct nvk_transform_feedback_state *xfb = - malloc(sizeof(struct nvk_transform_feedback_state)); - - if (!xfb) - return NULL; + memset(xfb, 0, sizeof(*xfb)); for (uint8_t b = 0; b < max_buffers; ++b) { xfb->stride[b] = b < nx->buffers_written ? nx->buffers[b].stride : 0; - xfb->varying_count[b] = 0; + xfb->attr_count[b] = 0; xfb->stream[b] = nx->buffer_to_stream[b]; } - memset(xfb->varying_index, 0xff, sizeof(xfb->varying_index)); /* = skip */ + memset(xfb->attr_index, 0xff, sizeof(xfb->attr_index)); /* = skip */ if (info->numOutputs == 0) - return xfb; + return; for (uint32_t i = 0; i < nx->output_count; ++i) { const nir_xfb_output_info output = nx->outputs[i]; @@ -825,20 +799,18 @@ nvk_fill_transform_feedback_state(struct nir_shader *nir, const uint8_t r = find_register_index_for_xfb_output(nir, output); uint32_t p = output.offset / dw_bytes; - assert(r < info->numOutputs && p < ARRAY_SIZE(xfb->varying_index[b])); + assert(r < info->numOutputs && p < ARRAY_SIZE(xfb->attr_index[b])); u_foreach_bit(c, nx->outputs[i].component_mask) - xfb->varying_index[b][p++] = info->out[r].slot[c]; + xfb->attr_index[b][p++] = info->out[r].slot[c]; - xfb->varying_count[b] = MAX2(xfb->varying_count[b], p); + xfb->attr_count[b] = MAX2(xfb->attr_count[b], p); } /* zero unused indices */ for (uint8_t b = 0; b < 4; ++b) - for (uint32_t c = xfb->varying_count[b]; c & 3; ++c) - xfb->varying_index[b][c] = 0; - - return xfb; + for (uint32_t c = xfb->attr_count[b]; c & 3; ++c) + xfb->attr_index[b][c] = 0; } VkResult @@ -861,9 +833,8 @@ nvk_cg_compile_nir(struct nvk_physical_device *pdev, nir_shader *nir, info->bin.nir = nir; for (unsigned i = 0; i < 3; i++) - shader->cp.block_size[i] = nir->info.workgroup_size[i]; + shader->info.cs.local_size[i] = nir->info.workgroup_size[i]; - info->bin.smemSize = shader->cp.smem_size; info->dbgFlags = nvk_cg_get_prog_debug(); info->optLevel = nvk_cg_get_prog_optimize(); info->io.auxCBSlot = 1; @@ -885,16 +856,22 @@ nvk_cg_compile_nir(struct nvk_physical_device *pdev, nir_shader *nir, fs_key && fs_key->force_sample_shading); } - shader->stage = nir->info.stage; + shader->info.stage = nir->info.stage; shader->code_ptr = (uint8_t *)info_out.bin.code; shader->code_size = info_out.bin.codeSize; if (info_out.target >= NVISA_GV100_CHIPSET) - shader->num_gprs = MAX2(4, info_out.bin.maxGPR + 3); + shader->info.num_gprs = MAX2(4, info_out.bin.maxGPR + 3); else - shader->num_gprs = MAX2(4, info_out.bin.maxGPR + 1); - shader->cp.smem_size = info_out.bin.smemSize; - shader->num_barriers = info_out.numBarriers; + shader->info.num_gprs = MAX2(4, info_out.bin.maxGPR + 1); + shader->info.num_barriers = info_out.numBarriers; + + if (info_out.bin.tlsSpace) { + assert(info_out.bin.tlsSpace < (1 << 24)); + shader->info.hdr[0] |= 1 << 26; + shader->info.hdr[1] |= align(info_out.bin.tlsSpace, 0x10); /* l[] size */ + shader->info.slm_size = info_out.bin.tlsSpace; + } switch (info->type) { case PIPE_SHADER_VERTEX: @@ -902,7 +879,7 @@ nvk_cg_compile_nir(struct nvk_physical_device *pdev, nir_shader *nir, break; case PIPE_SHADER_FRAGMENT: ret = nvk_fs_gen_header(shader, fs_key, &info_out); - shader->fs.uses_sample_shading = nir->info.fs.uses_sample_shading; + shader->info.fs.uses_sample_shading = nir->info.fs.uses_sample_shading; break; case PIPE_SHADER_GEOMETRY: ret = nvk_gs_gen_header(shader, nir, &info_out); @@ -914,6 +891,7 @@ nvk_cg_compile_nir(struct nvk_physical_device *pdev, nir_shader *nir, ret = nvk_tes_gen_header(shader, &info_out); break; case PIPE_SHADER_COMPUTE: + shader->info.cs.smem_size = info_out.bin.smemSize; break; default: unreachable("Invalid shader stage"); @@ -921,26 +899,15 @@ nvk_cg_compile_nir(struct nvk_physical_device *pdev, nir_shader *nir, } assert(ret == 0); - if (info_out.bin.tlsSpace) { - assert(info_out.bin.tlsSpace < (1 << 24)); - shader->hdr[0] |= 1 << 26; - shader->hdr[1] |= align(info_out.bin.tlsSpace, 0x10); /* l[] size */ - shader->slm_size = info_out.bin.tlsSpace; - } - if (info_out.io.globalAccess) - shader->hdr[0] |= 1 << 26; + shader->info.hdr[0] |= 1 << 26; if (info_out.io.globalAccess & 0x2) - shader->hdr[0] |= 1 << 16; + shader->info.hdr[0] |= 1 << 16; if (info_out.io.fp64) - shader->hdr[0] |= 1 << 27; + shader->info.hdr[0] |= 1 << 27; - if (nir->xfb_info) { - shader->xfb = nvk_fill_transform_feedback_state(nir, &info_out); - if (shader->xfb == NULL) { - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - } + if (nir->xfb_info) + nvk_fill_transform_feedback_state(&shader->info.vtg.xfb, nir, &info_out); return VK_SUCCESS; } diff --git a/src/nouveau/vulkan/nvk_compute_pipeline.c b/src/nouveau/vulkan/nvk_compute_pipeline.c index 7056ff498cc..f51cddea38b 100644 --- a/src/nouveau/vulkan/nvk_compute_pipeline.c +++ b/src/nouveau/vulkan/nvk_compute_pipeline.c @@ -54,21 +54,21 @@ gv100_sm_config_smem_size(uint32_t size) #define base_compute_setup_launch_desc_template(qmd, shader, class_id, version_major, version_minor) \ do { \ QMD_DEF_SET(qmd, class_id, version_major, version_minor, API_VISIBLE_CALL_LIMIT, NO_CHECK); \ - QMD_VAL_SET(qmd, class_id, version_major, version_minor, BARRIER_COUNT, shader->num_barriers); \ + QMD_VAL_SET(qmd, class_id, version_major, version_minor, BARRIER_COUNT, shader->info.num_barriers); \ QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION0, \ - shader->cp.block_size[0]); \ + shader->info.cs.local_size[0]); \ QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION1, \ - shader->cp.block_size[1]); \ + shader->info.cs.local_size[1]); \ QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION2, \ - shader->cp.block_size[2]); \ + shader->info.cs.local_size[2]); \ QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_MAJOR_VERSION, version_major); \ QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_VERSION, version_minor); \ QMD_DEF_SET(qmd, class_id, version_major, version_minor, SAMPLER_INDEX, INDEPENDENTLY); \ QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); \ QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_LOW_SIZE, \ - align(shader->slm_size, 0x10)); \ + align(shader->info.slm_size, 0x10)); \ QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHARED_MEMORY_SIZE, \ - align(shader->cp.smem_size, 0x100)); \ + align(shader->info.cs.smem_size, 0x100)); \ } while (0) static void @@ -83,11 +83,11 @@ nva0c0_compute_setup_launch_desc_template(uint32_t *qmd, NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, TRUE); NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_DATA_CACHE, TRUE); - if (shader->cp.smem_size <= (16 << 10)) + if (shader->info.cs.smem_size <= (16 << 10)) NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB); - else if (shader->cp.smem_size <= (32 << 10)) + else if (shader->info.cs.smem_size <= (32 << 10)) NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB); - else if (shader->cp.smem_size <= (48 << 10)) + else if (shader->info.cs.smem_size <= (48 << 10)) NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB); else unreachable("Invalid shared memory size"); @@ -95,7 +95,7 @@ nva0c0_compute_setup_launch_desc_template(uint32_t *qmd, uint64_t addr = nvk_shader_address(shader); assert(addr < 0xffffffff); NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET, addr); - NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, shader->num_gprs); + NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, shader->info.num_gprs); NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30); } @@ -110,7 +110,7 @@ nvc0c0_compute_setup_launch_desc_template(uint32_t *qmd, NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1); NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET, addr); - NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, shader->num_gprs); + NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, shader->info.num_gprs); } static void @@ -126,9 +126,9 @@ nvc3c0_compute_setup_launch_desc_template(uint32_t *qmd, NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE, gv100_sm_config_smem_size(96 * 1024)); NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE, - gv100_sm_config_smem_size(shader->cp.smem_size)); + gv100_sm_config_smem_size(shader->info.cs.smem_size)); - NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, shader->num_gprs); + NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, shader->info.num_gprs); uint64_t addr = nvk_shader_address(shader); NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff); @@ -148,9 +148,9 @@ nvc6c0_compute_setup_launch_desc_template(uint32_t *qmd, NVC6C0_QMDV03_00_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE, gv100_sm_config_smem_size(96 * 1024)); NVC6C0_QMDV03_00_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE, - gv100_sm_config_smem_size(shader->cp.smem_size)); + gv100_sm_config_smem_size(shader->info.cs.smem_size)); - NVC6C0_QMDV03_00_VAL_SET(qmd, REGISTER_COUNT_V, shader->num_gprs); + NVC6C0_QMDV03_00_VAL_SET(qmd, REGISTER_COUNT_V, shader->info.num_gprs); uint64_t addr = nvk_shader_address(shader); NVC6C0_QMDV03_00_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff); diff --git a/src/nouveau/vulkan/nvk_graphics_pipeline.c b/src/nouveau/vulkan/nvk_graphics_pipeline.c index 4af8b6cd951..4213f229aeb 100644 --- a/src/nouveau/vulkan/nvk_graphics_pipeline.c +++ b/src/nouveau/vulkan/nvk_graphics_pipeline.c @@ -202,22 +202,20 @@ emit_pipeline_ct_write_state(struct nv_push *p, } static void -emit_pipeline_xfb_state(struct nv_push *p, - const struct nvk_transform_feedback_state *xfb) +emit_pipeline_xfb_state(struct nv_push *p, const struct nak_xfb_info *xfb) { - const uint8_t max_buffers = 4; - for (uint8_t b = 0; b < max_buffers; ++b) { - const uint32_t var_count = xfb->varying_count[b]; + for (uint8_t b = 0; b < ARRAY_SIZE(xfb->attr_count); b++) { + const uint8_t attr_count = xfb->attr_count[b]; P_MTHD(p, NV9097, SET_STREAM_OUT_CONTROL_STREAM(b)); P_NV9097_SET_STREAM_OUT_CONTROL_STREAM(p, b, xfb->stream[b]); - P_NV9097_SET_STREAM_OUT_CONTROL_COMPONENT_COUNT(p, b, var_count); + P_NV9097_SET_STREAM_OUT_CONTROL_COMPONENT_COUNT(p, b, attr_count); P_NV9097_SET_STREAM_OUT_CONTROL_STRIDE(p, b, xfb->stride[b]); /* upload packed varying indices in multiples of 4 bytes */ - const uint32_t n = (var_count + 3) / 4; + const uint32_t n = DIV_ROUND_UP(attr_count, 4); if (n > 0) { P_MTHD(p, NV9097, SET_STREAM_OUT_LAYOUT_SELECT(b, 0)); - P_INLINE_ARRAY(p, (const uint32_t*)xfb->varying_index[b], n); + P_INLINE_ARRAY(p, (const uint32_t*)xfb->attr_index[b], n); } } } @@ -235,22 +233,19 @@ emit_tessellation_paramaters(struct nv_push *p, const struct nvk_shader *shader, const struct vk_tessellation_state *state) { - const uint32_t cw = NV9097_SET_TESSELLATION_PARAMETERS_OUTPUT_PRIMITIVES_TRIANGLES_CW; - const uint32_t ccw = NV9097_SET_TESSELLATION_PARAMETERS_OUTPUT_PRIMITIVES_TRIANGLES_CCW; - uint32_t output_prims = shader->tp.output_prims; + enum nak_ts_prims prims = shader->info.ts.prims; /* When the origin is lower-left, we have to flip the winding order */ if (state->domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) { - if (output_prims == cw) { - output_prims = ccw; - } else if (output_prims == ccw) { - output_prims = cw; - } + if (prims == NAK_TS_PRIMS_TRIANGLES_CW) + prims = NAK_TS_PRIMS_TRIANGLES_CCW; + else if (prims == NAK_TS_PRIMS_TRIANGLES_CCW) + prims = NAK_TS_PRIMS_TRIANGLES_CW; } P_MTHD(p, NV9097, SET_TESSELLATION_PARAMETERS); P_NV9097_SET_TESSELLATION_PARAMETERS(p, { - shader->tp.domain_type, - shader->tp.spacing, - output_prims + shader->info.ts.domain, + shader->info.ts.spacing, + prims }); } @@ -404,7 +399,8 @@ nvk_graphics_pipeline_create(struct nvk_device *dev, P_IMMD(p, NV9097, SET_PIPELINE_PROGRAM(idx), addr); } - P_IMMD(p, NV9097, SET_PIPELINE_REGISTER_COUNT(idx), shader->num_gprs); + P_IMMD(p, NV9097, SET_PIPELINE_REGISTER_COUNT(idx), + shader->info.num_gprs); switch (stage) { case MESA_SHADER_VERTEX: @@ -420,32 +416,35 @@ nvk_graphics_pipeline_create(struct nvk_device *dev, }); P_NV9097_SET_SUBTILING_PERF_KNOB_B(p, 0x20); - P_IMMD(p, NV9097, SET_API_MANDATED_EARLY_Z, shader->fs.early_z); + P_IMMD(p, NV9097, SET_API_MANDATED_EARLY_Z, + shader->info.fs.early_fragment_tests); if (dev->pdev->info.cls_eng3d >= MAXWELL_B) { P_IMMD(p, NVB197, SET_POST_Z_PS_IMASK, - shader->fs.post_depth_coverage); + shader->info.fs.post_depth_coverage); } else { - assert(!shader->fs.post_depth_coverage); + assert(!shader->info.fs.post_depth_coverage); } - P_MTHD(p, NV9097, SET_ZCULL_BOUNDS); - P_INLINE_DATA(p, shader->flags[0]); + P_IMMD(p, NV9097, SET_ZCULL_BOUNDS, { + .z_min_unbounded_enable = shader->info.fs.writes_depth, + .z_max_unbounded_enable = shader->info.fs.writes_depth, + }); /* If we're using the incoming sample mask and doing sample shading, * we have to do sample shading "to the max", otherwise there's no * way to tell which sets of samples are covered by the current * invocation. */ - force_max_samples = shader->fs.sample_mask_in || - shader->fs.uses_sample_shading; + force_max_samples = shader->info.fs.reads_sample_mask || + shader->info.fs.uses_sample_shading; break; case MESA_SHADER_TESS_CTRL: + break; + case MESA_SHADER_TESS_EVAL: - if (shader->tp.domain_type != ~0) { - emit_tessellation_paramaters(p, shader, state.ts); - } + emit_tessellation_paramaters(p, shader, state.ts); break; default: @@ -453,8 +452,8 @@ nvk_graphics_pipeline_create(struct nvk_device *dev, } } - const uint8_t clip_cull = last_geom->vs.clip_enable | - last_geom->vs.cull_enable; + const uint8_t clip_cull = last_geom->info.vtg.clip_enable | + last_geom->info.vtg.cull_enable; if (clip_cull) { P_IMMD(p, NV9097, SET_USER_CLIP_ENABLE, { .plane0 = (clip_cull >> 0) & 1, @@ -467,28 +466,26 @@ nvk_graphics_pipeline_create(struct nvk_device *dev, .plane7 = (clip_cull >> 7) & 1, }); P_IMMD(p, NV9097, SET_USER_CLIP_OP, { - .plane0 = (last_geom->vs.cull_enable >> 0) & 1, - .plane1 = (last_geom->vs.cull_enable >> 1) & 1, - .plane2 = (last_geom->vs.cull_enable >> 2) & 1, - .plane3 = (last_geom->vs.cull_enable >> 3) & 1, - .plane4 = (last_geom->vs.cull_enable >> 4) & 1, - .plane5 = (last_geom->vs.cull_enable >> 5) & 1, - .plane6 = (last_geom->vs.cull_enable >> 6) & 1, - .plane7 = (last_geom->vs.cull_enable >> 7) & 1, + .plane0 = (last_geom->info.vtg.cull_enable >> 0) & 1, + .plane1 = (last_geom->info.vtg.cull_enable >> 1) & 1, + .plane2 = (last_geom->info.vtg.cull_enable >> 2) & 1, + .plane3 = (last_geom->info.vtg.cull_enable >> 3) & 1, + .plane4 = (last_geom->info.vtg.cull_enable >> 4) & 1, + .plane5 = (last_geom->info.vtg.cull_enable >> 5) & 1, + .plane6 = (last_geom->info.vtg.cull_enable >> 6) & 1, + .plane7 = (last_geom->info.vtg.cull_enable >> 7) & 1, }); } /* TODO: prog_selects_layer */ P_IMMD(p, NV9097, SET_RT_LAYER, { .v = 0, - .control = (last_geom->hdr[13] & (1 << 9)) ? + .control = last_geom->info.vtg.writes_layer ? CONTROL_GEOMETRY_SHADER_SELECTS_LAYER : CONTROL_V_SELECTS_LAYER, }); - if (last_geom->xfb) { - emit_pipeline_xfb_state(&push, last_geom->xfb); - } + emit_pipeline_xfb_state(&push, &last_geom->info.vtg.xfb); if (state.ts) emit_pipeline_ts_state(&push, state.ts); if (state.vp) emit_pipeline_vp_state(&push, state.vp); diff --git a/src/nouveau/vulkan/nvk_shader.c b/src/nouveau/vulkan/nvk_shader.c index efb4d5f589c..3ad019c61f8 100644 --- a/src/nouveau/vulkan/nvk_shader.c +++ b/src/nouveau/vulkan/nvk_shader.c @@ -375,18 +375,18 @@ nvk_shader_dump(struct nvk_shader *shader) { unsigned pos; - if (shader->stage != MESA_SHADER_COMPUTE) { + if (shader->info.stage != MESA_SHADER_COMPUTE) { _debug_printf("dumping HDR for %s shader\n", - _mesa_shader_stage_to_string(shader->stage)); - for (pos = 0; pos < ARRAY_SIZE(shader->hdr); ++pos) + _mesa_shader_stage_to_string(shader->info.stage)); + for (pos = 0; pos < ARRAY_SIZE(shader->info.hdr); ++pos) _debug_printf("HDR[%02"PRIxPTR"] = 0x%08x\n", - pos * sizeof(shader->hdr[0]), shader->hdr[pos]); + pos * sizeof(shader->info.hdr[0]), shader->info.hdr[pos]); } _debug_printf("shader binary code (0x%x bytes):", shader->code_size); for (pos = 0; pos < shader->code_size / 4; ++pos) { if ((pos % 8) == 0) _debug_printf("\n"); - _debug_printf("%08x ", ((uint32_t *)shader->code_ptr)[pos]); + _debug_printf("%08x ", ((const uint32_t *)shader->code_ptr)[pos]); } _debug_printf("\n"); } @@ -398,70 +398,10 @@ nvk_compile_nir_with_nak(struct nvk_physical_device *pdev, const struct nak_fs_key *fs_key, struct nvk_shader *shader) { - struct nak_shader_bin *bin = nak_compile_shader(nir, pdev->nak, fs_key); - - shader->stage = nir->info.stage; - - shader->num_gprs = bin->info.num_gprs; - shader->num_barriers = bin->info.num_barriers; - shader->slm_size = bin->info.slm_size; - - switch (nir->info.stage) { - case MESA_SHADER_COMPUTE: - for (unsigned i = 0; i < 3; i++) - shader->cp.block_size[i] = bin->info.cs.local_size[i]; - shader->cp.smem_size = bin->info.cs.smem_size; - break; - - case MESA_SHADER_FRAGMENT: - if (bin->info.fs.writes_depth) - shader->flags[0] = 0x11; /* deactivate ZCULL */ - shader->fs.sample_mask_in = bin->info.fs.reads_sample_mask; - shader->fs.post_depth_coverage = bin->info.fs.post_depth_coverage; - shader->fs.uses_sample_shading = bin->info.fs.uses_sample_shading; - shader->fs.early_z = bin->info.fs.early_fragment_tests; - break; - - case MESA_SHADER_VERTEX: - case MESA_SHADER_TESS_EVAL: - case MESA_SHADER_GEOMETRY: { - shader->vs.clip_enable = bin->info.vtg.clip_enable; - shader->vs.cull_enable = bin->info.vtg.cull_enable; - - if (nir->info.stage == MESA_SHADER_TESS_EVAL) { - shader->tp.domain_type = bin->info.ts.domain; - shader->tp.spacing = bin->info.ts.spacing; - shader->tp.output_prims = bin->info.ts.prims; - } else { - shader->tp.domain_type = ~0; - } - - bool has_xfb = false; - for (unsigned b = 0; b < 4; b++) { - if (bin->info.vtg.xfb.attr_count[b] > 0) { - has_xfb = true; - break; - } - } - - if (has_xfb) { - shader->xfb = malloc(sizeof(*shader->xfb)); - STATIC_ASSERT(sizeof(*shader->xfb) == sizeof(bin->info.vtg.xfb)); - memcpy(shader->xfb, &bin->info.vtg.xfb, sizeof(*shader->xfb)); - } - break; - } - - default: - break; - } - - STATIC_ASSERT(sizeof(shader->hdr) == sizeof(bin->info.hdr)); - memcpy(shader->hdr, bin->info.hdr, sizeof(bin->info.hdr)); - - shader->nak = bin; - shader->code_ptr = (void *)bin->code; - shader->code_size = bin->code_size; + shader->nak = nak_compile_shader(nir, pdev->nak, fs_key); + shader->info = shader->nak->info; + shader->code_ptr = shader->nak->code; + shader->code_size = shader->nak->code_size; return VK_SUCCESS; } @@ -481,7 +421,7 @@ VkResult nvk_shader_upload(struct nvk_device *dev, struct nvk_shader *shader) { uint32_t hdr_size = 0; - if (shader->stage != MESA_SHADER_COMPUTE) { + if (shader->info.stage != MESA_SHADER_COMPUTE) { if (dev->pdev->info.cls_eng3d >= TURING_A) hdr_size = TU102_SHADER_HEADER_SIZE; else @@ -506,7 +446,8 @@ nvk_shader_upload(struct nvk_device *dev, struct nvk_shader *shader) if (data == NULL) return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); - memcpy(data + offset, shader->hdr, hdr_size); + assert(hdr_size <= sizeof(shader->info.hdr)); + memcpy(data + offset, shader->info.hdr, hdr_size); memcpy(data + offset + hdr_size, shader->code_ptr, shader->code_size); #ifndef NDEBUG @@ -536,6 +477,4 @@ nvk_shader_finish(struct nvk_device *dev, struct nvk_shader *shader) if (shader->nak) nak_shader_bin_destroy(shader->nak); - - free(shader->xfb); } diff --git a/src/nouveau/vulkan/nvk_shader.h b/src/nouveau/vulkan/nvk_shader.h index 793509ee4ab..9e7d2430a3a 100644 --- a/src/nouveau/vulkan/nvk_shader.h +++ b/src/nouveau/vulkan/nvk_shader.h @@ -25,66 +25,16 @@ struct vk_shader_module; #define TU102_SHADER_HEADER_SIZE (32 * 4) #define NVC0_MAX_SHADER_HEADER_SIZE TU102_SHADER_HEADER_SIZE -struct nvk_transform_feedback_state { - uint32_t stride[4]; - uint8_t stream[4]; - uint8_t varying_count[4]; - uint8_t varying_index[4][128]; -}; - struct nvk_shader { - gl_shader_stage stage; + struct nak_shader_info info; struct nak_shader_bin *nak; - uint8_t *code_ptr; + const void *code_ptr; uint32_t code_size; uint32_t upload_size; uint64_t upload_addr; uint32_t upload_padding; - - uint8_t num_gprs; - uint8_t num_barriers; - uint32_t slm_size; - - uint32_t hdr[NVC0_MAX_SHADER_HEADER_SIZE/4]; - uint32_t flags[2]; - - struct { - uint32_t clip_mode; /* clip/cull selection */ - uint8_t clip_enable; /* mask of defined clip planes */ - uint8_t cull_enable; /* mask of defined cull distances */ - uint8_t edgeflag; /* attribute index of edgeflag input */ - bool need_vertex_id; - bool need_draw_parameters; - bool layer_viewport_relative; /* also applies go gp and tp */ - } vs; - - struct { - uint8_t early_z; - uint8_t colors; - uint8_t color_interp[2]; - bool sample_mask_in; - bool uses_sample_shading; - bool force_persample_interp; - bool flatshade; - bool reads_framebuffer; - bool post_depth_coverage; - bool msaa; - } fs; - - struct { - uint32_t domain_type; /* ~0 if params defined by the other stage */ - uint32_t spacing; - uint32_t output_prims; - } tp; - - struct { - uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */ - uint32_t block_size[3]; - } cp; - - struct nvk_transform_feedback_state *xfb; }; static inline uint64_t
