v3: fix compiler warnings
v4: use loadFrom helper

Signed-off-by: Karol Herbst <kher...@redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 276 +++++++++++++++++++--
 1 file changed, 258 insertions(+), 18 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 9105cddf93..f7b51339c2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -87,6 +87,8 @@ public: LValues& convert(nir_register *); LValues& convert(nir_ssa_def *); + ImgFormat convertGLImgFormat(GLuint); + // nir_alu_src needs special handling due to neg and abs modifiers Value* getSrc(nir_alu_src *, uint8_t component = 0); Value* getSrc(nir_register *, uint8_t); @@ -141,6 +143,7 @@ public: // tex stuff Value* applyProjection(Value *src, Value *proj); + unsigned int getNIRArgCount(TexInstruction::Target&); private: nir_shader *nir; @@ -435,28 +438,31 @@ Converter::getSubOp(nir_op op) } } +#define CASE_OP_INTR_ATOM(nir, nvir) \ + case nir_intrinsic_image_atomic_ ## nir : \ + case nir_intrinsic_shared_atomic_ ## nir : \ + case nir_intrinsic_ssbo_atomic_ ## nir : \ + return NV50_IR_SUBOP_ATOM_ ## nvir +#define CASE_OP_INTR_ATOM_S(nir, nvir) \ + case nir_intrinsic_shared_atomic_ ## nir : \ + case nir_intrinsic_ssbo_atomic_ ## nir : \ + return NV50_IR_SUBOP_ATOM_ ## nvir int Converter::getSubOp(nir_intrinsic_op op) { switch (op) { - case nir_intrinsic_ssbo_atomic_add: - return NV50_IR_SUBOP_ATOM_ADD; - case nir_intrinsic_ssbo_atomic_and: - return NV50_IR_SUBOP_ATOM_AND; - case nir_intrinsic_ssbo_atomic_comp_swap: - return NV50_IR_SUBOP_ATOM_CAS; - case nir_intrinsic_ssbo_atomic_exchange: - return NV50_IR_SUBOP_ATOM_EXCH; - case nir_intrinsic_ssbo_atomic_or: - return NV50_IR_SUBOP_ATOM_OR; - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_umax: - return NV50_IR_SUBOP_ATOM_MAX; - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umin: - return NV50_IR_SUBOP_ATOM_MIN; - case nir_intrinsic_ssbo_atomic_xor: - return NV50_IR_SUBOP_ATOM_XOR; + CASE_OP_INTR_ATOM(add, ADD); + CASE_OP_INTR_ATOM(and, AND); + 
CASE_OP_INTR_ATOM(comp_swap, CAS); + CASE_OP_INTR_ATOM(exchange, EXCH); + CASE_OP_INTR_ATOM(or, OR); + case nir_intrinsic_image_atomic_max: + CASE_OP_INTR_ATOM_S(imax, MAX); + CASE_OP_INTR_ATOM_S(umax, MAX); + case nir_intrinsic_image_atomic_min: + CASE_OP_INTR_ATOM_S(imin, MIN); + CASE_OP_INTR_ATOM_S(umin, MIN); + CASE_OP_INTR_ATOM(xor, XOR); case nir_intrinsic_vote_all: return NV50_IR_SUBOP_VOTE_ALL; case nir_intrinsic_vote_any: @@ -469,6 +475,8 @@ Converter::getSubOp(nir_intrinsic_op op) return 0; } } +#undef CASE_OP_INTR_ATOM +#undef CASE_OP_INTR_ATOM_S CondCode Converter::getCondCode(nir_op op) @@ -1595,6 +1603,68 @@ Converter::convert(nir_intrinsic_op intr) } } +ImgFormat +Converter::convertGLImgFormat(GLuint format) +{ +#define FMT_CASE(a, b) \ + case GL_ ## a: return nv50_ir::FMT_ ## b + + switch (format) { + FMT_CASE(NONE, NONE); + + FMT_CASE(RGBA32F, RGBA32F); + FMT_CASE(RGBA16F, RGBA16F); + FMT_CASE(RG32F, RG32F); + FMT_CASE(RG16F, RG16F); + FMT_CASE(R11F_G11F_B10F, R11G11B10F); + FMT_CASE(R32F, R32F); + FMT_CASE(R16F, R16F); + + FMT_CASE(RGBA32UI, RGBA32UI); + FMT_CASE(RGBA16UI, RGBA16UI); + FMT_CASE(RGB10_A2UI, RGB10A2UI); + FMT_CASE(RGBA8UI, RGBA8UI); + FMT_CASE(RG32UI, RG32UI); + FMT_CASE(RG16UI, RG16UI); + FMT_CASE(RG8UI, RG8UI); + FMT_CASE(R32UI, R32UI); + FMT_CASE(R16UI, R16UI); + FMT_CASE(R8UI, R8UI); + + FMT_CASE(RGBA32I, RGBA32I); + FMT_CASE(RGBA16I, RGBA16I); + FMT_CASE(RGBA8I, RGBA8I); + FMT_CASE(RG32I, RG32I); + FMT_CASE(RG16I, RG16I); + FMT_CASE(RG8I, RG8I); + FMT_CASE(R32I, R32I); + FMT_CASE(R16I, R16I); + FMT_CASE(R8I, R8I); + + FMT_CASE(RGBA16, RGBA16); + FMT_CASE(RGB10_A2, RGB10A2); + FMT_CASE(RGBA8, RGBA8); + FMT_CASE(RG16, RG16); + FMT_CASE(RG8, RG8); + FMT_CASE(R16, R16); + FMT_CASE(R8, R8); + + FMT_CASE(RGBA16_SNORM, RGBA16_SNORM); + FMT_CASE(RGBA8_SNORM, RGBA8_SNORM); + FMT_CASE(RG16_SNORM, RG16_SNORM); + FMT_CASE(RG8_SNORM, RG8_SNORM); + FMT_CASE(R16_SNORM, R16_SNORM); + FMT_CASE(R8_SNORM, R8_SNORM); + + FMT_CASE(BGRA_INTEGER, 
BGRA8); + default: + ERROR("unknown format %x\n", format); + assert(false); + return nv50_ir::FMT_NONE; + } +#undef FMT_CASE +} + bool Converter::visit(nir_intrinsic_instr *insn) { @@ -1856,6 +1926,28 @@ Converter::visit(nir_intrinsic_instr *insn) info->io.globalAccess |= 0x1; break; } + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_comp_swap: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_xor: { + const DataType dType = getDType(insn); + LValues &newDefs = convert(&insn->dest); + Value *indirectOffset; + uint32_t offset = getIndirect(&insn->src[0], 0, &indirectOffset); + Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset); + Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0)); + if (op == nir_intrinsic_shared_atomic_comp_swap) + atom->setSrc(2, getSrc(&insn->src[2], 0)); + atom->setIndirect(0, 0, indirectOffset); + atom->subOp = getSubOp(op); + break; + } case nir_intrinsic_ssbo_atomic_add: case nir_intrinsic_ssbo_atomic_and: case nir_intrinsic_ssbo_atomic_comp_swap: @@ -1883,6 +1975,145 @@ Converter::visit(nir_intrinsic_instr *insn) info->io.globalAccess |= 0x2; break; } + case nir_intrinsic_image_store: { + nir_variable *tex = insn->variables[0]->var; + const glsl_type *type = tex->type->without_array(); + TexInstruction::Target target = convert((glsl_sampler_dim)type->sampler_dimensionality, type->sampler_array, type->sampler_shadow); + std::vector<Value*> srcs, dummy; + unsigned int argCount = getNIRArgCount(target); + + srcs.resize(argCount + 4); + for (auto i = 0u; i < argCount; ++i) + srcs[i] = getSrc(&insn->src[0], i); + for (auto i = 0u; i < 4; ++i) + srcs[argCount + i] = getSrc(&insn->src[2], i); + + if 
(tex->data.driver_location == 0xffffffff) { + ERROR("image_store with indirect not supported\n"); + assert(false); + } + + TexInstruction *texi = mkTex(OP_SUSTP, target.getEnum(), tex->data.driver_location, 0, dummy, srcs); + texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(tex->data.image.format)]; + + info->io.globalAccess |= 0x2; + break; + } + case nir_intrinsic_image_load: { + nir_variable *tex = insn->variables[0]->var; + const glsl_type *type = tex->type->without_array(); + TexInstruction::Target target = convert((glsl_sampler_dim)type->sampler_dimensionality, type->sampler_array, type->sampler_shadow); + std::vector<Value*> srcs, defs; + uint32_t mask = 0; + LValues &newDefs = convert(&insn->dest); + unsigned int argCount = getNIRArgCount(target); + + defs.resize(newDefs.size()); + for (auto i = 0u; i < newDefs.size(); ++i) { + defs[i] = newDefs[i]; + mask |= 1 << i; + } + + srcs.resize(argCount); + for (auto i = 0u; i < argCount; ++i) + srcs[i] = getSrc(&insn->src[0], i); + + if (tex->data.driver_location == 0xffffffff) { + ERROR("image_load with indirect not supported\n"); + assert(false); + } + + TexInstruction *texi = mkTex(OP_SULDP, target.getEnum(), tex->data.driver_location, 0, defs, srcs); + texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(tex->data.image.format)]; + texi->tex.mask = mask; + + info->io.globalAccess |= 0x2; + break; + } + case nir_intrinsic_image_size: { + nir_variable *tex = insn->variables[0]->var; + const glsl_type *type = tex->type->without_array(); + TexInstruction::Target target = convert((glsl_sampler_dim)type->sampler_dimensionality, type->sampler_array, type->sampler_shadow); + std::vector<Value*> srcs, defs; + uint32_t mask = 0; + LValues &newDefs = convert(&insn->dest); + + defs.resize(newDefs.size()); + for (auto i = 0u; i < newDefs.size(); ++i) { + defs[i] = newDefs[i]; + mask |= 1 << i; + } + + mkTex(OP_SUQ, target.getEnum(), tex->data.driver_location, 0, defs, 
srcs)->tex.mask = mask; + + info->io.globalAccess |= 0x2; + break; + } + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: { + const DataType dType = getDType(insn); + nir_variable *tex = insn->variables[0]->var; + const glsl_type *type = tex->type->without_array(); + TexInstruction::Target target = convert((glsl_sampler_dim)type->sampler_dimensionality, type->sampler_array, type->sampler_shadow); + std::vector<Value*> srcs, defs; + LValues &newDefs = convert(&insn->dest); + unsigned int argCount = getNIRArgCount(target); + + defs.push_back(newDefs[0]); + + srcs.resize(argCount); + for (auto i = 0u; i < argCount; ++i) + srcs[i] = getSrc(&insn->src[0], i); + srcs.push_back(getSrc(&insn->src[2], 0)); + if (op == nir_intrinsic_image_atomic_comp_swap) + srcs.push_back(getSrc(&insn->src[3], 0)); + + TexInstruction *texi = mkTex(OP_SUREDP, target.getEnum(), tex->data.driver_location, 0, defs, srcs); + texi->tex.mask = 1; + texi->setType(dType); + texi->subOp = getSubOp(op); + + info->io.globalAccess |= 0x2; + break; + } + case nir_intrinsic_store_shared: { + DataType sType = getSType(insn->src[0], false, false); + Value *indirectOffset; + uint32_t offset = getIndirect(&insn->src[1], 0, &indirectOffset); + + for (auto i = 0u; i < insn->num_components; ++i) { + if (!((1u << i) & nir_intrinsic_write_mask(insn))) + continue; + Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType)); + mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i)); + } + break; + } + case nir_intrinsic_load_shared: { + const DataType dType = getDType(insn); + LValues &newDefs = convert(&insn->dest); + Value *indirectOffset; + uint32_t offset = getIndirect(&insn->src[0], 0, &indirectOffset); 
+ + for (auto i = 0u; i < insn->num_components; ++i) + loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset); + + break; + } + case nir_intrinsic_barrier: { + // TODO: add flag to shader_info + info->numBarriers = 1; + Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0)); + bar->fixed = 1; + bar->subOp = NV50_IR_SUBOP_BAR_SYNC; + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; @@ -2310,6 +2541,15 @@ Converter::applyProjection(Value *src, Value *proj) return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj); } +unsigned int +Converter::getNIRArgCount(TexInstruction::Target& target) +{ + unsigned int result = target.getArgCount(); + if (target.isCube() && target.isArray()) + return result - 1; + return result; +} + bool Converter::visit(nir_tex_instr *insn) { -- 2.14.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev