Implement helper functions that can be used to construct and send untyped and typed surface read, write and atomic messages to the shared dataport unit. --- src/mesa/drivers/dri/i965/brw_ir_surface_builder.h | 268 +++++++++++++++++++++ 1 file changed, 268 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h b/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h index b6890b4..e24e484 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h +++ b/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h @@ -283,6 +283,274 @@ namespace brw { size); } } + + namespace surface_access { + namespace detail { + using namespace array_utils; + + /** + * Generate a send opcode for a surface message and return the + * result. + * + * Component 0 of \p surface is uniformized into a scalar UD register + * and passed to the instruction together with component 0 of \p arg. + * The message length is taken from the size of \p payload. \p rlen is + * the number of registers written: when it is zero the null register + * is used as destination, otherwise a UD array of \p rlen registers is + * allocated and returned. \p pred is stored as the predicate of the + * emitted instruction. + */ + template<typename B, typename S> + array_reg + emit_send(const B &vbld, enum opcode opcode, + const array_reg &payload, const S &surface, const S &arg, + unsigned rlen, brw_predicate pred = BRW_PREDICATE_NONE) + { + const typename B::scalar_builder bld = vbld.scalar(); + const typename B::scalar_builder::dst_reg usurface = + bld.scalar_reg(BRW_REGISTER_TYPE_UD); + const array_reg dst = + (rlen ? bld.array_reg(BRW_REGISTER_TYPE_UD, rlen) : + array_reg(bld.null_reg_ud())); + + /* Reduce the dynamically uniform surface index to a single + * scalar. + */ + bld.emit_uniformize(usurface, component(surface, 0)); + + typename B::scalar_builder::instruction *inst = + bld.emit(opcode, bld.natural_reg(dst), bld.natural_reg(payload), + usurface, component(arg, 0)); + inst->mlen = payload.size; + inst->regs_written = rlen; + inst->predicate = pred; + + return dst; + } + + /** + * Initialize the header present in untyped surface messages. + * + * The svec4_builder overload zeroes a scalar UD register and then + * copies the sample mask register into its channel 7, using the + * scalar builder for half 0 with both moves wrapped in exec_all(). + * The vec4_builder overload that follows emits nothing and returns a + * default-constructed array_reg. + */ + inline array_reg + emit_untyped_message_header(const svec4_builder &bld) + { + fs_builder ubld = bld.scalar().half(0); + const fs_reg dst = ubld.scalar_reg(BRW_REGISTER_TYPE_UD); + exec_all(ubld.MOV(dst, fs_reg(0))); + exec_all(ubld.MOV(channel(dst, 7), ubld.sample_mask_reg())); + return array_reg(dst); + } + + inline array_reg + emit_untyped_message_header(const vec4_builder &bld) + { + return array_reg(); + } + } + + /** + * Emit an untyped surface read opcode.
\p dims determines the number + * of components of the address and \p size the number of components of + * the returned value. + * + * The response length is DIV_ROUND_UP(\p size, S::traits::chan_size) + * multiplied by the dispatch width of \p bld and divided by 8. \p size + * is also passed as the message argument and \p pred as the predicate + * of the message. + */ + template<typename B, typename S> + S + emit_untyped_read(const B &bld, const S &surface, const S &addr, + unsigned dims, unsigned size, + brw_predicate pred = BRW_PREDICATE_NONE) + { + using namespace detail; + const vector_layout layout(bld, true, true); + const array_reg payload = + emit_collect(bld, + emit_untyped_message_header(bld), + emit_insert(layout, bld, addr, dims)); + const unsigned rlen = (DIV_ROUND_UP(size, S::traits::chan_size) * + bld.dispatch_width() / 8); + const array_reg dst = + emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ, + payload, surface, S(size), rlen, pred); + + return emit_extract(layout, bld, &dst, size); + } + + /** + * Emit an untyped surface write opcode. \p dims determines the number + * of components of the address and \p size the number of components of + * the argument. + * + * The payload is the untyped header followed by the address and the + * source. The send is emitted with a zero response length, with + * \p size as the message argument and \p pred as its predicate. + * Nothing is returned. + */ + template<typename B, typename S> + void + emit_untyped_write(const B &bld, const S &surface, const S &addr, const S &src, + unsigned dims, unsigned size, + brw_predicate pred = BRW_PREDICATE_NONE) + { + using namespace detail; + const vector_layout layout( + bld, bld.devinfo->gen >= 8 || bld.devinfo->is_haswell, true); + const array_reg payload = + emit_collect(bld, + emit_untyped_message_header(bld), + emit_insert(layout, bld, addr, dims), + emit_insert(layout, bld, src, size)); + + emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE, + payload, surface, S(size), 0, pred); + } + + /** + * Emit an untyped surface atomic opcode. \p dims determines the number + * of components of the address and \p rsize the number of components of + * the returned value (either zero or one).
+ * + * \p op is passed as the message argument and \p pred as the predicate + * of the message. The number of source components placed in the + * payload is the count of \p src0 and \p src1 whose register file is + * not BAD_FILE. The response length is \p rsize multiplied by the + * dispatch width of \p bld and divided by 8. + */ + template<typename B, typename S> + S + emit_untyped_atomic(const B &bld, const S &surface, const S &addr, + const S &src0, const S &src1, + unsigned dims, unsigned rsize, unsigned op, + brw_predicate pred = BRW_PREDICATE_NONE) + { + using namespace detail; + const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE); + const vector_layout layout( + bld, bld.devinfo->gen >= 8 || bld.devinfo->is_haswell, true); + /* Zip the components of both sources, they are represented as the X + * and Y components of the same vector. + */ + const S srcs = bld.natural_reg(emit_zip(bld, emit_flatten(bld, src0, 1), + emit_flatten(bld, src1, 1), 1)); + const array_reg payload = + emit_collect(bld, + emit_untyped_message_header(bld), + emit_insert(layout, bld, addr, dims), + emit_insert(layout, bld, srcs, size)); + const array_reg dst = + emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC, + payload, surface, S(op), + rsize * bld.dispatch_width() / 8, pred); + + return emit_extract(layout, bld, &dst, rsize); + } + + namespace detail { + /** + * Initialize the header present in typed surface messages. + * + * The svec4_builder overload reuses the untyped message header. The + * vec4_builder overload that follows zeroes a UD register and, when + * the device is Gen7 but not Haswell, additionally writes 0x11 to its + * W component; both moves are wrapped in exec_all(). + */ + inline array_reg + emit_typed_message_header(const svec4_builder &bld) + { + return emit_untyped_message_header(bld); + } + + inline array_reg + emit_typed_message_header(const vec4_builder &bld) + { + const dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD); + + exec_all(bld.MOV(dst, src_reg(0))); + + if (bld.devinfo->gen == 7 && !bld.devinfo->is_haswell) { + /* The sample mask is used on IVB for the SIMD8 messages that + * have no SIMD4x2 variant. We only use the two X channels + * in that case, mask everything else out. + */ + exec_all(bld.MOV(writemask(dst, WRITEMASK_W), src_reg(0x11))); + } + + return array_reg(dst); + } + } + + /** + * Emit a typed surface read opcode. \p dims determines the number of + * components of the address and \p size the number of components of the + * returned value.
+ * + * One message is emitted per half reported by the vector layout, each + * with a response length of DIV_ROUND_UP(\p size, S::traits::chan_size) + * and \p size as the message argument. The header and the send use the + * per-half builder while the address is inserted from \p bld with the + * half index. The per-half results are passed to emit_extract() to + * build the return value. + */ + template<typename B, typename S> + S + emit_typed_read(const B &bld, const S &surface, const S &addr, + unsigned dims, unsigned size) + { + using namespace detail; + const vector_layout layout( + bld, bld.devinfo->gen >= 8 || bld.devinfo->is_haswell, false); + const unsigned rlen = DIV_ROUND_UP(size, S::traits::chan_size); + array_reg dsts[2]; + + for (unsigned i = 0; i < layout.halves; ++i) { + /* Get a half builder for this half if required. */ + const B ubld = (layout.halves > 1 ? bld.half(i) : bld); + const array_reg payload = + emit_collect(ubld, + emit_typed_message_header(ubld), + emit_insert(layout, bld, addr, dims, i)); + + dsts[i] = emit_send(ubld, SHADER_OPCODE_TYPED_SURFACE_READ, + payload, surface, S(size), rlen); + } + + return emit_extract(layout, bld, dsts, size); + } + + /** + * Emit a typed surface write opcode. \p dims determines the number of + * components of the address and \p size the number of components of the + * argument. + * + * One message with a zero response length is emitted per half reported + * by the vector layout, with \p size as the message argument. Nothing + * is returned. + */ + template<typename B, typename S> + void + emit_typed_write(const B &bld, const S &surface, const S &addr, + const S &src, unsigned dims, unsigned size) + { + using namespace detail; + const vector_layout layout( + bld, bld.devinfo->gen >= 8 || bld.devinfo->is_haswell, false); + + for (unsigned i = 0; i < layout.halves; ++i) { + /* Get a half builder for this half if required. */ + const B ubld = (layout.halves > 1 ? bld.half(i) : bld); + const array_reg payload = + emit_collect(ubld, + emit_typed_message_header(ubld), + emit_insert(layout, bld, addr, dims, i), + emit_insert(layout, bld, src, size, i)); + + emit_send(ubld, SHADER_OPCODE_TYPED_SURFACE_WRITE, + payload, surface, S(size), 0); + } + } + + /** + * Emit a typed surface atomic opcode. \p dims determines the number of + * components of the address and \p rsize the number of components of + * the returned value (either zero or one).
+ * + * \p op is passed as the message argument and \p pred as the predicate + * of each message. One message with a response length of \p rsize is + * emitted per half reported by the vector layout. The number of source + * components placed in the payload is the count of \p src0 and + * \p src1 whose register file is not BAD_FILE. + */ + template<typename B, typename S> + S + emit_typed_atomic(const B &bld, const S &surface, const S &addr, + const S &src0, const S &src1, + unsigned dims, unsigned rsize, unsigned op, + brw_predicate pred = BRW_PREDICATE_NONE) + { + using namespace detail; + const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE); + const vector_layout layout( + bld, bld.devinfo->gen >= 8 || bld.devinfo->is_haswell, false); + /* Zip the components of both sources, they are represented as the X + * and Y components of the same vector. + */ + const S srcs = bld.natural_reg(emit_zip(bld, emit_flatten(bld, src0, 1), + emit_flatten(bld, src1, 1), 1)); + array_reg dsts[2]; + + for (unsigned i = 0; i < layout.halves; ++i) { + /* Get a half builder for this half if required. */ + const B ubld = (layout.halves > 1 ? bld.half(i) : bld); + const array_reg payload = + emit_collect(ubld, + emit_typed_message_header(ubld), + emit_insert(layout, bld, addr, dims, i), + emit_insert(layout, bld, srcs, size, i)); + + dsts[i] = emit_send(ubld, SHADER_OPCODE_TYPED_ATOMIC, + payload, surface, S(op), rsize, pred); + } + + return emit_extract(layout, bld, dsts, rsize); + } + } } #endif -- 2.3.5 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev