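ROUND and TRUNC are implemented with one function, emit_trunc_round(), to reduce code duplication. The shared zero immediate now carries 0.5 in its second component, (0, 0.5, -1, 1), which ROUND adds to abs(src) before truncating. ROUNDEVEN is routed through the same path as ROUND for now and is not yet properly implemented.
---
 src/gallium/drivers/svga/svga_tgsi_insn.c |   69 ++++++++++++++++++++++-------
 1 files changed, 53 insertions(+), 16 deletions(-)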
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index a2d6b51..eec40c7 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -632,11 +632,11 @@ create_zero_immediate( struct svga_shader_emitter *emit ) { unsigned idx = emit->nr_hw_float_const++; - /* Emit the constant (0, 0, -1, 1) and use swizzling to generate + /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate * other useful vectors. */ if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, - idx, 0, 0, -1, 1 )) + idx, 0, 0.5, -1, 1 )) return FALSE; emit->zero_immediate_idx = idx; @@ -732,6 +732,16 @@ get_pos_neg_one_immediate( struct svga_shader_emitter *emit ) 3, 3, 3, 2); } +/* returns {0.5, 0.5, 0.5, 0.5} immediate */ +static INLINE struct src_register +get_half_immediate( struct svga_shader_emitter *emit ) +{ + assert(emit->created_zero_immediate); + assert(emit->zero_immediate_idx >= 0); + return swizzle(src_register(SVGA3DREG_CONST, emit->zero_immediate_idx), + 1, 1, 1, 1); +} + /* returns the loop const */ static INLINE struct src_register get_loop_const( struct svga_shader_emitter *emit ) @@ -2376,34 +2386,57 @@ static boolean emit_log(struct svga_shader_emitter *emit, /** - * Translate TGSI TRUNC instruction. + * Translate TGSI TRUNC or ROUND instruction. * We need to truncate toward zero. Ex: trunc(-1.9) = -1 * Different approaches are needed for VS versus PS. 
*/ static boolean -emit_trunc(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) +emit_trunc_round(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn, + boolean round) { SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0); const struct src_register src0 = translate_src_register(emit, &insn->Src[0] ); SVGA3dShaderDestToken t1 = get_temp(emit); - /* t1 = fract(abs(src0)) */ - if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, absolute(src0))) - return FALSE; + if (round) { + SVGA3dShaderDestToken t0 = get_temp(emit); + struct src_register half = get_half_immediate(emit); - /* t1 = abs(src0) - t1 */ - if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, absolute(src0), - negate(src(t1)))) - return FALSE; + /* t0 = abs(src0) + 0.5 */ + if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t0, + absolute(src0), half)) + return FALSE; + + /* t1 = fract(t0) */ + if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, src(t0))) + return FALSE; + + /* t1 = t0 - t1 */ + if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, src(t0), + negate(src(t1)))) + return FALSE; + } + else { + /* trunc */ + + /* t1 = fract(abs(src0)) */ + if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, absolute(src0))) + return FALSE; + + /* t1 = abs(src0) - t1 */ + if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, absolute(src0), + negate(src(t1)))) + return FALSE; + } /* * Now we need to multiply t1 by the sign of the original value. 
*/ if (emit->unit == PIPE_SHADER_VERTEX) { /* For VS: use SGN instruction */ - /* Need another temp plus two extra/dummy registers */ + /* Need two extra/dummy registers: */ SVGA3dShaderDestToken t2 = get_temp(emit), t3 = get_temp(emit), t4 = get_temp(emit); @@ -2519,7 +2552,12 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit, return emit_floor( emit, insn ); case TGSI_OPCODE_TRUNC: - return emit_trunc( emit, insn ); + return emit_trunc_round( emit, insn, FALSE ); + + case TGSI_OPCODE_ROUNDEVEN: + /* Note: ROUNDEVEN not properly implemented yet */ + case TGSI_OPCODE_ROUND: + return emit_trunc_round( emit, insn, TRUE ); case TGSI_OPCODE_CEIL: return emit_ceil( emit, insn ); @@ -2612,8 +2650,6 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit, * about: */ case TGSI_OPCODE_CLAMP: - case TGSI_OPCODE_ROUND: - case TGSI_OPCODE_ROUNDEVEN: case TGSI_OPCODE_AND: case TGSI_OPCODE_OR: case TGSI_OPCODE_I2F: @@ -3102,6 +3138,7 @@ needs_to_create_zero( struct svga_shader_emitter *emit ) emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 || emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 || emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_ROUND] >= 1 || emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 || emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 || emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 || -- 1.7.3.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev