--- src/gallium/drivers/r600/r600_llvm.c | 126 ++++++++++++++++++++++++++++--- src/gallium/drivers/r600/r600_shader.c | 11 ++- src/gallium/drivers/radeon/radeon_llvm.h | 1 + 3 files changed, 127 insertions(+), 11 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index 5efde2d..435da67 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -233,7 +233,9 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) struct lp_build_context * base = &bld_base->base; struct pipe_stream_output_info * so = ctx->stream_outputs; unsigned i; - + unsigned next_pos = 0; + unsigned next_param = 0; + unsigned color_count = 0; boolean has_color = false; @@ -268,14 +270,120 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) ctx->soa.outputs[i][chan], ""); if (ctx->type == TGSI_PROCESSOR_VERTEX) { - LLVMValueRef reg_index = lp_build_const_int32( - base->gallivm, - radeon_llvm_reg_index_soa(adjusted_reg_idx, chan)); - lp_build_intrinsic_binary( - base->gallivm->builder, - "llvm.AMDGPU.store.output", - LLVMVoidTypeInContext(base->gallivm->context), - output, reg_index); + switch (ctx->r600_outputs[i].name) { + case TGSI_SEMANTIC_POSITION: { + LLVMValueRef reg_index = lp_build_const_int32( + base->gallivm, next_pos++); + lp_build_intrinsic_binary( + base->gallivm->builder, + "llvm.R600.store.vertex.position", + LLVMVoidTypeInContext(base->gallivm->context), + output, reg_index); + break; + } + case TGSI_SEMANTIC_PSIZE: { + LLVMValueRef reg_index = lp_build_const_int32( + base->gallivm, next_pos++); + lp_build_intrinsic_binary( + base->gallivm->builder, + "llvm.R600.store.vertex.position", + LLVMVoidTypeInContext(base->gallivm->context), + output, reg_index); + break; + } + case TGSI_SEMANTIC_CLIPVERTEX: { + LLVMValueRef elements[4]; + LLVMValueRef args[2]; + unsigned reg_index; + unsigned base_vector_chan; + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + elements[chan] = LLVMBuildLoad(base->gallivm->builder, + ctx->soa.outputs[i][chan], ""); + } + LLVMValueRef cv = lp_build_gather_values(bld_base->base.gallivm, + elements, 4); + for (reg_index = 0; reg_index < 2; reg_index ++) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + for (base_vector_chan = 0; base_vector_chan < TGSI_NUM_CHANNELS; base_vector_chan++) { + unsigned base_vector_subid = reg_index * 16 + chan * 4 + base_vector_chan; + args[0] = lp_build_const_int32(bld_base->base.gallivm, base_vector_subid); + args[1] = lp_build_const_int32(bld_base->base.gallivm, R600_UCP_CONST_BUFFER); + elements[base_vector_chan] = build_intrinsic(bld_base->base.gallivm->builder, + "llvm.AMDGPU.load.const", bld_base->base.elem_type, + args, 2, LLVMReadNoneAttribute); + } + LLVMValueRef base_vector = lp_build_gather_values(bld_base->base.gallivm, + elements, 4); + args[0] = cv; + args[1] = base_vector; + LLVMValueRef scalar_store = build_intrinsic(bld_base->base.gallivm->builder, + "llvm.AMDGPU.dp4", bld_base->base.elem_type, + args, 2, LLVMReadNoneAttribute); + LLVMValueRef reg_index = lp_build_const_int32( + base->gallivm, next_pos++); + lp_build_intrinsic_binary( + base->gallivm->builder, + "llvm.R600.store.vertex.position", + LLVMVoidTypeInContext(base->gallivm->context), + scalar_store, reg_index); + } + } + // Leave the loop + chan = 4; + break; + } + case TGSI_SEMANTIC_CLIPDIST: { + LLVMValueRef reg_index = lp_build_const_int32( + base->gallivm, next_pos++); + lp_build_intrinsic_binary( + base->gallivm->builder, + "llvm.R600.store.vertex.position", + LLVMVoidTypeInContext(base->gallivm->context), + output, reg_index); + LLVMValueRef param_index = lp_build_const_int32( + base->gallivm, next_param++); + lp_build_intrinsic_binary( + base->gallivm->builder, + "llvm.R600.store.vertex.param", + LLVMVoidTypeInContext(base->gallivm->context), + output, param_index); + break; + } + case TGSI_SEMANTIC_FOG: { + LLVMValueRef fog_value; + switch (chan) { + case 0: + fog_value = output; + break; + case 1: + case 2: + fog_value = lp_build_const_float(base->gallivm, 0.0f); + break; + case 3: + fog_value = lp_build_const_float(base->gallivm, 1.0f); + break; + }; + LLVMValueRef reg_index = lp_build_const_int32( + base->gallivm, next_param++); + lp_build_intrinsic_binary( + base->gallivm->builder, + "llvm.R600.store.vertex.param", + LLVMVoidTypeInContext(base->gallivm->context), + fog_value, reg_index); + break; + } + // Param + default: { + LLVMValueRef reg_index = lp_build_const_int32( + base->gallivm, next_param++); + lp_build_intrinsic_binary( + base->gallivm->builder, + "llvm.R600.store.vertex.param", + LLVMVoidTypeInContext(base->gallivm->context), + output, reg_index); + break; + } + } } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { switch (ctx->r600_outputs[i].name) { case TGSI_SEMANTIC_COLOR: diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 6eda1e1..b53d6bf 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -327,6 +327,11 @@ static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx, break; } + if (alu.src[1].sel >= 160 && alu.src[1].sel <= 168) { + alu.src[1].sel += 352; + alu.src[1].kc_bank = R600_UCP_CONST_BUFFER; + } + for(src_idx = 0; src_idx < 3; src_idx++) { if (src_const_reg[src_idx]) { alu.src[src_idx].kc_bank = src_const_reg[src_idx] - 1; @@ -1367,6 +1372,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, radeon_llvm_ctx.chip_class = ctx.bc->chip_class; radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN); radeon_llvm_ctx.stream_outputs = &so; + radeon_llvm_ctx.clip_vertex = ctx.cv_output; mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) { dump = 1; @@ -1507,7 +1513,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, alu.dst.write = (j == ochan); if (j == 3) alu.last = 1; - r = r600_bytecode_add_alu(ctx.bc, &alu); + if (!use_llvm) + r = r600_bytecode_add_alu(ctx.bc, &alu); if (r) return r; } @@ -1736,7 +1743,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, } } /* add output to bytecode */ - if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT) { + if (!use_llvm) { for (i = 0; i < noutput; i++) { r = r600_bytecode_add_output(ctx.bc, &output[i]); if (r) diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index c3d691a..5bce368 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -58,6 +58,7 @@ struct radeon_llvm_context { unsigned type; unsigned face_input; unsigned two_side; + unsigned clip_vertex; struct r600_shader_io * r600_inputs; struct r600_shader_io * r600_outputs; struct pipe_stream_output_info *stream_outputs; -- 1.8.0.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev