[Mesa-dev] [PATCH 1/3] gallivm: support printing of 64 bit integers
only 8 and 32 bit integers were supported before. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/gallivm/lp_bld_printf.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.c b/src/gallium/auxiliary/gallivm/lp_bld_printf.c index 1324da2..d06209a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_printf.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.c @@ -106,7 +106,11 @@ lp_build_print_value(struct gallivm_state *gallivm, type_fmt[4] = 'g'; type_fmt[5] = '\0'; } else if (type_kind == LLVMIntegerTypeKind) { - if (LLVMGetIntTypeWidth(type_ref) == 8) { + if (LLVMGetIntTypeWidth(type_ref) == 64) { + type_fmt[2] = 'l'; + type_fmt[3] = 'd'; + type_fmt[4] = '\0'; + } else if (LLVMGetIntTypeWidth(type_ref) == 8) { type_fmt[2] = 'u'; } else { type_fmt[2] = 'i'; -- 1.8.1.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] gallium: Add support for 32x32 muls with 64 bit results
The code introduces two new 32bit integer multiplication opcodes which can be used to produce correct 64 bit results. GLSL, OpenCL and D3D10+ require them. We use two separate opcodes, because they match the behavior of GLSL and OpenCL, are a lot easier to add than a single opcode with multiple destinations and because there's not much (any) difference wrt code-generation. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 34 ++ src/gallium/auxiliary/tgsi/tgsi_info.c | 6 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 3 ++ src/gallium/auxiliary/tgsi/tgsi_util.c | 2 ++ src/gallium/docs/source/tgsi.rst | 30 +++ src/gallium/include/pipe/p_shader_tokens.h | 5 +++- .../tests/graw/vertex-shader/vert-imul_hi.sh | 13 + .../tests/graw/vertex-shader/vert-umul_hi.sh | 11 +++ 8 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 src/gallium/tests/graw/vertex-shader/vert-imul_hi.sh create mode 100644 src/gallium/tests/graw/vertex-shader/vert-umul_hi.sh diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 0750a50..6db1238 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -3478,6 +3478,32 @@ micro_umul(union tgsi_exec_channel *dst, } static void +micro_imul_hi(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ +#define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32) + dst->i[0] = I64M(src0->i[0], src1->i[0]); + dst->i[1] = I64M(src0->i[1], src1->i[1]); + dst->i[2] = I64M(src0->i[2], src1->i[2]); + dst->i[3] = I64M(src0->i[3], src1->i[3]); +#undef I64M +} + +static void +micro_umul_hi(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ +#define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32) + dst->u[0] = U64M(src0->u[0], src1->u[0]); + dst->u[1] = U64M(src0->u[1], src1->u[1]); + dst->u[2] = U64M(src0->u[2], src1->u[2]); + dst->u[3] = 
U64M(src0->u[3], src1->u[3]); +#undef U64M +} + +static void micro_useq(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1) @@ -4277,6 +4303,14 @@ exec_instruction( exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; + case TGSI_OPCODE_IMUL_HI: + exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_UMUL_HI: + exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + case TGSI_OPCODE_USEQ: exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 7a5d18f..0beef44 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -219,6 +219,8 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 3, 1, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 }, { 1, 3, 1, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 }, { 1, 3, 1, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 }, + { 1, 2, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI }, + { 1, 2, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI }, }; const struct tgsi_opcode_info * @@ -297,6 +299,7 @@ tgsi_opcode_infer_type( uint opcode ) case TGSI_OPCODE_USLT: case TGSI_OPCODE_USNE: case TGSI_OPCODE_SVIEWINFO: + case TGSI_OPCODE_UMUL_HI: return TGSI_TYPE_UNSIGNED; case TGSI_OPCODE_ARL: case TGSI_OPCODE_ARR: @@ -317,6 +320,7 @@ tgsi_opcode_infer_type( uint opcode ) case TGSI_OPCODE_UARL: case TGSI_OPCODE_IABS: case TGSI_OPCODE_ISSG: + case TGSI_OPCODE_IMUL_HI: return TGSI_TYPE_SIGNED; default: return TGSI_TYPE_FLOAT; @@ -339,7 +343,9 @@ tgsi_opcode_infer_src_type( uint opcode ) case TGSI_OPCODE_CASE: case TGSI_OPCODE_SAMPLE_I: case TGSI_OPCODE_SAMPLE_I_MS: + case TGSI_OPCODE_UMUL_HI: return TGSI_TYPE_UNSIGNED; + case TGSI_OPCODE_IMUL_HI: case TGSI_OPCODE_I2F: 
return TGSI_TYPE_SIGNED; case TGSI_OPCODE_ARL: diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index b8144a8..1ef78dd 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -204,6 +204,9 @@ OP12(SAMPLE_INFO) OP13(UCMP) +OP12(IMUL_HI) +OP12(UMUL_HI) + #undef OP00 #undef OP01 #undef OP10 diff --git a/src/
[Mesa-dev] [PATCH 3/3] llvmpipe: implement 64 bit mul opcodes in llvmpipe
Both the imul_hi and umul_hi are working with this patch. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 60 ++ 1 file changed, 60 insertions(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c index 1cfaf78..8caaf83 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c @@ -763,6 +763,64 @@ umul_emit( emit_data->args[0], emit_data->args[1]); } +/* TGSI_OPCODE_IMUL_HI */ +static void +imul_hi_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + struct lp_build_context *int_bld = &bld_base->int_bld; + struct lp_type type = int_bld->type; + LLVMValueRef src0, src1; + LLVMValueRef dst64; + LLVMTypeRef typeRef; + + assert(type.width == 32); + type.width = 64; + typeRef = lp_build_vec_type(bld_base->base.gallivm, type); + src0 = LLVMBuildSExt(builder, emit_data->args[0], typeRef, ""); + src1 = LLVMBuildSExt(builder, emit_data->args[1], typeRef, ""); + dst64 = LLVMBuildMul(builder, src0, src1, ""); + dst64 = LLVMBuildAShr( +builder, dst64, +lp_build_const_vec(bld_base->base.gallivm, type, 32), ""); + type.width = 32; + typeRef = lp_build_vec_type(bld_base->base.gallivm, type); + emit_data->output[emit_data->chan] = + LLVMBuildTrunc(builder, dst64, typeRef, ""); +} + +/* TGSI_OPCODE_UMUL_HI */ +static void +umul_hi_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + struct lp_type type = uint_bld->type; + LLVMValueRef src0, src1; + LLVMValueRef dst64; + LLVMTypeRef typeRef; + + assert(type.width == 32); + type.width = 64; + typeRef = 
lp_build_vec_type(bld_base->base.gallivm, type); + src0 = LLVMBuildZExt(builder, emit_data->args[0], typeRef, ""); + src1 = LLVMBuildZExt(builder, emit_data->args[1], typeRef, ""); + dst64 = LLVMBuildMul(builder, src0, src1, ""); + dst64 = LLVMBuildLShr( +builder, dst64, +lp_build_const_vec(bld_base->base.gallivm, type, 32), ""); + type.width = 32; + typeRef = lp_build_vec_type(bld_base->base.gallivm, type); + emit_data->output[emit_data->chan] = + LLVMBuildTrunc(builder, dst64, typeRef, ""); +} + /* TGSI_OPCODE_MAX */ static void fmax_emit( const struct lp_build_tgsi_action * action, @@ -894,6 +952,8 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base) bld_base->op_actions[TGSI_OPCODE_U2F].emit = u2f_emit; bld_base->op_actions[TGSI_OPCODE_UMAD].emit = umad_emit; bld_base->op_actions[TGSI_OPCODE_UMUL].emit = umul_emit; + bld_base->op_actions[TGSI_OPCODE_IMUL_HI].emit = imul_hi_emit; + bld_base->op_actions[TGSI_OPCODE_UMUL_HI].emit = umul_hi_emit; bld_base->op_actions[TGSI_OPCODE_MAX].emit = fmax_emit; bld_base->op_actions[TGSI_OPCODE_MIN].emit = fmin_emit; -- 1.8.1.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] llvmpipe: abstract the code to set number of subpixel bits
As we're moving towards expanding the number of subpixel bits and the width of the variables used in the computations we need to make this code a bit more centralized. Signed-off-by: Zack Rusin --- src/gallium/drivers/llvmpipe/lp_rast.h | 9 + src/gallium/drivers/llvmpipe/lp_setup.c | 14 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index c57f2ea..43c598d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -46,9 +46,18 @@ struct lp_scene; struct lp_fence; struct cmd_bin; +#define FIXED_TYPE_WIDTH 32 /** For sub-pixel positioning */ #define FIXED_ORDER 4 #define FIXED_ONE (1<draw_regions[i]); } } - /* If the framebuffer is large we have to think about fixed-point - * integer overflow. For 2K by 2K images, coordinates need 15 bits - * (2^11 + 4 subpixel bits). The product of two such numbers would - * use 30 bits. Any larger and we could overflow a 32-bit int. - * - * To cope with this problem we check if triangles are large and - * subdivide them if needed. + /* + * Subdivide triangles if the framebuffer is larger than the + * MAX_FIXED_LENGTH. 
*/ - setup->subdivide_large_triangles = (setup->fb.width > 2048 || - setup->fb.height > 2048); + setup->subdivide_large_triangles = (setup->fb.width > MAX_FIXED_LENGTH || + setup->fb.height > MAX_FIXED_LENGTH); } setup->dirty = 0; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 051ffa0..9cc81e9 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -988,7 +988,7 @@ check_subdivide_triangle(struct lp_setup_context *setup, const float (*v2)[4], triangle_func_t tri) { - const float maxLen = 2048.0f; /* longest permissible edge, in pixels */ + const float maxLen = MAX_FIXED_LENGTH; /* longest permissible edge, in pixels */ float dx10, dy10, len10; float dx21, dy21, len21; float dx02, dy02, len02; -- 1.8.1.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] graw: add a test rendering a huge triangle
Used to test rasterization, because we often breakdown on subdivision of triangles with long edges. Signed-off-by: Zack Rusin --- src/gallium/tests/graw/SConscript | 1 + src/gallium/tests/graw/tri-large.c | 173 + 2 files changed, 174 insertions(+) create mode 100644 src/gallium/tests/graw/tri-large.c diff --git a/src/gallium/tests/graw/SConscript b/src/gallium/tests/graw/SConscript index 8740ff3..8723807 100644 --- a/src/gallium/tests/graw/SConscript +++ b/src/gallium/tests/graw/SConscript @@ -29,6 +29,7 @@ progs = [ 'tex-srgb', 'tex-swizzle', 'tri', +'tri-large', 'tri-gs', 'tri-instanced', 'vs-test', diff --git a/src/gallium/tests/graw/tri-large.c b/src/gallium/tests/graw/tri-large.c new file mode 100644 index 000..3fbbfb3 --- /dev/null +++ b/src/gallium/tests/graw/tri-large.c @@ -0,0 +1,173 @@ +/* Display a cleared blue window. This demo has no dependencies on + * any utility code, just the graw interface and gallium. + */ + +#include "graw_util.h" +#include "util/u_debug.h" + +#include + +static struct graw_info info; + +static const int WIDTH = 4*2048; +static const int HEIGHT = 4*2048; + + +struct vertex { + float position[4]; + float color[4]; +}; + +static boolean FlatShade = FALSE; + + +static struct vertex vertices[3] = +{ + { + { -1.0f, -1.0f, 0.0f, 1.0f }, + { 1.0f, 0.0f, 0.0f, 1.0f } + }, + { + { -1.0f, 1.0f, 0.0f, 1.0f }, + { 0.0f, 1.0f, 0.0f, 1.0f } + }, + { + { 1.0f, 1.0f, 0.0f, 1.0f }, + { 0.0f, 0.0f, 1.0f, 1.0f } + } +}; + + +static void set_vertices( void ) +{ + struct pipe_vertex_element ve[2]; + struct pipe_vertex_buffer vbuf; + void *handle; + + memset(ve, 0, sizeof ve); + + ve[0].src_offset = Offset(struct vertex, position); + ve[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[1].src_offset = Offset(struct vertex, color); + ve[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + + handle = info.ctx->create_vertex_elements_state(info.ctx, 2, ve); + info.ctx->bind_vertex_elements_state(info.ctx, handle); + + memset(&vbuf, 0, sizeof vbuf); + + 
vbuf.stride = sizeof( struct vertex ); + vbuf.buffer_offset = 0; + vbuf.buffer = pipe_buffer_create_with_data(info.ctx, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, + sizeof(vertices), + vertices); + + info.ctx->set_vertex_buffers(info.ctx, 0, 1, &vbuf); +} + + +static void set_vertex_shader( void ) +{ + void *handle; + const char *text = + "VERT\n" + "DCL IN[0]\n" + "DCL IN[1]\n" + "DCL OUT[0], POSITION\n" + "DCL OUT[1], COLOR\n" + " 0: MOV OUT[1], IN[1]\n" + " 1: MOV OUT[0], IN[0]\n" + " 2: END\n"; + + handle = graw_parse_vertex_shader(info.ctx, text); + info.ctx->bind_vs_state(info.ctx, handle); +} + + +static void set_fragment_shader( void ) +{ + void *handle; + const char *text = + "FRAG\n" + "DCL IN[0], COLOR, LINEAR\n" + "DCL OUT[0], COLOR\n" + " 0: MOV OUT[0], IN[0]\n" + " 1: END\n"; + + handle = graw_parse_fragment_shader(info.ctx, text); + info.ctx->bind_fs_state(info.ctx, handle); +} + + +static void draw( void ) +{ + union pipe_color_union clear_color = { {1,0,1,1} }; + + info.ctx->clear(info.ctx, PIPE_CLEAR_COLOR, &clear_color, 0, 0); + util_draw_arrays(info.ctx, PIPE_PRIM_TRIANGLES, 0, 3); + info.ctx->flush(info.ctx, NULL, 0); + + graw_save_surface_to_file(info.ctx, info.color_surf[0], NULL); + + graw_util_flush_front(&info); +} + + +static void init( void ) +{ + if (!graw_util_create_window(&info, WIDTH, HEIGHT, 1, FALSE)) + exit(1); + + graw_util_default_state(&info, FALSE); + + { + struct pipe_rasterizer_state rasterizer; + void *handle; + memset(&rasterizer, 0, sizeof rasterizer); + rasterizer.cull_face = PIPE_FACE_NONE; + rasterizer.half_pixel_center = 1; + rasterizer.bottom_edge_rule = 1; + rasterizer.flatshade = FlatShade; + rasterizer.depth_clip = 1; + handle = info.ctx->create_rasterizer_state(info.ctx, &rasterizer); + info.ctx->bind_rasterizer_state(info.ctx, handle); + } + + + graw_util_viewport(&info, 0, 0, WIDTH, HEIGHT, 30, 1000); + + set_vertices(); + set_vertex_shader(); + set_fragment_shader(); +} + +static void args(int argc, char 
*argv[]) +{ + int i; + + for (i = 1; i < argc; ) { + if (graw_parse_args(&i, argc, argv)) { + /* ok */ + } + else if (strcmp(argv[i], "-f") == 0
Re: [Mesa-dev] [PATCH] gallivm: deduplicate some indirect register address code
Looks good. Reviewed-by: Zack Rusin - Original Message - > From: Roland Scheidegger > > There's only one minor functional change, for immediates the pixel offsets > are no longer added since the values are all the same for all elements in > any case (it might be better if those weren't stored as soa vectors in the > first place maybe). > --- > src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 253 > +-- > 1 file changed, 96 insertions(+), 157 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > index 75f6def..5f81066 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > @@ -898,6 +898,39 @@ stype_to_fetch(struct lp_build_tgsi_context * bld_base, > } > > static LLVMValueRef > +get_soa_array_offsets(struct lp_build_context *uint_bld, > + LLVMValueRef indirect_index, > + unsigned chan_index, > + boolean need_perelement_offset) > +{ > + struct gallivm_state *gallivm = uint_bld->gallivm; > + LLVMValueRef chan_vec = > + lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index); > + LLVMValueRef length_vec = > + lp_build_const_int_vec(gallivm, uint_bld->type, > uint_bld->type.length); > + LLVMValueRef index_vec; > + > + /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ > + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); > + index_vec = lp_build_add(uint_bld, index_vec, chan_vec); > + index_vec = lp_build_mul(uint_bld, index_vec, length_vec); > + > + if (need_perelement_offset) { > + LLVMValueRef pixel_offsets; > + int i; > + /* build pixel offset vector: {0, 1, 2, 3, ...} */ > + pixel_offsets = uint_bld->undef; > + for (i = 0; i < uint_bld->type.length; i++) { > + LLVMValueRef ii = lp_build_const_int32(gallivm, i); > + pixel_offsets = LLVMBuildInsertElement(gallivm->builder, > pixel_offsets, > +ii, ii, ""); > + } > + index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); > + } > 
+ return index_vec; > +} > + > +static LLVMValueRef > emit_fetch_constant( > struct lp_build_tgsi_context * bld_base, > const struct tgsi_full_src_register * reg, > @@ -908,7 +941,6 @@ emit_fetch_constant( > struct gallivm_state *gallivm = bld_base->base.gallivm; > LLVMBuilderRef builder = gallivm->builder; > struct lp_build_context *uint_bld = &bld_base->uint_bld; > - LLVMValueRef indirect_index = NULL; > unsigned dimension = 0; > LLVMValueRef dimension_index; > LLVMValueRef consts_ptr; > @@ -927,16 +959,15 @@ emit_fetch_constant( > consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, > dimension_index); > > if (reg->Register.Indirect) { > + LLVMValueRef indirect_index; > + LLVMValueRef swizzle_vec = > + lp_build_const_int_vec(gallivm, uint_bld->type, swizzle); > + LLVMValueRef index_vec; /* index into the const buffer */ > + >indirect_index = get_indirect_index(bld, >reg->Register.File, >reg->Register.Index, >&reg->Indirect); > - } > - > - if (reg->Register.Indirect) { > - LLVMValueRef swizzle_vec = > - lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, > swizzle); > - LLVMValueRef index_vec; /* index into the const buffer */ > >/* index_vec = indirect_index * 4 + swizzle */ >index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); > @@ -949,7 +980,7 @@ emit_fetch_constant( >LLVMValueRef index; /* index into the const buffer */ >LLVMValueRef scalar, scalar_ptr; > > - index = lp_build_const_int32(gallivm, reg->Register.Index*4 + > swizzle); > + index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + > swizzle); > >scalar_ptr = LLVMBuildGEP(builder, consts_ptr, > &index, 1, ""); > @@ -974,49 +1005,32 @@ emit_fetch_immediate( > struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); > struct gallivm_state *gallivm = bld->bld_base.base.gallivm; > LLVMBuilderRef builder = gallivm->builder; > - struct lp_build_context *uint_bld = &bld_base->uint_bld; > - struct lp_build_context *fl
Re: [Mesa-dev] [PATCH] gallivm: Compile flag to debug TGSI execution through printfs.
That's very nice Jose! Looks good to me. - Original Message - > From: José Fonseca > > It is similar to tgsi_exec.c's DEBUG_EXECUTION compile flag. > > I had prototyped this for a while while debugging an issue, but finally > cleaned this up and added a few more bells and whistles. > > Here is a sample output. > > CONST[0]: > X: 0.006250 0.006250 0.006250 0.006250 > Y: -0.007143 -0.007143 -0.007143 -0.007143 > Z: -1.00 -1.00 -1.00 -1.00 > W: 1.00 1.00 1.00 1.00 > IN[0]: > X: 143.50 175.50 175.50 143.50 > Y: 123.50 123.50 155.50 155.50 > Z: 0.00 0.00 0.00 0.00 > W: 1.00 1.00 1.00 1.00 > > 1: RCP TEMP[0].w, IN[0]. > TEMP[0].w = 1 1 1 1 > > 2: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw > TEMP[0].x = -0.103124976 0.0968750715 0.0968750715 -0.103124976 > TEMP[0].y = 0.117857158 0.117857158 -0.110714316 -0.110714316 > > 3: MUL OUT[0].xy, TEMP[0], TEMP[0]. > OUT[0].x = -0.103124976 0.0968750715 0.0968750715 -0.103124976 > OUT[0].y = 0.117857158 0.117857158 -0.110714316 -0.110714316 > > 4: MUL OUT[0].z, IN[0]., TEMP[0]. > OUT[0].z = 0 0 0 0 > > 5: MOV OUT[0].w, TEMP[0] > OUT[0].w = 1 1 1 1 > --- > src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 158 > +++- > src/gallium/auxiliary/tgsi/tgsi_dump.c | 23 > src/gallium/auxiliary/tgsi/tgsi_dump.h | 7 ++ > 3 files changed, 159 insertions(+), 29 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > index 5f81066..917826d 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > @@ -47,6 +47,7 @@ > #include "tgsi/tgsi_parse.h" > #include "tgsi/tgsi_util.h" > #include "tgsi/tgsi_scan.h" > +#include "tgsi/tgsi_strings.h" > #include "lp_bld_tgsi_action.h" > #include "lp_bld_type.h" > #include "lp_bld_const.h" > @@ -67,6 +68,17 @@ > > #define DUMP_GS_EMITS 0 > > +/* > + * If non-zero, the generated LLVM IR will print intermediate results on > every TGSI > + * instruction. 
> + * > + * TODO: > + * - take execution masks in consideration > + * - debug control-flow instructions > + */ > +#define DEBUG_EXECUTION 0 > + > + > static void lp_exec_mask_init(struct lp_exec_mask *mask, struct > lp_build_context *bld) > { > LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context); > @@ -664,6 +676,43 @@ static void lp_exec_mask_endsub(struct lp_exec_mask > *mask, int *pc) > } > > > +static LLVMValueRef > +get_file_ptr(struct lp_build_tgsi_soa_context *bld, > + unsigned file, > + unsigned index, > + unsigned chan) > +{ > + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; > + LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS]; > + LLVMValueRef var_of_array; > + > + switch (file) { > + case TGSI_FILE_TEMPORARY: > + array_of_vars = bld->temps; > + var_of_array = bld->temps_array; > + break; > + case TGSI_FILE_OUTPUT: > + array_of_vars = bld->outputs; > + var_of_array = bld->outputs_array; > + break; > + default: > + assert(0); > + return NULL; > + } > + > + assert(chan < 4); > + > + if (bld->indirect_files & (1 << file)) { > + LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, > index * 4 + chan); > + return LLVMBuildGEP(builder, var_of_array, &lindex, 1, ""); > + } > + else { > + assert(index <= bld->bld_base.info->file_max[file]); > + return array_of_vars[index][chan]; > + } > +} > + > + > /** > * Return pointer to a temporary register channel (src or dest). > * Note that indirect addressing cannot be handled here. 
> @@ -675,15 +724,7 @@ lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context > *bld, > unsigned index, > unsigned chan) > { > - LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; > - assert(chan < 4); > - if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { > - LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, > index * 4 + chan); > - return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, ""); > - } > - else { > - return bld->temps[index][chan]; > - } > + return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan); > } > > /** > @@ -697,16 +738,7 @@ lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld, > unsigned index, > unsigned chan) > { > - LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; > - assert(chan < 4); > - if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { > - LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, > - index * 4 + chan); > - return LLVMBuildGEP(builder, bld-
[Mesa-dev] [PATCH] llvmpipe: support 8bit subpixel precision
8 bit precision is required by d3d10 but unfortunately requires 64 bit rasterizer. This commit implements 64 bit rasterization with full support for 8bit subpixel precision. It's a combination of all individual commits from the llvmpipe-rast-64 branch. Signed-off-by: Zack Rusin --- src/gallium/drivers/llvmpipe/lp_rast.c | 11 ++ src/gallium/drivers/llvmpipe/lp_rast.h | 47 +-- src/gallium/drivers/llvmpipe/lp_rast_debug.c | 6 +- src/gallium/drivers/llvmpipe/lp_rast_priv.h| 27 src/gallium/drivers/llvmpipe/lp_rast_tri.c | 173 + src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 56 src/gallium/drivers/llvmpipe/lp_setup_line.c | 2 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c| 155 ++ src/gallium/tests/graw/SConscript | 1 + src/gallium/tests/graw/tri-large.c | 173 + 10 files changed, 500 insertions(+), 151 deletions(-) create mode 100644 src/gallium/tests/graw/tri-large.c diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index af661e9..0cd62c2 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -589,6 +589,17 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] = lp_rast_begin_query, lp_rast_end_query, lp_rast_set_state, + lp_rast_triangle_32_1, + lp_rast_triangle_32_2, + lp_rast_triangle_32_3, + lp_rast_triangle_32_4, + lp_rast_triangle_32_5, + lp_rast_triangle_32_6, + lp_rast_triangle_32_7, + lp_rast_triangle_32_8, + lp_rast_triangle_32_3_4, + lp_rast_triangle_32_3_16, + lp_rast_triangle_32_4_16 }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 43c598d..b81d94f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -46,10 +46,11 @@ struct lp_scene; struct lp_fence; struct cmd_bin; -#define FIXED_TYPE_WIDTH 32 +#define FIXED_TYPE_WIDTH 64 /** For sub-pixel positioning */ -#define FIXED_ORDER 4 +#define FIXED_ORDER 8 #define FIXED_ONE (1< +#include "util/u_sse.h" + +static INLINE __m128i 
+lp_plane_to_m128i(const struct lp_rast_plane *plane) +{ + return _mm_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx, + (int32_t)plane->dcdy, (int32_t)plane->eo); +} + +#endif + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/src/gallium/drivers/llvmpipe/lp_rast_debug.c index 3bc75aa..587c793 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c @@ -195,8 +195,8 @@ debug_triangle(int tilex, int tiley, while (plane_mask) { plane[nr_planes] = tri_plane[u_bit_scan(&plane_mask)]; plane[nr_planes].c = (plane[nr_planes].c + -plane[nr_planes].dcdy * tiley - -plane[nr_planes].dcdx * tilex); +IMUL64(plane[nr_planes].dcdy, tiley) - +IMUL64(plane[nr_planes].dcdx, tilex)); nr_planes++; } @@ -217,7 +217,7 @@ debug_triangle(int tilex, int tiley, } for (i = 0; i < nr_planes; i++) { - plane[i].c += plane[i].dcdx * TILE_SIZE; + plane[i].c += IMUL64(plane[i].dcdx, TILE_SIZE); plane[i].c += plane[i].dcdy; } } diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 41fe097..77ec329 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -355,6 +355,33 @@ void lp_rast_triangle_3_16( struct lp_rasterizer_task *, void lp_rast_triangle_4_16( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); + +void lp_rast_triangle_32_1( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_32_2( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_32_3( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_32_4( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_32_5( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_32_6( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_32_7( struct lp_rasterizer_task *, + const 
union lp_rast_cmd_arg ); +void lp_rast_triangle_32_8( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle_32_3_4(struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle_32_3_16( struct lp_rasterizer_task *
Re: [Mesa-dev] [PATCH] llvmpipe: support 8bit subpixel precision
> For me too, other than the fixed_position members, looks good. Thanks for > your perseverance on this Zack! Thanks! ok, attached is a version that makes position and dx/dy 32bit again, it seems to work great. I have a question for you guys if you run the piglits: ./bin/triangle-rasterization-overdraw -max_size -seed 0xA8402F24 -count 1 -auto on master does it fail for you? It fails for me on master, with and without the patch. I'm not sure what to make of it, I might have been looking at rasterization for too long. Looking at the rendering it looks correct. z From 55c9a288c7ebc37b32bc75526e6de71a838ccaef Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 24 Oct 2013 22:05:22 -0400 Subject: [PATCH] llvmpipe: support 8bit subpixel precision 8 bit precision is required by d3d10 but unfortunately requires 64 bit rasterizer. This commit implements 64 bit rasterization with full support for 8bit subpixel precision. It's a combination of all individual commits from the llvmpipe-rast-64 branch. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 11 ++ src/gallium/drivers/llvmpipe/lp_rast.h | 47 +-- src/gallium/drivers/llvmpipe/lp_rast_debug.c | 6 +- src/gallium/drivers/llvmpipe/lp_rast_priv.h| 27 src/gallium/drivers/llvmpipe/lp_rast_tri.c | 173 src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 56 src/gallium/drivers/llvmpipe/lp_setup_line.c | 2 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c| 147 + src/gallium/tests/graw/SConscript | 1 + src/gallium/tests/graw/tri-large.c | 174 + 10 files changed, 496 insertions(+), 148 deletions(-) create mode 100644 src/gallium/tests/graw/tri-large.c diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index af661e9..0cd62c2 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -589,6 +589,17 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] = lp_rast_begin_query, lp_rast_end_query, lp_rast_set_state, + lp_rast_triangle_32_1, + lp_rast_triangle_32_2, + lp_rast_triangle_32_3, + lp_rast_triangle_32_4, + lp_rast_triangle_32_5, + lp_rast_triangle_32_6, + lp_rast_triangle_32_7, + lp_rast_triangle_32_8, + lp_rast_triangle_32_3_4, + lp_rast_triangle_32_3_16, + lp_rast_triangle_32_4_16 }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 43c598d..b81d94f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -46,10 +46,11 @@ struct lp_scene; struct lp_fence; struct cmd_bin; -#define FIXED_TYPE_WIDTH 32 +#define FIXED_TYPE_WIDTH 64 /** For sub-pixel positioning */ -#define FIXED_ORDER 4 +#define FIXED_ORDER 8 #define FIXED_ONE (1< +#include "util/u_sse.h" + +static INLINE __m128i +lp_plane_to_m128i(const struct lp_rast_plane *plane) +{ + return _mm_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx, + (int32_t)plane->dcdy, (int32_t)plane->eo); +} + +#endif + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c 
b/src/gallium/drivers/llvmpipe/lp_rast_debug.c index 3bc75aa..587c793 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c @@ -195,8 +195,8 @@ debug_triangle(int tilex, int tiley, while (plane_mask) { plane[nr_planes] = tri_plane[u_bit_scan(&plane_mask)]; plane[nr_planes].c = (plane[nr_planes].c + -plane[nr_planes].dcdy * tiley - -plane[nr_planes].dcdx * tilex); +IMUL64(plane[nr_planes].dcdy, tiley) - +IMUL64(plane[nr_planes].dcdx, tilex)); nr_planes++; } @@ -217,7 +217,7 @@ debug_triangle(int tilex, int tiley, } for (i = 0; i < nr_planes; i++) { - plane[i].c += plane[i].dcdx * TILE_SIZE; + plane[i].c += IMUL64(plane[i].dcdx, TILE_SIZE); plane[i].c += plane[i].dcdy; } } diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 41fe097..77ec329 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -355,6 +355,33 @@ void lp_rast_triangle_3_16( struct lp_rasterizer_task *, void lp_rast_triangle_4_16( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); + +void lp_rast_triangle_32_1( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_32_2( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_32_3( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void
Re: [Mesa-dev] [PATCH 4/8] draw/gs: fix segfault in glsl-1.50-gs-mismatch-prim-type triangles_adjacency
That looks wrong. The total number of verts per buffer is the maximum number of verts that can be output per invocation (primitive_boundary) times number of invocations of geometry shader (num_in_primitives). It's not maximum number of verts that can be output per invocation (primitive_boundary) times maximum number of primitives output by geometry shader (max_out_prims). z - Original Message - > From: Dave Airlie > > This crashes on softpipe due to a lack of output memory allocated, > > it appears we allocate memory for enough primtives, but not vertices > so convert to number of vertices. > > Signed-off-by: Dave Airlie > --- > src/gallium/auxiliary/draw/draw_gs.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/auxiliary/draw/draw_gs.c > b/src/gallium/auxiliary/draw/draw_gs.c > index fc4f697..0a9bf81 100644 > --- a/src/gallium/auxiliary/draw/draw_gs.c > +++ b/src/gallium/auxiliary/draw/draw_gs.c > @@ -555,7 +555,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader > *shader, > /* we allocate exactly one extra vertex per primitive to allow the GS to > emit > * overflown vertices into some area where they won't harm anyone */ > unsigned total_verts_per_buffer = shader->primitive_boundary * > - num_in_primitives; > + max_out_prims * u_vertices_per_prim(shader->output_primitive); > > //Assume at least one primitive > max_out_prims = MAX2(max_out_prims, 1); > -- > 1.9.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/8] draw/gs: fix segfault in glsl-1.50-gs-mismatch-prim-type triangles_adjacency
I think the code is already correct and something else goes wrong. The tgsi geometry shader code was never done properly so it's more than likely that tgsi_exec is doing something wonky. Geometry shaders specify the maximum number of vertices that they can emit. That's what draw_geometry_shader::max_output_vertices is. If a geometry shader emits more than that, the verts will be ignored. So our primitive_boundary is max_output_vertices + 1 because we want to make sure that in SoA we have a scratch space where we can keep writing the overflowed vertices. So the worst case scenario for our output buffer is: (max_output_vertices + 1) * geometry shader invocations. That's what we have there now and that's correct. I don't remember what tgsi_exec does, I think I never even implemented proper SoA for gs in tgsi_exec, so if there's anything wrong I'd look for the bug there. z - Original Message - > On 11 June 2014 00:02, Zack Rusin wrote: > > That looks wrong. The total number of verts per buffer is the maximum > > number of verts that can be output per invocation (primitive_boundary) > > times number of invocations of geometry shader (num_in_primitives). > > > > It's not maximum number of verts that can be output per invocation > > (primitive_boundary) times maximum number of primitives output by geometry > > shader (max_out_prims). > > > > Okay so just adding * u_vertices_per_prim(shader->output_primitive); > would suffice? > > Dave > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/8] draw/gs: fix segfault in glsl-1.50-gs-mismatch-prim-type triangles_adjacency
> I'll revisit it today and see if I can spot something else wrong, it > fails for triangle adj because there are 6 vertices per primitive and > we have only malloced space for 4. It has to be something else because that's impossible, in fact it's 2x impossible ;) 1) It's illegal and impossible for geometry shader to emit adjacency primitives. Only points, lines and triangles can be emitted from gs. 2) The output primitive is irrelevant for the size of the buffer. If a geometry shader claims that the max output vertices is four, then it can, at most, emit 4 points, 2 lines or 1 triangle (incomplete primitives are discarded from geometry shader so the extra 4th vertex will be discarded). If a geometry shader claims to max emit 4 vertices and you try to emit 100 points, you will still get only 4 points (96 will be counted as overflowed but they won't be emitted). My advice would be to check what's in the output buffer with llvmpipe. If tgsi_exec doesn't match llvmpipe then there's a bug in tgsi_exec. z ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] draw: avoid buffer overflows with bad geometry programs.
To be honest I still don't like it. While the tgsi_exec specific paths in draw_gs don't matter to me and can be as ugly as they need to be, they can't be polluting the draw_pt_emit code, in other words the primitive_lengths can't be bogus at that point - prim_info can't lie about the amount of data that it's holding. z - Original Message - > From: Dave Airlie > > One of the mismatched tests have a max output vertices of 3, > but emits 6 vertices, this means the output buffer is undersized > and causes problems down the line, so limit things later if we > have a number of vertices lower than the number required to execute > a primitive. > > Signed-off-by: Dave Airlie > --- > src/gallium/auxiliary/draw/draw_gs.c | 4 ++-- > src/gallium/auxiliary/draw/draw_pt_emit.c | 8 +++- > 2 files changed, 9 insertions(+), 3 deletions(-) > > diff --git a/src/gallium/auxiliary/draw/draw_gs.c > b/src/gallium/auxiliary/draw/draw_gs.c > index fc4f697..d07e88f 100644 > --- a/src/gallium/auxiliary/draw/draw_gs.c > +++ b/src/gallium/auxiliary/draw/draw_gs.c > @@ -92,8 +92,8 @@ tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader, >unsigned num_verts_per_prim = machine->Primitives[prim_idx]; >shader->primitive_lengths[prim_idx + shader->emitted_primitives] = > machine->Primitives[prim_idx]; > - shader->emitted_vertices += num_verts_per_prim; > - for (j = 0; j < num_verts_per_prim; j++, current_idx++) { > + shader->emitted_vertices += MIN2(num_verts_per_prim, > shader->max_output_vertices); > + for (j = 0; j < MIN2(num_verts_per_prim, shader->max_output_vertices); > j++, current_idx++) { > int idx = current_idx * shader->info.num_outputs; > #ifdef DEBUG_OUTPUTS > debug_printf("%d) Output vert:\n", idx / shader->info.num_outputs); > diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c > b/src/gallium/auxiliary/draw/draw_pt_emit.c > index 011efe7..d8e2809 100644 > --- a/src/gallium/auxiliary/draw/draw_pt_emit.c > +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c > @@ -26,6 +26,7 @@ > **/ 
> > #include "util/u_memory.h" > +#include "util/u_math.h" > #include "draw/draw_context.h" > #include "draw/draw_private.h" > #include "draw/draw_vbuf.h" > @@ -255,9 +256,14 @@ draw_pt_emit_linear(struct pt_emit *emit, > i < prim_info->primitive_count; > start += prim_info->primitive_lengths[i], i++) > { > + int len; > + if (start > count) > + continue; > + len = MIN2(prim_info->primitive_lengths[i], count); >render->draw_arrays(render, >start, > - prim_info->primitive_lengths[i]); > + len); > + > } > > render->release_vertices(render); > -- > 1.9.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] tgsi/gs: bound max output vertices in shader
Looks great. If I was into diffs I'd make sweet and passionate love to this one. Reviewed-by: Zack Rusin - Original Message - > From: Dave Airlie > > This limits the number of emitted vertices to the shaders max output > vertices, and avoids us writing things into memory that isn't big > enough for it. > > Signed-off-by: Dave Airlie > --- > src/gallium/auxiliary/tgsi/tgsi_exec.c | 8 > src/gallium/auxiliary/tgsi/tgsi_exec.h | 1 + > 2 files changed, 9 insertions(+) > > diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c > b/src/gallium/auxiliary/tgsi/tgsi_exec.c > index 69d98fd..d848348 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c > @@ -789,6 +789,11 @@ tgsi_exec_machine_bind_shader( > break; > >case TGSI_TOKEN_TYPE_PROPERTY: > + if (mach->Processor == TGSI_PROCESSOR_GEOMETRY) { > +if (parse.FullToken.FullProperty.Property.PropertyName == > TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) { > + mach->MaxOutputVertices = > parse.FullToken.FullProperty.u[0].Data; > +} > + } > break; > >default: > @@ -1621,6 +1626,9 @@ emit_vertex(struct tgsi_exec_machine *mach) > if ((mach->ExecMask & (1 << i))) > */ > if (mach->ExecMask) { > + if > (mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] > >= mach->MaxOutputVertices) > + return; > + >mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += >mach->NumOutputs; > > mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; > } > diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h > b/src/gallium/auxiliary/tgsi/tgsi_exec.h > index 7a82f69..d53c4ba 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h > +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h > @@ -297,6 +297,7 @@ struct tgsi_exec_machine > unsigned *Primitives; > unsigned NumOutputs; > unsigned MaxGeometryShaderOutputs; > + unsigned MaxOutputVertices; > > /* FRAGMENT processor only. 
*/ > const struct tgsi_interp_coef *InterpCoefs; > -- > 1.9.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] gallivm: handle nan's in min/max
Both D3D10 and OpenCL say that if one of the inputs is nan then the other should be returned. To preserve that behavior the patch fixes both the sse and the non-sse paths in both functions. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 60 --- 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index e7955aa..7beb117 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -176,12 +176,36 @@ lp_build_min_simple(struct lp_build_context *bld, } if(intrinsic) { - return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, - type, - intr_size, a, b); + LLVMValueRef bmask, max; + /* We need to handle nan's for floating point numbers. If one of the + * inputs is nan the other should be returned (required by both D3D10+ + * and OpenCL) + */ + if (type.floating) { + bmask = LLVMBuildFCmp(bld->gallivm->builder, LLVMRealOEQ, b, b, ""); + max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, + type, + intr_size, a, b); + return LLVMBuildSelect(bld->gallivm->builder, bmask, max, a, ""); + } else { + return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, +type, +intr_size, a, b); + } } cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b); + /* We need to handle nan's for floating point numbers. 
If one of the +* inputs is nan the other should be returned (required by both D3D10+ +* and OpenCL) +*/ + if (type.floating) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); + LLVMValueRef nanmask = LLVMBuildFCmp(bld->gallivm->builder, + LLVMRealOEQ, a, a, ""); + nanmask = LLVMBuildSExt(bld->gallivm->builder, nanmask, int_vec_type, ""); + cond = LLVMBuildAnd(bld->gallivm->builder, cond, nanmask, ""); + } return lp_build_select(bld, cond, a, b); } @@ -293,12 +317,36 @@ lp_build_max_simple(struct lp_build_context *bld, } if(intrinsic) { - return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, - type, - intr_size, a, b); + LLVMValueRef bmask, min; + /* We need to handle nan's for floating point numbers. If one of the + * inputs is nan the other should be returned (required by both D3D10+ + * and OpenCL) + */ + if (type.floating) { + bmask = LLVMBuildFCmp(bld->gallivm->builder, LLVMRealOEQ, b, b, ""); + min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, + type, + intr_size, a, b); + return LLVMBuildSelect(bld->gallivm->builder, bmask, min, a, ""); + } else { + return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, +type, +intr_size, a, b); + } } cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b); + /* We need to handle nan's for floating point numbers. If one of the +* inputs is nan the other should be returned (required by both D3D10+ +* and OpenCL) +*/ + if (type.floating) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); + LLVMValueRef nanmask = LLVMBuildFCmp(bld->gallivm->builder, + LLVMRealOEQ, a, a, ""); + nanmask = LLVMBuildSExt(bld->gallivm->builder, nanmask, int_vec_type, ""); + cond = LLVMBuildAnd(bld->gallivm->builder, cond, nanmask, ""); + } return lp_build_select(bld, cond, a, b); } -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] gallivm: fix edge cases in exp2
exp2(0) needs to be exactly 1, if exp2(src) overflows then it has to be equal to infinity and exp2(nan) has to be equal to a nan. The patch fixes all three cases. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 7beb117..467cbc6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -3004,7 +3004,7 @@ lp_build_polynomial(struct lp_build_context *bld, */ const double lp_build_exp2_polynomial[] = { #if EXP_POLY_DEGREE == 5 - 0.99925063526176901, + 1.0, 0.693153073200168932794, 0.240153617044375388211, 0.0558263180532956664775, @@ -3046,6 +3046,8 @@ lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef expipart = NULL; LLVMValueRef expfpart = NULL; LLVMValueRef res = NULL; + LLVMValueRef infmask = bld->zero; + LLVMValueRef nanmask = bld->zero; assert(lp_check_value(bld->type, x)); @@ -3059,6 +3061,10 @@ lp_build_exp2_approx(struct lp_build_context *bld, assert(type.floating && type.width == 32); + /* We need ot handle both inf and nan inputs */ + infmask = lp_build_cmp(bld, PIPE_FUNC_GREATER, x, + lp_build_const_vec(bld->gallivm, type, 129.0)); + nanmask = LLVMBuildFCmp(bld->gallivm->builder, LLVMRealOEQ, x, x, ""); x = lp_build_min(bld, x, lp_build_const_vec(bld->gallivm, type, 129.0)); x = lp_build_max(bld, x, lp_build_const_vec(bld->gallivm, type, -126.9)); @@ -3081,6 +3087,14 @@ lp_build_exp2_approx(struct lp_build_context *bld, Elements(lp_build_exp2_polynomial)); res = LLVMBuildFMul(builder, expipart, expfpart, ""); + /* If the input would overflow make sure return is correctly an inf */ + res = lp_build_select(bld, infmask, +lp_build_const_vec(bld->gallivm, type, INFINITY), +res); + /* If the input was a nan make sure the return is also a nan */ + res = LLVMBuildSelect(bld->gallivm->builder, 
nanmask, res, 
+lp_build_const_vec(bld->gallivm, type, NAN), +""); } if(p_exp2_int_part) -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] llvmpipe: fix blending with SRC_ALPHA_SATURATE with some formats without alpha
Looks good to me. - Original Message - > From: Roland Scheidegger > > We were fixing up the blend factor to ZERO, however this only works correctly > with fixed point render buffers where the input values are clamped to 0/1 > (because src_alpha_saturate is min(As, 1-Ad) so can be negative with > unclamped > inputs). Haven't seen any failure anywhere due to that with fixed point SNORM > buffers (which clamp inputs to -1/1) but it should apply there as well (snorm > blending is rare, even opengl 4.3 doesn't require snorm rendertargets at all, > d3d10 requires them but they are not blendable). > Doesn't look like piglit hits this though (some internal testing hits the > float case at least). (With legacy OpenGL we could theoretically still use > the > fixup to zero if the fragment color clamp is enabled, but we can't detect > that > easily since we don't support native clamping hence it gets baked into the > shader.) > --- > src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 18 ++ > src/gallium/drivers/llvmpipe/lp_state_fs.c | 16 > 2 files changed, 26 insertions(+), 8 deletions(-) > > diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c > b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c > index c4d04a2..377eaa5 100644 > --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c > +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c > @@ -114,10 +114,20 @@ lp_build_blend_factor_unswizzled(struct > lp_build_blend_aos_context *bld, >if(alpha) > return bld->base.one; >else { > - if(!bld->inv_dst) > -bld->inv_dst = lp_build_comp(&bld->base, bld->dst); > - if(!bld->saturate) > -bld->saturate = lp_build_min(&bld->base, src_alpha, > bld->inv_dst); > + /* > + * if there's separate src_alpha there's no dst alpha hence the > complement > + * is zero but for unclamped float inputs min can be non-zero > (negative). 
> + */ > + if (bld->src_alpha) { > +if (!bld->saturate) > + bld->saturate = lp_build_min(&bld->base, src_alpha, > bld->base.zero); > + } > + else { > +if(!bld->inv_dst) > + bld->inv_dst = lp_build_comp(&bld->base, bld->dst); > +if(!bld->saturate) > + bld->saturate = lp_build_min(&bld->base, src_alpha, > bld->inv_dst); > + } > return bld->saturate; >} > case PIPE_BLENDFACTOR_CONST_COLOR: > diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c > b/src/gallium/drivers/llvmpipe/lp_state_fs.c > index afd01e3..a305109 100644 > --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c > +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c > @@ -2607,7 +2607,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, > * Return the blend factor equivalent to a destination alpha of one. > */ > static INLINE unsigned > -force_dst_alpha_one(unsigned factor) > +force_dst_alpha_one(unsigned factor, boolean clamped_zero) > { > switch(factor) { > case PIPE_BLENDFACTOR_DST_ALPHA: > @@ -2615,7 +2615,10 @@ force_dst_alpha_one(unsigned factor) > case PIPE_BLENDFACTOR_INV_DST_ALPHA: >return PIPE_BLENDFACTOR_ZERO; > case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: > - return PIPE_BLENDFACTOR_ZERO; > + if (clamped_zero) > + return PIPE_BLENDFACTOR_ZERO; > + else > + return PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE; > } > > return factor; > @@ -2735,8 +2738,13 @@ make_variant_key(struct llvmpipe_context *lp, > */ >if (format_desc->swizzle[3] > UTIL_FORMAT_SWIZZLE_W || >format_desc->swizzle[3] == format_desc->swizzle[0]) { > - blend_rt->rgb_src_factor = > force_dst_alpha_one(blend_rt->rgb_src_factor); > - blend_rt->rgb_dst_factor = > force_dst_alpha_one(blend_rt->rgb_dst_factor); > + /* Doesn't cover mixed snorm/unorm but can't render to them anyway > */ > + boolean clamped_zero = !util_format_is_float(format) && > +!util_format_is_snorm(format); > + blend_rt->rgb_src_factor = > force_dst_alpha_one(blend_rt->rgb_src_factor, > + clamped_zero); > + blend_rt->rgb_dst_factor = > 
force_dst_alpha_one(blend_rt->rgb_dst_factor, > + clamped_zero); > blend_rt->alpha_func = blend_rt->rgb_func; > blend_rt->alpha_src_factor = blend_rt->rgb_src_factor; > blend_rt->alpha_dst_factor = blend_rt->rgb_dst_factor; > -- > 1.7.9.5 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: fix rgtc snorm decoding
Reviewed-by: Zack Rusin - Original Message - > From: Roland Scheidegger > > The codeword must be unsigned (otherwise will shift in 1's from above when > merging low/high parts so some texels decode wrong). > This also affects gallium's util/u_format_rgtc. > --- > src/mesa/main/texcompress_rgtc_tmp.h |6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/src/mesa/main/texcompress_rgtc_tmp.h > b/src/mesa/main/texcompress_rgtc_tmp.h > index 277d69b..5fa9de6 100644 > --- a/src/mesa/main/texcompress_rgtc_tmp.h > +++ b/src/mesa/main/texcompress_rgtc_tmp.h > @@ -37,9 +37,9 @@ static void TAG(fetch_texel_rgtc)(unsigned srcRowStride, > const TYPE *pixdata, > const TYPE alpha0 = blksrc[0]; > const TYPE alpha1 = blksrc[1]; > const char bit_pos = ((j&3) * 4 + (i&3)) * 3; > - const TYPE acodelow = blksrc[2 + bit_pos / 8]; > - const TYPE acodehigh = (3 + bit_pos / 8) < 8 ? blksrc[3 + bit_pos / 8] : > 0; > - const TYPE code = (acodelow >> (bit_pos & 0x7) | > + const unsigned char acodelow = blksrc[2 + bit_pos / 8]; > + const unsigned char acodehigh = (3 + bit_pos / 8) < 8 ? blksrc[3 + > bit_pos / 8] : 0; > + const unsigned char code = (acodelow >> (bit_pos & 0x7) | >(acodehigh << (8 - (bit_pos & 0x7)))) & 0x7; > > if (code == 0) > -- > 1.7.9.5 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] draw: cleanup and fix instance id computation
The instance id system value always starts at 0, even if the specified start instance is larger than 0. Instead of implicitly setting instance id to instance id plus start instance and then having to subtract the start instance when computing the buffer offsets, let's just set instance id to the proper instance id. This fixes instance id computation and cleans up buffer offset computation. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_llvm.c |7 +++ src/gallium/auxiliary/draw/draw_pt.c|7 --- src/gallium/auxiliary/translate/translate_generic.c |3 +-- src/gallium/auxiliary/translate/translate_sse.c |4 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 79e7a9b..a3174b4 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -707,15 +707,14 @@ generate_fetch(struct gallivm_state *gallivm, if (velem->instance_divisor) { /* Index is equal to the start instance plus the number of current * instance divided by the divisor. 
In this case we compute it as: - * index = start_instance + ((instance_id - start_instance) / divisor) + * index = start_instance + (instance_id / divisor) */ LLVMValueRef current_instance; index = lp_build_const_int32(gallivm, draw->start_instance); - current_instance = LLVMBuildSub(builder, instance_id, index, ""); - current_instance = LLVMBuildUDiv(builder, current_instance, + current_instance = LLVMBuildUDiv(builder, instance_id, lp_build_const_int32(gallivm, velem->instance_divisor), "instance_divisor"); - index = LLVMBuildAdd(builder, index, current_instance, "instance"); + index = lp_build_uadd_overflow(gallivm, index, current_instance, &ofbit); } stride = lp_build_umul_overflow(gallivm, vb_stride, index, &ofbit); diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index ccde371..fcc2405 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -542,11 +542,12 @@ draw_vbo(struct draw_context *draw, */ for (instance = 0; instance < info->instance_count; instance++) { - draw->instance_id = instance + info->start_instance; + unsigned instance_idx = instance + info->start_instance; draw->start_instance = info->start_instance; + draw->instance_id = instance; /* check for overflow */ - if (draw->instance_id < instance || - draw->instance_id < info->start_instance) { + if (instance_idx < instance || + instance_idx < draw->start_instance) { /* if we overflown just set the instance id to the max */ draw->instance_id = 0x; } diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 96e35b0..fdab0f3 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -625,8 +625,7 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic * if (tg->attrib[attr].instance_divisor) { index = start_instance; -index += (instance_id - start_instance) / - 
tg->attrib[attr].instance_divisor; +index += (instance_id / tg->attrib[attr].instance_divisor); /* XXX we need to clamp the index here too, but to a * per-array max value, not the draw->pt.max_index value * that's being given to us via translate->set_buffer(). diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index a4f7b24..726a9b1 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -1094,10 +1094,6 @@ static boolean init_inputs( struct translate_sse *p, struct x86_reg tmp_EDX = p->tmp2_EDX; struct x86_reg tmp_ECX = p->src_ECX; - /* instance_num = instance_id - start_instance */ - x86_mov(p->func, tmp_EDX, start_instance); - x86_sub(p->func, tmp_EAX, tmp_EDX); - /* TODO: Add x86_shr() to rtasm and use it whenever * instance divisor is power of two. */ -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] draw: fix vertex id computation
vertex id has to be unaffected by the start index (i.e. when calling draw arrays with start_index = 5, the first vertex_id has to still be 0, not 5) and it has to be equal to the index when performing indexed rendering (in which case it has to be unaffected by the index bias). This fixes our behavior. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_llvm.c| 37 ++--- src/gallium/auxiliary/draw/draw_private.h |1 + src/gallium/auxiliary/draw/draw_pt.c |1 + 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index a3174b4..adf3941 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1646,22 +1646,19 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, #endif system_values.vertex_id = lp_build_zero(gallivm, lp_type_uint_vec(32, 32*vector_length)); for (i = 0; i < vector_length; ++i) { - LLVMValueRef true_index = + LLVMValueRef vert_index = LLVMBuildAdd(builder, lp_loop.counter, lp_build_const_int32(gallivm, i), ""); - true_index = LLVMBuildAdd(builder, start, true_index, ""); + LLVMValueRef true_index = +LLVMBuildAdd(builder, start, vert_index, ""); + LLVMValueRef vertex_id; /* make sure we're not out of bounds which can happen * if fetch_count % 4 != 0, because on the last iteration * a few of the 4 vertex fetches will be out of bounds */ true_index = lp_build_min(&bld, true_index, fetch_max); - system_values.vertex_id = LLVMBuildInsertElement( -gallivm->builder, -system_values.vertex_id, true_index, -lp_build_const_int32(gallivm, i), ""); - if (elts) { LLVMValueRef fetch_ptr; LLVMValueRef index_overflowed; @@ -1673,7 +1670,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, index_overflowed = LLVMBuildICmp(builder, LLVMIntUGT, true_index, fetch_elt_max, "index_overflowed"); - + lp_build_if(&if_ctx, gallivm, index_overflowed); { /* Generate maximum possible 
index so that @@ -1697,8 +1694,32 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, } lp_build_endif(&if_ctx); true_index = LLVMBuildLoad(builder, index_ptr, "true_index"); +/* vertex id has to be unaffected by the index bias and because + * indices inside our elements array have already had index + * bias applied we need to subtract it here to get back to the + * original index. + */ +vertex_id = LLVMBuildSub( + builder, true_index, + lp_build_const_int32(gallivm, draw->pt.user.eltBias), ""); + } else { +/* vertex id has to be unaffected by the original start index + * and because we abuse the 'start' variable to either represent + * the actual start index or the index at which the primitive + * was split (we split rendering into chunks of at most + * 4095-vertices) we need to back out the original start + * index out of our vertex id here. + */ +vertex_id = LLVMBuildSub( + builder, true_index, + lp_build_const_int32(gallivm, draw->start_index), ""); } + system_values.vertex_id = LLVMBuildInsertElement( +gallivm->builder, +system_values.vertex_id, vertex_id, +lp_build_const_int32(gallivm, i), ""); + for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; LLVMValueRef vb_index = diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index d8cd8eb..868b6c7 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -311,6 +311,7 @@ struct draw_context unsigned instance_id; unsigned start_instance; + unsigned start_index; #ifdef HAVE_LLVM struct draw_llvm *llvm; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index fcc2405..5b16bc7 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -535,6 +535,7 @@ draw_vbo(struct draw_context *draw, } draw->pt.ma
[Mesa-dev] [PATCH 3/3] draw/llvmpipe: allow indexed rendering without index buffer bound
This is a wonky requirement of D3D10, which expects that if an indexed rendering call is issued without an index buffer bound, the rendering should still happen but with all indices set to 0. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_private.h |5 +++-- src/gallium/auxiliary/draw/draw_pt.c|4 +--- src/gallium/auxiliary/draw/draw_pt_vsplit.c |3 ++- src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h |6 +++--- src/gallium/drivers/llvmpipe/lp_draw_arrays.c |2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 868b6c7..dfb71c9 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -478,8 +478,9 @@ draw_stats_clipper_primitives(struct draw_context *draw, * If the index buffer would overflow we return the * maximum possible index. */ -#define DRAW_GET_IDX(_elts, _i) \ - (((_i) >= draw->pt.user.eltMax) ? DRAW_MAX_FETCH_IDX : (_elts)[_i]) +#define DRAW_GET_IDX(_elts, _i) \ + (!_elts ? (0) : \ +(((_i) >= draw->pt.user.eltMax) ? 
DRAW_MAX_FETCH_IDX : (_elts)[_i])) /** * Return index of the given viewport clamping it diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 5b16bc7..bf54f85 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -347,7 +347,7 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) do { \ for (j = 0; j < count; j++) { \ i = draw_overflow_uadd(start, j, MAX_LOOP_IDX); \ - if (i < elt_max && elements[i] == info->restart_index) { \ + if (i < elt_max && (elements && elements[i] == info->restart_index)) { \ if (cur_count > 0) { \ /* draw elts up to prev pos */ \ draw_pt_arrays(draw, prim, cur_start, cur_count); \ @@ -471,8 +471,6 @@ draw_vbo(struct draw_context *draw, info = &resolved_info; assert(info->instance_count > 0); - if (info->indexed) - assert(draw->pt.user.elts); count = info->count; diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit.c b/src/gallium/auxiliary/draw/draw_pt_vsplit.c index 625505d..395a38c 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vsplit.c +++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c @@ -116,7 +116,8 @@ vsplit_get_base_idx(struct vsplit_frontend *vsplit, unsigned start, unsigned fetch, unsigned *ofbit) { struct draw_context *draw = vsplit->draw; - unsigned elt_idx = draw_overflow_uadd(start, fetch, MAX_ELT_IDX); + unsigned elt_idx = draw->pt.user.elts ? 
+ draw_overflow_uadd(start, fetch, MAX_ELT_IDX) : 0; if (ofbit) *ofbit = 0; diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h index 5d72ac6..4f462c0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h @@ -44,8 +44,8 @@ CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit, unsigned fetch_start, fetch_count; const ushort *draw_elts = NULL; unsigned i; - const unsigned start = istart; - const unsigned end = istart + icount; + const unsigned start = ib ? istart : 0; + const unsigned end = ib ? istart + icount : icount; /* If the index buffer overflows we'll need to run * through the normal paths */ @@ -55,7 +55,7 @@ CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit, return FALSE; /* use the ib directly */ - if (min_index == 0 && sizeof(ib[0]) == sizeof(draw_elts[0])) { + if (ib && min_index == 0 && sizeof(ib[0]) == sizeof(draw_elts[0])) { if (icount > vsplit->max_vertices) return FALSE; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 4e23904..32d8f60 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -83,7 +83,7 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (info->indexed) { unsigned available_space = ~0; mapped_indices = lp->index_buffer.user_buffer; - if (!mapped_indices) { + if (!mapped_indices && lp->index_buffer.buffer) { mapped_indices = llvmpipe_resource_data(lp->index_buffer.buffer); if (lp->index_buffer.buffer->width0 > lp->index_buffer.offset) available_space = -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] draw: always call and move util_cpu_detect() to draw context creation.
Nice catch! Thanks! - Original Message - > From: Roland Scheidegger > > CPU detection is not really x86 specific, the ifdef in particular didn't > even catch x86_64. > Also move to draw context creation which seems a lot cleaner, and just > call it always (which seems like a better idea than rely on drivers doing > this > especially if drivers otherwise don't need it). > This fixes https://bugs.freedesktop.org/show_bug.cgi?id=66806. > (Because util_cpu_caps wasn't initialized when first calling > util_fpstate_get() > hence it returning zero, but it would later get initialized by rtasm > translate > code hence when draw call returned it unmasked all exceptions by calling > util_fpstate_set(). This was happening only with DRAW_USE_LLVM=0 or not > compiling with llvm, otherwise the llvm init code was calling it on time > too.) > --- > src/gallium/auxiliary/draw/draw_context.c |5 +++-- > 1 file changed, 3 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/auxiliary/draw/draw_context.c > b/src/gallium/auxiliary/draw/draw_context.c > index 4a08765..26af984 100644 > --- a/src/gallium/auxiliary/draw/draw_context.c > +++ b/src/gallium/auxiliary/draw/draw_context.c > @@ -57,8 +57,7 @@ draw_get_option_use_llvm(void) >value = debug_get_bool_option("DRAW_USE_LLVM", TRUE); > > #ifdef PIPE_ARCH_X86 > - util_cpu_detect(); > - /* require SSE2 due to LLVM PR6960. */ > + /* require SSE2 due to LLVM PR6960. XXX Might be fixed by now? */ >if (!util_cpu_caps.has_sse2) > value = FALSE; > #endif > @@ -78,6 +77,8 @@ draw_create_context(struct pipe_context *pipe, boolean > try_llvm) > if (draw == NULL) >goto err_out; > > + util_cpu_detect(); > + > #if HAVE_LLVM > if (try_llvm && draw_get_option_use_llvm()) { >draw->llvm = draw_llvm_create(draw); > -- > 1.7.9.5 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] draw: fix vertex id computation
vertex id has to be unaffected by the start index (i.e. when calling draw arrays with start_index = 5, the first vertex_id has to still be 0, not 5) and it has to be equal to the index when performing indexed rendering (in which case it has to be unaffected by the index bias). This fixes our behavior. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_llvm.c | 36 +++- src/gallium/auxiliary/draw/draw_llvm.h |6 ++-- src/gallium/auxiliary/draw/draw_private.h |1 + src/gallium/auxiliary/draw/draw_pt.c |1 + .../draw/draw_pt_fetch_shade_pipeline_llvm.c |6 ++-- 5 files changed, 37 insertions(+), 13 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index a3174b4..c195a2b 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1486,7 +1486,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, struct gallivm_state *gallivm = variant->gallivm; LLVMContextRef context = gallivm->context; LLVMTypeRef int32_type = LLVMInt32TypeInContext(context); - LLVMTypeRef arg_types[9]; + LLVMTypeRef arg_types[10]; unsigned num_arg_types = elts ? 
Elements(arg_types) : Elements(arg_types) - 1; LLVMTypeRef func_type; @@ -1496,6 +1496,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, struct lp_type vs_type; LLVMValueRef end, start; LLVMValueRef count, fetch_elts, fetch_elt_max, fetch_count; + LLVMValueRef vertex_id_offset; LLVMValueRef stride, step, io_itr; LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; LLVMValueRef zero = lp_build_const_int32(gallivm, 0); @@ -1541,6 +1542,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, arg_types[i++] = int32_type; /* stride */ arg_types[i++] = get_vb_ptr_type(variant); /* pipe_vertex_buffer's */ arg_types[i++] = int32_type; /* instance_id */ + arg_types[i++] = int32_type; /* vertex_id_offset */ func_type = LLVMFunctionType(int32_type, arg_types, num_arg_types, 0); @@ -1565,6 +1567,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, stride= LLVMGetParam(variant_func, 5 + (elts ? 1 : 0)); vb_ptr= LLVMGetParam(variant_func, 6 + (elts ? 1 : 0)); system_values.instance_id = LLVMGetParam(variant_func, 7 + (elts ? 1 : 0)); + vertex_id_offset = LLVMGetParam(variant_func, 8 + (elts ? 
1 : 0)); lp_build_name(context_ptr, "context"); lp_build_name(io_ptr, "io"); @@ -1572,6 +1575,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, lp_build_name(stride, "stride"); lp_build_name(vb_ptr, "vb"); lp_build_name(system_values.instance_id, "instance_id"); + lp_build_name(vertex_id_offset, "vertex_id_offset"); if (elts) { fetch_elts= LLVMGetParam(variant_func, 3); @@ -1646,22 +1650,19 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, #endif system_values.vertex_id = lp_build_zero(gallivm, lp_type_uint_vec(32, 32*vector_length)); for (i = 0; i < vector_length; ++i) { - LLVMValueRef true_index = + LLVMValueRef vert_index = LLVMBuildAdd(builder, lp_loop.counter, lp_build_const_int32(gallivm, i), ""); - true_index = LLVMBuildAdd(builder, start, true_index, ""); + LLVMValueRef true_index = +LLVMBuildAdd(builder, start, vert_index, ""); + LLVMValueRef vertex_id; /* make sure we're not out of bounds which can happen * if fetch_count % 4 != 0, because on the last iteration * a few of the 4 vertex fetches will be out of bounds */ true_index = lp_build_min(&bld, true_index, fetch_max); - system_values.vertex_id = LLVMBuildInsertElement( -gallivm->builder, -system_values.vertex_id, true_index, -lp_build_const_int32(gallivm, i), ""); - if (elts) { LLVMValueRef fetch_ptr; LLVMValueRef index_overflowed; @@ -1673,7 +1674,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, index_overflowed = LLVMBuildICmp(builder, LLVMIntUGT, true_index, fetch_elt_max, "index_overflowed"); - + lp_build_if(&if_ctx, gallivm, index_overflowed); { /* Generate maximum possible index so that @@ -1698,6 +1699,23 @@ draw_llvm_generate(struc
Re: [Mesa-dev] [PATCH 06/34] draw/gs: fix allocation of buffer for GS output vertices
That looks wrong to me. We already account for the "other fields" in the vertex_size. - Original Message - > From: Bryan Cain > > Before, it accounted for the size of the vertices but not the other fields > in the vertex_header struct, which caused memory corruption. > --- > src/gallium/auxiliary/draw/draw_gs.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/auxiliary/draw/draw_gs.c > b/src/gallium/auxiliary/draw/draw_gs.c > index cd63e2b..78727c6 100644 > --- a/src/gallium/auxiliary/draw/draw_gs.c > +++ b/src/gallium/auxiliary/draw/draw_gs.c > @@ -560,7 +560,8 @@ int draw_geometry_shader_run(struct draw_geometry_shader > *shader, > /* we allocate exactly one extra vertex per primitive to allow the GS to > emit > * overflown vertices into some area where they won't harm anyone */ > output_verts->verts = > - (struct vertex_header *)MALLOC(output_verts->vertex_size * > + (struct vertex_header *)MALLOC(sizeof(struct vertex_header) + > + output_verts->vertex_size * > max_out_prims * > shader->primitive_boundary); > > -- > 1.8.3.4 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 24/34] gallivm: Fix indirect input fetches for gs.
That looks wrong to me as well. What would make get_indirect_index different in this case? - Original Message - > From: Fabian Bieler > > --- > src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > index c199385..1d27e81 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > @@ -1120,6 +1120,9 @@ emit_fetch_gs_input( >reg->Register.File, >reg->Register.Index, >&reg->Indirect); > + > + LLVMValueRef ind = lp_build_const_int32(gallivm, 0); > + attrib_index = LLVMBuildExtractElement(builder, attrib_index, ind, > ""); > } else { >attrib_index = lp_build_const_int32(gallivm, reg->Register.Index); > } > -- > 1.8.3.4 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] tgsi: add ucmp to the list of opcodes
we forgot to add ucmp to the list of opcodes, so it was never generated for ureg. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h |2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index b87c4b1..93ec0b5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -198,6 +198,8 @@ OP12(SVIEWINFO) OP13(SAMPLE_POS) OP12(SAMPLE_INFO) +OP13(UCMP) + #undef OP00 #undef OP01 -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallivm: obey clarified shift behavior
> From: Roland Scheidegger > > llvm shifts are undefined for shift counts exceeding (or matching) bit width, > so need to apply a mask for the tgsi shift instructions. > > v2: only use mask for the tgsi shift instructions, not for the build shift > helpers. None of the internal callers need this behavior, and while llvm can > optimize away the masking for constants there are legitimate cases where it > might not be able to do so even if we know that shift count must be smaller > than type width (currently all such callers do not use the build shift > helpers). Looks good to me ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] llvmpipe: make the front-face behavior match the gallium spec
The spec says that front-face is true if the value is >0 and false if it's <0. To make sure that we follow the spec, let's just subtract 0.5 from our value (llvmpipe did 1 for frontface and 0 otherwise), which will get us a positive num for frontface and negative for backface. Signed-off-by: Zack Rusin --- src/gallium/drivers/llvmpipe/lp_state_setup.c |5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c index bb5cfc4..cecfbce 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -182,7 +182,10 @@ emit_facing_coef(struct gallivm_state *gallivm, LLVMValueRef a0_0 = args->facing; LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, ""); LLVMValueRef zero = lp_build_const_float(gallivm, 0.0); - LLVMValueRef a0 = vec4f(gallivm, a0_0f, zero, zero, zero, "facing"); + LLVMValueRef face_val = LLVMBuildFSub(builder, a0_0f, + lp_build_const_float(gallivm, 0.5), + ""); + LLVMValueRef a0 = vec4f(gallivm, face_val, zero, zero, zero, "facing"); LLVMValueRef zerovec = vec4f_from_scalar(gallivm, zero, "zero"); store_coef(gallivm, args, slot, a0, zerovec, zerovec); -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] draw: inject frontface info into wireframe outputs
Draw module can decompose primitives into wireframe models, which is a fancy word for 'lines', unfortunately that decomposition means that we weren't able to preserve the original front-face info which could be derived from the original primitives (lines don't have a 'face'). To fix it allow draw module to inject a fake face semantic into outputs from which the backends can figure out the original frontfacing info of the primitives. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_context.c | 43 src/gallium/auxiliary/draw/draw_context.h |6 +++ src/gallium/auxiliary/draw/draw_pipe.h |3 ++ src/gallium/auxiliary/draw/draw_pipe_unfilled.c | 49 +++ src/gallium/drivers/i915/i915_state_derived.c |2 + src/gallium/drivers/llvmpipe/lp_context.h |3 ++ src/gallium/drivers/llvmpipe/lp_setup.c |1 + src/gallium/drivers/llvmpipe/lp_setup_context.h |1 + src/gallium/drivers/llvmpipe/lp_setup_line.c| 14 ++- src/gallium/drivers/llvmpipe/lp_state_derived.c |9 + src/gallium/drivers/r300/r300_state_derived.c |1 + src/gallium/drivers/softpipe/sp_state_derived.c |2 + src/gallium/drivers/svga/svga_swtnl_state.c |1 + 13 files changed, 133 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 4a6ba1a..2e95b5c 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -39,6 +39,7 @@ #include "util/u_helpers.h" #include "util/u_prim.h" #include "draw_context.h" +#include "draw_pipe.h" #include "draw_vs.h" #include "draw_gs.h" @@ -540,6 +541,22 @@ draw_get_shader_info(const struct draw_context *draw) } } +/** + * Prepare outputs slots from the draw module + * + * Certain parts of the draw module can emit additional + * outputs that can be quite useful to the backends, a good + * example of it is the process of decomposing primitives + * into wireframes (aka. 
lines) which normally would lose + * the face-side information, but using this method we can + * inject another shader output which passes the original + * face side information to the backend. + */ +void +draw_prepare_shader_outputs(struct draw_context *draw) +{ + draw_unfilled_prepare_outputs(draw, draw->pipeline.unfilled); +} /** * Ask the draw module for the location/slot of the given vertex attribute in @@ -973,3 +990,29 @@ draw_stats_clipper_primitives(struct draw_context *draw, } } } + + +/** + * Returns true if the draw module will inject the frontface + * info into the outputs. + * + * Given the specified primitive and rasterizer state + * the function will figure out if the draw module + * will inject the front-face information into shader + * outputs. This is done to preserve the front-facing + * info when decomposing primitives into wireframes. + */ +boolean +draw_will_inject_frontface(const struct draw_context *draw) +{ + unsigned reduced_prim = u_reduced_prim(draw->pt.prim); + const struct pipe_rasterizer_state *rast = draw->rasterizer; + + if (reduced_prim != PIPE_PRIM_TRIANGLES) { + return FALSE; + } + + return (rast && + (rast->fill_front != PIPE_POLYGON_MODE_FILL || +rast->fill_back != PIPE_POLYGON_MODE_FILL)); +} diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 4a1b27e..0815047 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -126,10 +126,16 @@ draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe struct tgsi_shader_info * draw_get_shader_info(const struct draw_context *draw); +void +draw_prepare_shader_outputs(struct draw_context *draw); + int draw_find_shader_output(const struct draw_context *draw, uint semantic_name, uint semantic_index); +boolean +draw_will_inject_frontface(const struct draw_context *draw); + uint draw_num_shader_outputs(const struct draw_context *draw); diff --git 
a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h index 4792507..2e48b56 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.h +++ b/src/gallium/auxiliary/draw/draw_pipe.h @@ -102,6 +102,9 @@ void draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *he void draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header); +void draw_unfilled_prepare_outputs(struct draw_context *context, + struct draw_stage *stage); + /** * Get a writeable copy of a vertex. diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index d87741b..d8a603f 100644 --- a/src/gallium/a
Re: [Mesa-dev] [PATCH 2/2] draw: inject frontface info into wireframe outputs
> > + if (draw_will_inject_frontface(lp_context->draw) && > I think it's annoying you have to do these calls to determine if there's > a valid frontface here for each line instead of just per draw call but > it doesn't seem easy to avoid it. Yea, there's no trivial way of avoiding it. > Also, no love for llvmpipe point face? I realize d3d10 doesn't require > it but OpenGL (and IIRC d3d9) do. I didn't know of any tests for the points and we care only about lines right now. It's just four extra lines of code or so, so I can trivially add it but I don't have anything to test it with. > Looks like quite a heavy interface (and sort of silly to allocate 128 > bits in the vertex data (so actually twice that for one line) for 1 bit > of information but given all our data passed on to the line/point funcs > are float4 I don't really see any other easy way neither), but seems all > necessary unfortunately. I guess another option would be to pass the > face info always along the vertex data no matter what (which would mean > all those additional calls for setting up outputs, determining if > there's a valid frontface etc. could go along with the storage needed) > for all primitives to the point/line/tri funcs but I'm not really > thrilled about that idea neither (passing it for tris so it doesn't have > to be recalculated may or may not be a good idea neither). Yes, plus then we'd need a brand new pipeline stage that is always run and that is largely useless for the vast majority of rendering. It's sort of a lose-lose scenario. The only thing that is clear is that we have to pass the data along the shader outputs, everything else is a messy glue to make it possible. z ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/8] tgsi: detect prim id and front face usage in fs
Adding code to detect the usage of prim id and front face semantics in fragment shaders. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/tgsi/tgsi_scan.c |9 +++-- src/gallium/auxiliary/tgsi/tgsi_scan.h |1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 1fe1a07..e7bf6e6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -166,9 +166,14 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap; info->num_inputs++; - if (procType == TGSI_PROCESSOR_FRAGMENT && - fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) + if (procType == TGSI_PROCESSOR_FRAGMENT) { + if (fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) info->reads_position = TRUE; + else if (fulldecl->Semantic.Name == TGSI_SEMANTIC_PRIMID) +info->uses_primid = TRUE; + else if (fulldecl->Semantic.Name == TGSI_SEMANTIC_FACE) +info->uses_frontface = TRUE; + } } else if (file == TGSI_FILE_SYSTEM_VALUE) { unsigned index = fulldecl->Range.First; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index cfa2b8e..e2fa73a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -74,6 +74,7 @@ struct tgsi_shader_info boolean uses_instanceid; boolean uses_vertexid; boolean uses_primid; + boolean uses_frontface; boolean origin_lower_left; boolean pixel_center_integer; boolean color0_writes_all_cbufs; -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/8] draw: stop crashing with extra shader outputs
Draw sometimes injects extra shader outputs (aa points, lines or front face), unfortunately most of the pipeline and llvm code didn't handle them at all. It only worked if number of inputs happened to be bigger or equal to the number of shader outputs plus the extra injected outputs. In particular when running the pipeline which depends on the vertex_id in the vertex_header things were completely broken. The patch adjust the code to correctly use the total number of shader outputs (the standard ones plus the injected ones) to make it all stop crashing and work. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_context.c | 43 src/gallium/auxiliary/draw/draw_context.h |5 +++ src/gallium/auxiliary/draw/draw_gs.c |2 +- src/gallium/auxiliary/draw/draw_llvm.c |3 ++ src/gallium/auxiliary/draw/draw_llvm.h |4 +- src/gallium/auxiliary/draw/draw_pipe.h |3 +- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c |6 +-- .../draw/draw_pt_fetch_shade_pipeline_llvm.c |8 +--- src/gallium/auxiliary/draw/draw_vs_variant.c |2 +- 9 files changed, 61 insertions(+), 15 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 2e95b5c..8bf3596 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -622,6 +622,49 @@ draw_num_shader_outputs(const struct draw_context *draw) /** + * Return total number of the vertex shader outputs. This function + * also counts any extra vertex output attributes that may + * be filled in by some draw stages (such as AA point, AA line, + * front face). + */ +uint +draw_total_vs_shader_outputs(const struct draw_context *draw) +{ + const struct tgsi_shader_info *info = &draw->vs.vertex_shader->info; + uint count; + + count = info->num_outputs; + count += draw->extra_shader_outputs.num; + + return count; +} + +/** + * Return total number of the geometry shader outputs. 
This function + * also counts any extra geometry output attributes that may + * be filled in by some draw stages (such as AA point, AA line, front + * face). + */ +uint +draw_total_gs_shader_outputs(const struct draw_context *draw) +{ + + const struct tgsi_shader_info *info; + uint count; + + if (!draw->gs.geometry_shader) + return 0; + + info = &draw->gs.geometry_shader->info; + + count = info->num_outputs; + count += draw->extra_shader_outputs.num; + + return count; +} + + +/** * Provide TGSI sampler objects for vertex/geometry shaders that use * texture fetches. This state only needs to be set once per context. * This might only be used by software drivers for the time being. diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 0815047..e9aa24d 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -139,6 +139,11 @@ draw_will_inject_frontface(const struct draw_context *draw); uint draw_num_shader_outputs(const struct draw_context *draw); +uint +draw_total_vs_shader_outputs(const struct draw_context *draw); + +uint +draw_total_gs_shader_outputs(const struct draw_context *draw); void draw_texture_sampler(struct draw_context *draw, diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index cd63e2b..32fd91f 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -534,7 +534,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, { const float (*input)[4] = (const float (*)[4])input_verts->verts->data; unsigned input_stride = input_verts->vertex_size; - unsigned num_outputs = shader->info.num_outputs; + unsigned num_outputs = draw_total_gs_shader_outputs(shader->draw); unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float); unsigned num_input_verts = input_prim->linear ? 
input_verts->count : diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index c195a2b..8ecb3e7 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1827,6 +1827,7 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE); key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable; key->has_gs = llvm->draw->gs.geometry_shader != NULL; + key->num_outputs = draw_total_vs_shader_outputs(llvm->draw); key->pad1 = 0; /* All variants of this shader will have the same value for @@ -2264,6 +2265,8 @@ draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store) key = (struct draw_gs_llvm_variant_key *)store; +
[Mesa-dev] [PATCH 3/8] draw/llvm: add some extra debugging output
when dumping shader outputs it's nice to have the integer values of the outputs, in particular because some values are integers. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_llvm.c |6 ++ 1 file changed, 6 insertions(+) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 8ecb3e7..df0d2ed 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -977,6 +977,12 @@ convert_to_aos(struct gallivm_state *gallivm, LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), chan, 0)); lp_build_print_value(gallivm, "val = ", out); +{ + LLVMValueRef iv = + LLVMBuildBitCast(builder, out, lp_build_int_vec_type(gallivm, soa_type), ""); + + lp_build_print_value(gallivm, " ival = ", iv); +} #endif soa[chan] = out; } -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/8] draw: make sure clipping works with injected outputs
clipping would drop the extra outputs because it always used the number of standard vertex shader outputs, without geometry shader or extra outputs. The commit makes sure that clipping with geometry shaders which have more outputs than the current vertex shader and with extra outputs correctly propagates the entire vertex. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_pipe_clip.c | 89 --- 1 file changed, 54 insertions(+), 35 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index e83586e..b76e9a5 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -136,7 +136,7 @@ static void interp( const struct clip_stage *clip, const struct vertex_header *in, unsigned viewport_index ) { - const unsigned nr_attrs = draw_current_shader_outputs(clip->stage.draw); + const unsigned nr_attrs = draw_num_shader_outputs(clip->stage.draw); const unsigned pos_attr = draw_current_shader_position_output(clip->stage.draw); const unsigned clip_attr = draw_current_shader_clipvertex_output(clip->stage.draw); unsigned j; @@ -264,7 +264,6 @@ static void emit_poly( struct draw_stage *stage, header.flags |= edge_last; if (DEBUG_CLIP) { - const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; uint j, k; debug_printf("Clipped tri: (flat-shade-first = %d)\n", stage->draw->rasterizer->flatshade_first); @@ -274,7 +273,7 @@ static void emit_poly( struct draw_stage *stage, header.v[j]->clip[1], header.v[j]->clip[2], header.v[j]->clip[3]); -for (k = 0; k < vs->info.num_outputs; k++) { +for (k = 0; k < draw_num_shader_outputs(stage->draw); k++) { debug_printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k, header.v[j]->data[k][0], header.v[j]->data[k][1], @@ -283,7 +282,6 @@ static void emit_poly( struct draw_stage *stage, } } } - stage->next->tri( stage->next, &header ); } } @@ -609,6 +607,35 @@ clip_tri( struct draw_stage *stage, } +static int 
+find_interp(const struct draw_fragment_shader *fs, int *indexed_interp, +uint semantic_name, uint semantic_index) +{ + int interp; + /* If it's gl_{Front,Back}{,Secondary}Color, pick up the mode +* from the array we've filled before. */ + if (semantic_name == TGSI_SEMANTIC_COLOR || + semantic_name == TGSI_SEMANTIC_BCOLOR) { + interp = indexed_interp[semantic_index]; + } else { + /* Otherwise, search in the FS inputs, with a decent default + * if we don't find it. + */ + uint j; + interp = TGSI_INTERPOLATE_PERSPECTIVE; + if (fs) { + for (j = 0; j < fs->info.num_inputs; j++) { +if (semantic_name == fs->info.input_semantic_name[j] && +semantic_index == fs->info.input_semantic_index[j]) { + interp = fs->info.input_interpolate[j]; + break; +} + } + } + } + return interp; +} + /* Update state. Could further delay this until we hit the first * primitive that really requires clipping. */ @@ -616,11 +643,9 @@ static void clip_init_state( struct draw_stage *stage ) { struct clip_stage *clipper = clip_stage( stage ); - const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; - const struct draw_geometry_shader *gs = stage->draw->gs.geometry_shader; const struct draw_fragment_shader *fs = stage->draw->fs.fragment_shader; - uint i; - const struct tgsi_shader_info *vs_info = gs ? &gs->info : &vs->info; + uint i, j; + const struct tgsi_shader_info *info = draw_get_shader_info(stage->draw); /* We need to know for each attribute what kind of interpolation is * done on it (flat, smooth or noperspective). But the information @@ -663,42 +688,36 @@ clip_init_state( struct draw_stage *stage ) clipper->num_flat_attribs = 0; memset(clipper->noperspective_attribs, 0, sizeof(clipper->noperspective_attribs)); - for (i = 0; i < vs_info->num_outputs; i++) { - /* Find the interpolation mode for a specific attribute - */ - int interp; - - /* If it's gl_{Front,Back}{,Secondary}Color, pick up the mode - * from the array we've filled before. 
*/ - if (vs_info->output_semantic_name[i] == TGSI_SEMANTIC_COLOR || - vs_info->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { - interp = indexed_interp[vs_info->output_semantic_index[i]]; - } else { -
[Mesa-dev] [PATCH 5/8] draw: use the vertex size
Instead of using the magical 4 use the above computed vertex size. Doesn't change the behavior, just makes the code a bit cleaner. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index d3b38eb..092440e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -250,7 +250,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim ) } hw_key.nr_elements = vbuf->vinfo->num_attribs; - hw_key.output_stride = vbuf->vinfo->size * 4; + hw_key.output_stride = vbuf->vertex_size; /* Don't bother with caching at this stage: */ -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/8] draw: fix front face injection
Inject front face only if the fragment shader uses it and propagate through all channels because otherwise we'll need to figure out the exact swizzle that the fs expects and it's just simpler to make sure all the components within the front face register are correctly set. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_pipe_unfilled.c | 24 ++- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index d8a603f..f9a31b0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -37,6 +37,7 @@ #include "pipe/p_defines.h" #include "draw_private.h" #include "draw_pipe.h" +#include "draw_fs.h" struct unfilled_stage { @@ -67,18 +68,20 @@ inject_front_face_info(struct draw_stage *stage, (stage->draw->rasterizer->front_ccw && ccw) || (!stage->draw->rasterizer->front_ccw && !ccw)); unsigned slot = unfilled->face_slot; - struct vertex_header *v0 = header->v[0]; - struct vertex_header *v1 = header->v[1]; - struct vertex_header *v2 = header->v[2]; + unsigned i; /* In case the backend doesn't care about it */ if (slot < 0) { return; } - v0->data[slot][0] = is_front_face; - v1->data[slot][0] = is_front_face; - v2->data[slot][0] = is_front_face; + for (i = 0; i < 3; ++i) { + struct vertex_header *v = header->v[i]; + v->data[slot][0] = is_front_face; + v->data[slot][1] = is_front_face; + v->data[slot][2] = is_front_face; + v->data[slot][3] = is_front_face; + } } @@ -231,9 +234,12 @@ draw_unfilled_prepare_outputs( struct draw_context *draw, { struct unfilled_stage *unfilled = unfilled_stage(stage); const struct pipe_rasterizer_state *rast = draw ? 
draw->rasterizer : 0; - if (rast && - (rast->fill_front != PIPE_POLYGON_MODE_FILL || -rast->fill_back != PIPE_POLYGON_MODE_FILL)) { + boolean is_unfilled = (rast && + (rast->fill_front != PIPE_POLYGON_MODE_FILL || + rast->fill_back != PIPE_POLYGON_MODE_FILL)); + const struct draw_fragment_shader *fs = draw->fs.fragment_shader; + + if (is_unfilled && fs && fs->info.uses_frontface) { unfilled->face_slot = draw_alloc_extra_vertex_attrib( stage->draw, TGSI_SEMANTIC_FACE, 0); } else { -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 7/8] llvmpipe: don't interpolate front face or prim id
The loop was iterating over all the fs inputs and setting them to perspective interpolation, then after the loop we were creating extra output slots with the correct interpolation. Instead of injecting bogus extra outputs, just set the interpolation on front face and prim id correctly when doing the initial scan of fs inputs. Signed-off-by: Zack Rusin --- src/gallium/drivers/llvmpipe/lp_state_derived.c | 30 +++ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 5a51b50..7b1e6f6 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -69,8 +69,8 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) vinfo->num_attribs = 0; vs_index = draw_find_shader_output(llvmpipe->draw, - TGSI_SEMANTIC_POSITION, - 0); + TGSI_SEMANTIC_POSITION, + 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); @@ -89,12 +89,20 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) llvmpipe->color_slot[idx] = (int)vinfo->num_attribs; } - /* - * Emit the requested fs attribute for all but position. - */ - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); + if (lpfs->info.base.input_semantic_index[i] == 0 && + lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_FACE) { + llvmpipe->face_slot = vinfo->num_attribs; + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index); + } else if (lpfs->info.base.input_semantic_index[i] == 0 && + lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_PRIMID) { + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index); + } else { + /* + * Emit the requested fs attribute for all but position. + */ + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); + } } - /* Figure out if we need bcolor as well. 
*/ for (i = 0; i < 2; i++) { @@ -140,14 +148,6 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) llvmpipe->layer_slot = 0; } - /* Check for a fake front face for unfilled primitives*/ - vs_index = draw_find_shader_output(llvmpipe->draw, - TGSI_SEMANTIC_FACE, 0); - if (vs_index >= 0) { - llvmpipe->face_slot = vinfo->num_attribs; - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index); - } - draw_compute_vertex_size(vinfo); lp_setup_set_vertex_info(llvmpipe->setup, vinfo); } -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 8/8] draw: implement proper primitive assembler as a pipeline stage
we used to have a face primitive assembler that we ran after if the gs was missing but we had adjacency primitives in the pipeline, lets convert it to a pipeline stage, which allows us to use it to inject outputs (primitive id) into the vertices. it's also a lot cleaner because the decomposition is already handled for us. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/Makefile.sources |2 +- src/gallium/auxiliary/draw/draw_context.c |1 + src/gallium/auxiliary/draw/draw_pipe.c |4 + src/gallium/auxiliary/draw/draw_pipe.h |5 + src/gallium/auxiliary/draw/draw_pipe_ia.c | 253 src/gallium/auxiliary/draw/draw_pipe_validate.c| 15 +- src/gallium/auxiliary/draw/draw_prim_assembler.c | 225 - src/gallium/auxiliary/draw/draw_prim_assembler.h | 62 - .../auxiliary/draw/draw_prim_assembler_tmp.h | 31 --- src/gallium/auxiliary/draw/draw_private.h |1 + .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 18 +- .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 18 +- 12 files changed, 283 insertions(+), 352 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pipe_ia.c delete mode 100644 src/gallium/auxiliary/draw/draw_prim_assembler.c delete mode 100644 src/gallium/auxiliary/draw/draw_prim_assembler.h delete mode 100644 src/gallium/auxiliary/draw/draw_prim_assembler_tmp.h diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index acbcef7..ee93e8b 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -13,6 +13,7 @@ C_SOURCES := \ draw/draw_pipe_clip.c \ draw/draw_pipe_cull.c \ draw/draw_pipe_flatshade.c \ +draw/draw_pipe_ia.c \ draw/draw_pipe_offset.c \ draw/draw_pipe_pstipple.c \ draw/draw_pipe_stipple.c \ @@ -23,7 +24,6 @@ C_SOURCES := \ draw/draw_pipe_vbuf.c \ draw/draw_pipe_wide_line.c \ draw/draw_pipe_wide_point.c \ - draw/draw_prim_assembler.c \ draw/draw_pt.c \ draw/draw_pt_emit.c \ draw/draw_pt_fetch.c \ diff --git a/src/gallium/auxiliary/draw/draw_context.c 
b/src/gallium/auxiliary/draw/draw_context.c index 8bf3596..bbb2904 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -555,6 +555,7 @@ draw_get_shader_info(const struct draw_context *draw) void draw_prepare_shader_outputs(struct draw_context *draw) { + draw_ia_prepare_outputs(draw, draw->pipeline.ia); draw_unfilled_prepare_outputs(draw, draw->pipeline.unfilled); } diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index f1ee6cb..8140299 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -49,6 +49,7 @@ boolean draw_pipeline_init( struct draw_context *draw ) draw->pipeline.clip = draw_clip_stage( draw ); draw->pipeline.flatshade = draw_flatshade_stage( draw ); draw->pipeline.cull = draw_cull_stage( draw ); + draw->pipeline.ia= draw_ia_stage( draw ); draw->pipeline.validate = draw_validate_stage( draw ); draw->pipeline.first = draw->pipeline.validate; @@ -61,6 +62,7 @@ boolean draw_pipeline_init( struct draw_context *draw ) !draw->pipeline.clip || !draw->pipeline.flatshade || !draw->pipeline.cull || + !draw->pipeline.ia || !draw->pipeline.validate) return FALSE; @@ -95,6 +97,8 @@ void draw_pipeline_destroy( struct draw_context *draw ) draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); if (draw->pipeline.cull) draw->pipeline.cull->destroy( draw->pipeline.cull ); + if (draw->pipeline.ia) + draw->pipeline.ia->destroy( draw->pipeline.ia ); if (draw->pipeline.validate) draw->pipeline.validate->destroy( draw->pipeline.validate ); if (draw->pipeline.aaline) diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h index 70c286f..70822a4 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.h +++ b/src/gallium/auxiliary/draw/draw_pipe.h @@ -91,7 +91,10 @@ extern struct draw_stage *draw_stipple_stage( struct draw_context *context ); extern struct draw_stage *draw_wide_line_stage( struct 
draw_context *context ); extern struct draw_stage *draw_wide_point_stage( struct draw_context *context ); extern struct draw_stage *draw_validate_stage( struct draw_context *context ); +extern struct draw_stage *draw_ia_stage(struct draw_context *context); +boolean draw_ia_stage_required(const struct draw_context *context, + unsigned prim); extern void draw_free_temp_verts( struct draw_stage *stage ); extern boolean draw_alloc_temp_verts( struc
Re: [Mesa-dev] [PATCH 8/8] draw: implement proper primitive assembler as a pipeline stage
Yea, it's quite bonkers, but that's the way it has to be to make it work right now. Personally I'd really like to write a new version of draw, without the 5 emit paths, 4 different vertex shading paths, with interface that is capable of emitting more than just float[4]'s... For now though this works, even if it is very ugly. z - Original Message - > Am 02.08.2013 08:28, schrieb Zack Rusin: > > we used to have a face primitive assembler that we ran after if > > the gs was missing but we had adjacency primitives in the pipeline, > > lets convert it to a pipeline stage, which allows us to use it > > to inject outputs (primitive id) into the vertices. it's also > > a lot cleaner because the decomposition is already handled for us. > > > > Signed-off-by: Zack Rusin > > --- > > src/gallium/auxiliary/Makefile.sources |2 +- > > src/gallium/auxiliary/draw/draw_context.c |1 + > > src/gallium/auxiliary/draw/draw_pipe.c |4 + > > src/gallium/auxiliary/draw/draw_pipe.h |5 + > > src/gallium/auxiliary/draw/draw_pipe_ia.c | 253 > > > > src/gallium/auxiliary/draw/draw_pipe_validate.c| 15 +- > > src/gallium/auxiliary/draw/draw_prim_assembler.c | 225 > > - > > src/gallium/auxiliary/draw/draw_prim_assembler.h | 62 - > > .../auxiliary/draw/draw_prim_assembler_tmp.h | 31 --- > > src/gallium/auxiliary/draw/draw_private.h |1 + > > .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 18 +- > > .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 18 +- > > 12 files changed, 283 insertions(+), 352 deletions(-) > > create mode 100644 src/gallium/auxiliary/draw/draw_pipe_ia.c > > delete mode 100644 src/gallium/auxiliary/draw/draw_prim_assembler.c > > delete mode 100644 src/gallium/auxiliary/draw/draw_prim_assembler.h > > delete mode 100644 src/gallium/auxiliary/draw/draw_prim_assembler_tmp.h > > > > diff --git a/src/gallium/auxiliary/Makefile.sources > > b/src/gallium/auxiliary/Makefile.sources > > index acbcef7..ee93e8b 100644 > > --- a/src/gallium/auxiliary/Makefile.sources > > +++ 
b/src/gallium/auxiliary/Makefile.sources > > @@ -13,6 +13,7 @@ C_SOURCES := \ > > draw/draw_pipe_clip.c \ > > draw/draw_pipe_cull.c \ > > draw/draw_pipe_flatshade.c \ > > +draw/draw_pipe_ia.c \ > Formatting looks off here. > > > draw/draw_pipe_offset.c \ > > draw/draw_pipe_pstipple.c \ > > draw/draw_pipe_stipple.c \ > > @@ -23,7 +24,6 @@ C_SOURCES := \ > > draw/draw_pipe_vbuf.c \ > > draw/draw_pipe_wide_line.c \ > > draw/draw_pipe_wide_point.c \ > > - draw/draw_prim_assembler.c \ > > draw/draw_pt.c \ > > draw/draw_pt_emit.c \ > > draw/draw_pt_fetch.c \ > > diff --git a/src/gallium/auxiliary/draw/draw_context.c > > b/src/gallium/auxiliary/draw/draw_context.c > > index 8bf3596..bbb2904 100644 > > --- a/src/gallium/auxiliary/draw/draw_context.c > > +++ b/src/gallium/auxiliary/draw/draw_context.c > > @@ -555,6 +555,7 @@ draw_get_shader_info(const struct draw_context *draw) > > void > > draw_prepare_shader_outputs(struct draw_context *draw) > > { > > + draw_ia_prepare_outputs(draw, draw->pipeline.ia); > > draw_unfilled_prepare_outputs(draw, draw->pipeline.unfilled); > > } > > > > diff --git a/src/gallium/auxiliary/draw/draw_pipe.c > > b/src/gallium/auxiliary/draw/draw_pipe.c > > index f1ee6cb..8140299 100644 > > --- a/src/gallium/auxiliary/draw/draw_pipe.c > > +++ b/src/gallium/auxiliary/draw/draw_pipe.c > > @@ -49,6 +49,7 @@ boolean draw_pipeline_init( struct draw_context *draw ) > > draw->pipeline.clip = draw_clip_stage( draw ); > > draw->pipeline.flatshade = draw_flatshade_stage( draw ); > > draw->pipeline.cull = draw_cull_stage( draw ); > > + draw->pipeline.ia= draw_ia_stage( draw ); > > draw->pipeline.validate = draw_validate_stage( draw ); > > draw->pipeline.first = draw->pipeline.validate; > > > > @@ -61,6 +62,7 @@ boolean draw_pipeline_init( struct draw_context *draw ) > > !draw->pipeline.clip || > > !draw->pipeline.flatshade || > > !draw->pipeline.cull || > > + !draw->pipeline.ia || > > !draw->pipeline.validate) > >return FALSE; > > > > @@ -95,6 +97,8 @@ void 
draw_pipeline_destroy( struct draw_context *draw ) > >draw-&
Re: [Mesa-dev] [PATCH] util: implement table-based + linear interpolation linear-to-srgb conversion
Looks good to me. A small comment above the disabled version noting that it's disabled because it's a bit slower might be useful for the next person who reads the code. Reviewed-by: Zack Rusin - Original Message - > From: Roland Scheidegger > > Should be much faster, seems to work in softpipe. > While here (also it's now disabled) fix up the pow factor - the former value > is what is in GL core it is however not actually accurate to fp32 standard > (as it is 1.0/2.4), and if someone would do all the accurate math there's no > reason to waste 8 mantissa bits or so... > > v2: use real table generating function instead of just printing the values > (might take a bit longer as it does calculations on some 3+ million floats > but much more descriptive obviously). > Also fix up another pow factor (this time in the python code) - wondering > where the couple one bit errors came from :-(. > --- > src/gallium/auxiliary/util/u_format_srgb.h | 55 > +- > src/gallium/auxiliary/util/u_format_srgb.py | 57 > ++- > 2 files changed, 101 insertions(+), 11 deletions(-) > > diff --git a/src/gallium/auxiliary/util/u_format_srgb.h > b/src/gallium/auxiliary/util/u_format_srgb.h > index 82ed957..f3e1b20 100644 > --- a/src/gallium/auxiliary/util/u_format_srgb.h > +++ b/src/gallium/auxiliary/util/u_format_srgb.h > @@ -39,6 +39,7 @@ > > > #include "pipe/p_compiler.h" > +#include "u_pack_color.h" > #include "u_math.h" > > > @@ -51,23 +52,57 @@ util_format_srgb_to_linear_8unorm_table[256]; > extern const uint8_t > util_format_linear_to_srgb_8unorm_table[256]; > > +extern const unsigned > +util_format_linear_to_srgb_helper_table[104]; > + > > /** > * Convert a unclamped linear float to srgb value in the [0,255]. > - * XXX this hasn't been tested (render to srgb surface). > - * XXX this needs optimization. 
> */ > static INLINE uint8_t > util_format_linear_float_to_srgb_8unorm(float x) > { > - if (x >= 1.0f) > - return 255; > - else if (x >= 0.0031308f) > - return float_to_ubyte(1.055f * powf(x, 0.41666f) - 0.055f); > - else if (x > 0.0f) > - return float_to_ubyte(12.92f * x); > - else > - return 0; > + if (0) { > + if (x >= 1.0f) > + return 255; > + else if (x >= 0.0031308f) > + return float_to_ubyte(1.055f * powf(x, 0.4166f) - 0.055f); > + else if (x > 0.0f) > + return float_to_ubyte(12.92f * x); > + else > + return 0; > + } > + else { > + /* > + * This is taken from https://gist.github.com/rygorous/2203834 > + * Use LUT and do linear interpolation. > + */ > + union fi almostone, minval, f; > + unsigned tab, bias, scale, t; > + > + almostone.ui = 0x3f7f; > + minval.ui = (127-13) << 23; > + > + /* > + * Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, > respectively. > + * The tests are carefully written so that NaNs map to 0, same as in > the > + * reference implementation. > + */ > + if (!(x > minval.f)) > + x = minval.f; > + if (x > almostone.f) > + x = almostone.f; > + > + /* Do the table lookup and unpack bias, scale */ > + f.f = x; > + tab = util_format_linear_to_srgb_helper_table[(f.ui - minval.ui) >> > 20]; > + bias = (tab >> 16) << 9; > + scale = tab & 0x; > + > + /* Grab next-highest mantissa bits and perform linear interpolation */ > + t = (f.ui >> 12) & 0xff; > + return (uint8_t) ((bias + scale*t) >> 16); > + } > } > > > diff --git a/src/gallium/auxiliary/util/u_format_srgb.py > b/src/gallium/auxiliary/util/u_format_srgb.py > index cd63ae7..c6c02f0 100644 > --- a/src/gallium/auxiliary/util/u_format_srgb.py > +++ b/src/gallium/auxiliary/util/u_format_srgb.py > @@ -40,6 +40,7 @@ CopyRight = ''' > > > import math > +import struct > > > def srgb_to_linear(x): > @@ -51,10 +52,11 @@ def srgb_to_linear(x): > > def linear_to_srgb(x): > if x >= 0.0031308: > -return 1.055 * math.pow(x, 0.41666) - 0.055 > +return 1.055 * math.pow(x, 0.4166) - 0.055 > 
else: > return 12.92 * x > > + > def generate_srgb_tables(): > print 'const float' > print 'util_format_srgb_8unorm_to_linear_float_table[256] = {' > @@ -84,6 +86,59 @@ def generate_srgb_tables(): > print '};
[Mesa-dev] [PATCH] draw: fix slot detection
Nowadays -1 for slots means that the semantic is not present, so we need to store it in a signed variable, otherwise <0 comparisons are pointless. Fixes http://bugzilla.eng.vmware.com/show_bug.cgi?id=67811 (at least with softpipe, edgeflags don't work with llvmpipe) Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_pipe_unfilled.c |2 +- src/gallium/drivers/llvmpipe/lp_setup_context.h |2 +- src/gallium/drivers/llvmpipe/lp_setup_line.c|1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index c6ee95c..68bab72 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -67,7 +67,7 @@ inject_front_face_info(struct draw_stage *stage, boolean is_front_face = ( (stage->draw->rasterizer->front_ccw && ccw) || (!stage->draw->rasterizer->front_ccw && !ccw)); - unsigned slot = unfilled->face_slot; + int slot = unfilled->face_slot; unsigned i; /* In case the backend doesn't care about it */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index ea1d0d6..44be85f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -106,7 +106,7 @@ struct lp_setup_context float psize; unsigned viewport_index_slot; unsigned layer_slot; - unsigned face_slot; + int face_slot; struct pipe_framebuffer_state fb; struct u_rect framebuffer; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index 3b16163..a25a6b0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -622,7 +622,6 @@ try_setup_line( struct lp_setup_context *setup, } else { line->inputs.frontfacing = TRUE; } - /* Setup parameter interpolants: */ -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] gallivm: propagate scalar_lod to emit_size_query too
- Original Message - > From: Roland Scheidegger > > Clearly the returned values need to be per-element if the lod is per element. > Does not actually change behavior yet. Looks good. For the entire series: Reviewed-by: Zack Rusin ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallivm: honor d3d10 floating point rules for shadow comparisons
- Original Message - > From: Roland Scheidegger > > d3d10 specifies ordered comparisons for everything but not_equal which is > unordered > (http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx). > OpenGL probably doesn't care. This series looks good too. For all three: Reviewed-by: Zack Rusin ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] draw: cleanup the extra attribs
Before inserting new front face and prim id outputs, clean up the old extra outputs, otherwise our cache will use previous output slots which will break as soon as outputs of the current shader don't match the last. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_context.c |1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index af9caee..2dc6772 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -555,6 +555,7 @@ draw_get_shader_info(const struct draw_context *draw) void draw_prepare_shader_outputs(struct draw_context *draw) { + draw_remove_extra_vertex_attribs(draw); draw_ia_prepare_outputs(draw, draw->pipeline.ia); draw_unfilled_prepare_outputs(draw, draw->pipeline.unfilled); } -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] draw: reset the vertex id when injecting new primitive id
Without resetting the vertex id, with primitives where the same vertex is used with different primitives (e.g. tri/line strips) our vbuf module won't re-emit those vertices with the changed primitive id. So let's reset the vertex id whenever injecting a new primitive id to make sure that the vertex data is correctly emitted. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_pipe_ia.c |9 + 1 file changed, 9 insertions(+) diff --git a/src/gallium/auxiliary/draw/draw_pipe_ia.c b/src/gallium/auxiliary/draw/draw_pipe_ia.c index ecbb233..d64f19b 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_ia.c +++ b/src/gallium/auxiliary/draw/draw_pipe_ia.c @@ -68,6 +68,15 @@ inject_primid(struct draw_stage *stage, for (i = 0; i < num_verts; ++i) { struct vertex_header *v = header->v[i]; + /* We have to reset the vertex_id because it's used by + * vbuf to figure out if the vertex had already been + * emitted. For line/tri strips the first vertex of + * subsequent primitives would already be emitted, + * but since we're changing the primitive id on the vertex + * we want to make sure it's reemitted with the correct + * data. + */ + v->vertex_id = UNDEFINED_VERTEX_ID; memcpy(&v->data[slot][0], &primid, sizeof(primid)); memcpy(&v->data[slot][1], &primid, sizeof(primid)); memcpy(&v->data[slot][2], &primid, sizeof(primid)); -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] draw: rewrite primitive assembler
We can't be injecting the primitive id's in the pipeline because by that time the primitives have already been decomposed. To properly number the primitives we need to handle the adjacency primitives by hand. This patch moves the prim id injection into the original primitive assembler and completely removes the useless pipeline stage. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/Makefile.sources |1 - src/gallium/auxiliary/draw/draw_context.c|8 +- src/gallium/auxiliary/draw/draw_pipe.c |4 - src/gallium/auxiliary/draw/draw_pipe.h |7 - src/gallium/auxiliary/draw/draw_pipe_ia.c| 259 -- src/gallium/auxiliary/draw/draw_pipe_validate.c | 14 -- src/gallium/auxiliary/draw/draw_prim_assembler.c | 168 +- src/gallium/auxiliary/draw/draw_prim_assembler.h | 12 + src/gallium/auxiliary/draw/draw_private.h|4 +- 9 files changed, 180 insertions(+), 297 deletions(-) delete mode 100644 src/gallium/auxiliary/draw/draw_pipe_ia.c diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index b0172de..acbcef7 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -13,7 +13,6 @@ C_SOURCES := \ draw/draw_pipe_clip.c \ draw/draw_pipe_cull.c \ draw/draw_pipe_flatshade.c \ -draw/draw_pipe_ia.c \ draw/draw_pipe_offset.c \ draw/draw_pipe_pstipple.c \ draw/draw_pipe_stipple.c \ diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 2dc6772..2d4843e 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -40,6 +40,7 @@ #include "util/u_prim.h" #include "draw_context.h" #include "draw_pipe.h" +#include "draw_prim_assembler.h" #include "draw_vs.h" #include "draw_gs.h" @@ -95,6 +96,10 @@ draw_create_context(struct pipe_context *pipe, boolean try_llvm) if (!draw_init(draw)) goto err_destroy; + draw->ia = draw_prim_assembler_create(draw); + if (!draw->ia) + goto err_destroy; + return draw; err_destroy: @@ -206,6 +211,7 
@@ void draw_destroy( struct draw_context *draw ) draw->render->destroy( draw->render ); */ + draw_prim_assembler_destroy(draw->ia); draw_pipeline_destroy( draw ); draw_pt_destroy( draw ); draw_vs_destroy( draw ); @@ -556,7 +562,7 @@ void draw_prepare_shader_outputs(struct draw_context *draw) { draw_remove_extra_vertex_attribs(draw); - draw_ia_prepare_outputs(draw, draw->pipeline.ia); + draw_prim_assembler_prepare_outputs(draw->ia); draw_unfilled_prepare_outputs(draw, draw->pipeline.unfilled); } diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 8140299..f1ee6cb 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -49,7 +49,6 @@ boolean draw_pipeline_init( struct draw_context *draw ) draw->pipeline.clip = draw_clip_stage( draw ); draw->pipeline.flatshade = draw_flatshade_stage( draw ); draw->pipeline.cull = draw_cull_stage( draw ); - draw->pipeline.ia= draw_ia_stage( draw ); draw->pipeline.validate = draw_validate_stage( draw ); draw->pipeline.first = draw->pipeline.validate; @@ -62,7 +61,6 @@ boolean draw_pipeline_init( struct draw_context *draw ) !draw->pipeline.clip || !draw->pipeline.flatshade || !draw->pipeline.cull || - !draw->pipeline.ia || !draw->pipeline.validate) return FALSE; @@ -97,8 +95,6 @@ void draw_pipeline_destroy( struct draw_context *draw ) draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); if (draw->pipeline.cull) draw->pipeline.cull->destroy( draw->pipeline.cull ); - if (draw->pipeline.ia) - draw->pipeline.ia->destroy( draw->pipeline.ia ); if (draw->pipeline.validate) draw->pipeline.validate->destroy( draw->pipeline.validate ); if (draw->pipeline.aaline) diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h index 70822a4..7c9ed6c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.h +++ b/src/gallium/auxiliary/draw/draw_pipe.h @@ -91,10 +91,6 @@ extern struct draw_stage *draw_stipple_stage( struct 
draw_context *context ); extern struct draw_stage *draw_wide_line_stage( struct draw_context *context ); extern struct draw_stage *draw_wide_point_stage( struct draw_context *context ); extern struct draw_stage *draw_validate_stage( struct draw_context *context ); -extern struct draw_stage *draw_ia_stage(struct draw_context *context); - -boolean draw_ia_stage_required(const struct draw_context *context, -
Re: [Mesa-dev] [PATCH 2/3] draw: reset the vertex id when injecting new primitive id
Don't worry about this one too much. The next patch removes draw_pipe_ia.c anyway... - Original Message - > Without reseting the vertex id, with primitives where the same > vertex is used with different primitives (e.g. tri/lines strips) > our vbuf module won't re-emit those vertices with the changed > primitive id. So lets reset the vertex id whenever injecting > new primitive id to make sure that the vertex data is correctly > emitted. > > Signed-off-by: Zack Rusin > --- > src/gallium/auxiliary/draw/draw_pipe_ia.c |9 + > 1 file changed, 9 insertions(+) > > diff --git a/src/gallium/auxiliary/draw/draw_pipe_ia.c > b/src/gallium/auxiliary/draw/draw_pipe_ia.c > index ecbb233..d64f19b 100644 > --- a/src/gallium/auxiliary/draw/draw_pipe_ia.c > +++ b/src/gallium/auxiliary/draw/draw_pipe_ia.c > @@ -68,6 +68,15 @@ inject_primid(struct draw_stage *stage, > > for (i = 0; i < num_verts; ++i) { >struct vertex_header *v = header->v[i]; > + /* We have to reset the vertex_id because it's used by > + * vbuf to figure out if the vertex had already been > + * emitted. For line/tri strips the first vertex of > + * subsequent primitives would already be emitted, > + * but since we're changing the primitive id on the vertex > + * we want to make sure it's reemitted with the correct > + * data. > + */ > + v->vertex_id = UNDEFINED_VERTEX_ID; >memcpy(&v->data[slot][0], &primid, sizeof(primid)); >memcpy(&v->data[slot][1], &primid, sizeof(primid)); >memcpy(&v->data[slot][2], &primid, sizeof(primid)); > -- > 1.7.10.4 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] gallivm: use texture target from shader instead of static state for size query
Series looks good to me. Reviewed-by: Zack Rusin - Original Message - > From: Roland Scheidegger > > d3d10 has no notion of distinct array resources neither at the resource nor > sampler view level. However, shader dcl of resources certainly has, and > d3d10 expects resinfo to return the values according to that - in particular > a resource might have been a 1d texture with some array layers, then the > sampler view might have only used 1 layer so it can be accessed both as 1d > or 1d array texture (I think - the former definitely works). resinfo of a > resource decleared as array needs to return number of array layers but > non-array resource needs to return 0 (and not 1). Hence fix this by passing > the target from the shader decl to emit_size_query and use that (in case of > OpenGL the target will come from the instruction itself). > Could probably do the same for actual sampling, though it may not matter > there > (as the bogus components will essentially get clamped away), possibly could > wreak havoc though if it REALLY doesn't match (which is of course an error > but still). 
> --- > src/gallium/auxiliary/draw/draw_llvm_sample.c |2 + > src/gallium/auxiliary/gallivm/lp_bld_sample.h |1 + > src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 32 ++- > src/gallium/auxiliary/gallivm/lp_bld_tgsi.h |1 + > src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 43 > - > src/gallium/drivers/llvmpipe/lp_tex_sample.c |2 + > 6 files changed, 77 insertions(+), 4 deletions(-) > > diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c > b/src/gallium/auxiliary/draw/draw_llvm_sample.c > index 3016d7c..f10cba3 100644 > --- a/src/gallium/auxiliary/draw/draw_llvm_sample.c > +++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c > @@ -270,6 +270,7 @@ draw_llvm_sampler_soa_emit_size_query(const struct > lp_build_sampler_soa *base, >struct gallivm_state *gallivm, >struct lp_type type, >unsigned texture_unit, > + unsigned target, >boolean need_nr_mips, >boolean scalar_lod, >LLVMValueRef explicit_lod, /* optional >*/ > @@ -284,6 +285,7 @@ draw_llvm_sampler_soa_emit_size_query(const struct > lp_build_sampler_soa *base, > &sampler->dynamic_state.base, > type, > texture_unit, > + target, > need_nr_mips, > scalar_lod, > explicit_lod, > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h > b/src/gallium/auxiliary/gallivm/lp_bld_sample.h > index dff8be2..db3ea1d 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h > +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h > @@ -497,6 +497,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm, > struct lp_sampler_dynamic_state *dynamic_state, > struct lp_type int_type, > unsigned texture_unit, > +unsigned target, > boolean need_nr_mips, > boolean scalar_lod, > LLVMValueRef explicit_lod, > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > index b0bb58b..e403ac8 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > @@ -1943,6 +1943,7 @@ lp_build_size_query_soa(struct 
gallivm_state *gallivm, > struct lp_sampler_dynamic_state *dynamic_state, > struct lp_type int_type, > unsigned texture_unit, > +unsigned target, > boolean need_nr_mips, > boolean scalar_lod, > LLVMValueRef explicit_lod, > @@ -1955,9 +1956,36 @@ lp_build_size_query_soa(struct gallivm_state *gallivm, > unsigned num_lods = 1; > struct lp_build_context bld_int_vec; > > - dims = texture_dims(static_state->target); > + /* > +* Do some sanity verification about bound texture and shader dcl target. > +* Not entirely sure what's possible but assume array/non-array > +* always compatible (probably not ok for OpenGL but d3d10 has no > +
Re: [Mesa-dev] [PATCH] gallivm: set non-existing values really to zero in size queries for d3d10
Looks good. Reviewed-by: Zack Rusin - Original Message - > From: Roland Scheidegger > > My previous attempt at doing so double-failed miserably (minification of > zero still gives one, and even if it would not the value was never written > anyway). > While here also rename the confusingly named int_vec bld as we have int vecs > of different sizes, and rename need_nr_mips (as this also changes > out-of-bounds > behavior) to is_sviewinfo too. > --- > src/gallium/auxiliary/draw/draw_llvm_sample.c |4 +-- > src/gallium/auxiliary/gallivm/lp_bld_sample.h |2 +- > src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 34 > ++--- > src/gallium/drivers/llvmpipe/lp_tex_sample.c |4 +-- > 4 files changed, 22 insertions(+), 22 deletions(-) > > diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c > b/src/gallium/auxiliary/draw/draw_llvm_sample.c > index f10cba3..97b0255 100644 > --- a/src/gallium/auxiliary/draw/draw_llvm_sample.c > +++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c > @@ -271,7 +271,7 @@ draw_llvm_sampler_soa_emit_size_query(const struct > lp_build_sampler_soa *base, >struct lp_type type, >unsigned texture_unit, >unsigned target, > - boolean need_nr_mips, > + boolean is_sviewinfo, >boolean scalar_lod, >LLVMValueRef explicit_lod, /* optional >*/ >LLVMValueRef *sizes_out) > @@ -286,7 +286,7 @@ draw_llvm_sampler_soa_emit_size_query(const struct > lp_build_sampler_soa *base, > type, > texture_unit, > target, > - need_nr_mips, > + is_sviewinfo, > scalar_lod, > explicit_lod, > sizes_out); > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h > b/src/gallium/auxiliary/gallivm/lp_bld_sample.h > index db3ea1d..75e8c59 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h > +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h > @@ -498,7 +498,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm, > struct lp_type int_type, > unsigned texture_unit, > unsigned target, > -boolean need_nr_mips, > +boolean is_viewinfo, > boolean scalar_lod, > LLVMValueRef 
explicit_lod, > LLVMValueRef *sizes_out); > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > index e403ac8..65d6e7b 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > @@ -1944,7 +1944,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm, > struct lp_type int_type, > unsigned texture_unit, > unsigned target, > -boolean need_nr_mips, > +boolean is_sviewinfo, > boolean scalar_lod, > LLVMValueRef explicit_lod, > LLVMValueRef *sizes_out) > @@ -1954,7 +1954,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm, > int dims, i; > boolean has_array; > unsigned num_lods = 1; > - struct lp_build_context bld_int_vec; > + struct lp_build_context bld_int_vec4; > > /* > * Do some sanity verification about bound texture and shader dcl target. > @@ -1997,24 +1997,19 @@ lp_build_size_query_soa(struct gallivm_state > *gallivm, > > assert(!int_type.floating); > > - lp_build_context_init(&bld_int_vec, gallivm, lp_type_int_vec(32, 128)); > + lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128)); > > if (explicit_lod) { >/* FIXME: this needs to honor per-element lod */ >lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod, >lp_build_const_int32(gallivm, 0), ""); >first_level = dynamic_state->first_level(dynamic_state, gallivm, >texture_unit); >level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level"); > - lod = lp_build_broadcast_scalar(&bld_i
Re: [Mesa-dev] [PATCH 3/3] draw: rewrite primitive assembler
> Series looks good though I'm unsure why the pipeline stage doesn't work. > Where does that decomposition happen? Is that something like GS > outputting multiple prims in the same topology which all need the same id? No, it's because the pipeline stage is run on the decomposed primitives. The issue is that the pipeline stage is run after stream output and stream output requires decomposed primitives, meaning that by the time we get to the pipeline we have lost the original primitive info. D3D10 wants the primitive ids to be injected into vertices but in the order in which they are traversed on the original (striped) primitives, so we need to do it when doing the original decomposition where we have access to the original topology and can number the vertices correctly. z ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] draw: rewrite primitive assembler
> Am 09.08.2013 00:40, schrieb Zack Rusin: > > > Series looks good though I'm unsure why the pipeline stage doesn't work. > >> Where does that decomposition happen? Is that something like GS > >> outputting multiple prims in the same topology which all need the same id? > > > > No, it's because the pipeline stage is ran on the decomposed primitives. > > The issue is that the pipeline stage is ran after stream output and stream > > output requires decomposed primitives, meaning that by the time we get to > > the pipeline we lost the original primitive info. The d3d10 wants the > > primitive id's to be injected into vertices but in the order in which they > > are traversed on the original (striped) primitives, so we need to do it > > when doing the original decomposition where we have access to the original > > topology and can number the vertices correctly. > > > > z > > > > I see I totally forgot stream out needs decomposed primitives, and I > guess stream out (and prim assembler) can't run as an ordinary pipeline > stage? I was thinking about that when I was doing it and I thought it should be possible to rewrite SO as a pipeline stage, but we'd need to change the interface to include some sort of a prepare stage and then redo the code in SO. Once SO is in a pipeline then we could think about the primitive assembler, but that would also require more changes to the pipeline because we want to know if the primitives are adjacency primitives and pipeline stages get only tris/lines/points... and this was the point at which I went "screw it, I'm injecting prim ids in the primitive assembler". z ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC]: gallium: add new float comparison opcodes returning integer booleans
- Original Message - > This is a proposal for new comparison instructions, as the old ones > don't really fit modern (graphic or opencl I guess for that matter) > languages well. > If you've got objections, think the naming is crazy or whatnot I'm open > for suggestions :-). I would think this is not just a much better fit > for d3d10/glsl but for hw as well. Yea, that makes sense to me. Comparison instructions should return consistent results across types. I'd just add a line or so to the docs to make it explicit how they're different from the old opcodes, I expect that for people new to gallium it's going to be easy to miss. z ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallivm: simplify geometry shader mask handling a bit
> From: Roland Scheidegger > > Instead of reducing masks to 0/1 simply use the mask directly as -1. > Also use some signed comparison instead of unsigned (as far as I understand > these values have to be (very) small and signed means llvm doesn't have to > apply additional logic to do the unsigned comparisons the cpu can't do). > Saves a couple of instructions in some test geometry shader here. > > v2: that was a bit too much optimization, don't skip combining the masks... k, I think that one looks good. Reviewed-by: Zack Rusin ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] draw: simplify prim mask construction
Looks good. Reviewed-by: Zack Rusin - Original Message - > From: Roland Scheidegger > > The code was quite weird, the second comparison was in fact a complete no-op > and we can also do the comparison with the vector directly instead of scalar, > which should not also be faster but it is way more obvious how that mask > is actually going to look like. > (Not sure how many instructions that saves as it turned out the mask wasn't > used in the test geometry shader I used at all after all...) > --- > src/gallium/auxiliary/draw/draw_llvm.c | 32 > ++-- > 1 file changed, 10 insertions(+), 22 deletions(-) > > diff --git a/src/gallium/auxiliary/draw/draw_llvm.c > b/src/gallium/auxiliary/draw/draw_llvm.c > index 68f6369..84e3392 100644 > --- a/src/gallium/auxiliary/draw/draw_llvm.c > +++ b/src/gallium/auxiliary/draw/draw_llvm.c > @@ -2040,31 +2040,19 @@ generate_mask_value(struct draw_gs_llvm_variant > *variant, > { > struct gallivm_state *gallivm = variant->gallivm; > LLVMBuilderRef builder = gallivm->builder; > - LLVMValueRef bits[16]; > - struct lp_type mask_type = lp_int_type(gs_type); > - struct lp_type mask_elem_type = lp_elem_type(mask_type); > - LLVMValueRef mask_val = lp_build_const_vec(gallivm, > - mask_type, > - 0); > + struct lp_type mask_type = lp_int_type(gs_type); > + LLVMValueRef num_prims; > + LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0); > unsigned i; > > - assert(gs_type.length <= Elements(bits)); > - > - for (i = gs_type.length; i >= 1; --i) { > - int idx = i - 1; > - LLVMValueRef ind = lp_build_const_int32(gallivm, i); > - bits[idx] = lp_build_compare(gallivm, > - mask_elem_type, PIPE_FUNC_GEQUAL, > - variant->num_prims, ind); > - } > - for (i = 0; i < gs_type.length; ++i) { > - LLVMValueRef ind = lp_build_const_int32(gallivm, i); > - mask_val = LLVMBuildInsertElement(builder, mask_val, bits[i], ind, > ""); > + num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, > mask_type), > + variant->num_prims); > + for (i = 0; i 
< gs_type.length; i++) { > + LLVMValueRef idx = lp_build_const_int32(gallivm, i); > + mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, ""); > } > - mask_val = lp_build_compare(gallivm, > - mask_type, PIPE_FUNC_NOTEQUAL, > - mask_val, > - lp_build_const_int_vec(gallivm, mask_type, > 0)); > + mask_val = lp_build_compare(gallivm, mask_type, > + PIPE_FUNC_GREATER, num_prims, mask_val); > > return mask_val; > } > -- > 1.7.9.5 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallivm: fix exec_mask interaction with geometry shader after end of main
Ah, that looks like a great catch. Reviewed-by: Zack Rusin - Original Message - > From: Roland Scheidegger > > Because we must maintain an exec_mask even if there's currently nothing > on the mask stack, we can still have an exec_mask at the end of the program. > Effectively, this mask should be set back to default when returning from > main. > Without relying on END/RET opcode (I think it's valid to have neither) it is > actually difficult to do this, as there doesn't seem any reasonable place to > do it, so instead let's just say the exec_mask is invalid outside main (which > it really is effectively). > The problem is that geometry shader called end_primitive outside the shader > (in the epilogue), and as a result used a bogus mask, leading to bugs if we > had to set the (somewhat misnamed) ret_in_main bit anywhere. So just avoid > the mask combining function when called from outside the shader. > --- > src/gallium/auxiliary/gallivm/lp_bld_tgsi.c |2 +- > src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 28 > +++ > 2 files changed, 14 insertions(+), 16 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c > index 495940c..5a9e8d0 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c > @@ -466,7 +466,7 @@ lp_build_tgsi_llvm( > > while (bld_base->pc != -1) { >struct tgsi_full_instruction *instr = bld_base->instructions + > - bld_base->pc; > + bld_base->pc; >const struct tgsi_opcode_info *opcode_info = > tgsi_get_opcode_info(instr->Instruction.Opcode); >if (!lp_build_tgsi_inst_llvm(bld_base, instr)) { > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > index 589ea4f..db8e997 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > @@ -2691,11 +2691,21 @@ end_primitive_masked(struct lp_build_tgsi_context * > bld_base, 
> LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; > > if (bld->gs_iface->end_primitive) { > + struct lp_build_context *uint_bld = &bld_base->uint_bld; >LLVMValueRef emitted_vertices_vec = > LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, ""); >LLVMValueRef emitted_prims_vec = > LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, ""); > > + LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, > + emitted_vertices_vec, > + uint_bld->zero); > + /* We need to combine the current execution mask with the mask > + telling us which, if any, execution slots actually have > + unemitted primitives, this way we make sure that end_primitives > + executes only on the paths that have unflushed vertices */ > + mask = LLVMBuildAnd(builder, mask, emitted_mask, ""); > + >bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base, > emitted_vertices_vec, > emitted_prims_vec); > @@ -2735,20 +2745,7 @@ end_primitive( > struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); > > if (bld->gs_iface->end_primitive) { > - LLVMBuilderRef builder = bld_base->base.gallivm->builder; >LLVMValueRef mask = mask_vec(bld_base); > - struct lp_build_context *uint_bld = &bld_base->uint_bld; > - LLVMValueRef emitted_verts = LLVMBuildLoad( > - builder, bld->emitted_vertices_vec_ptr, ""); > - LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, > - emitted_verts, > - uint_bld->zero); > - /* We need to combine the current execution mask with the mask > - telling us which, if any, execution slots actually have > - unemitted primitives, this way we make sure that end_primitives > - executes only on the paths that have unflushed vertices */ > - mask = LLVMBuildAnd(builder, mask, emitted_mask, ""); > - >end_primitive_masked(bld_base, mask); > } > } > @@ -3148,8 +3145,9 @@ static void emit_epilogue(struct lp_build_tgsi_context > * bld_base) >LLVMValueRef total_emitted_vertice
Re: [Mesa-dev] [PATCH 3/3] gallivm: implement new float comparison instructions returning integer masks
Nice. The entire series looks good. Reviewed-by: Zack Rusin - Original Message - > From: Roland Scheidegger > > FSEQ/FSGE/FSLT/FSNE work just the same as SEQ/SGE/SLT/SNE except skip the > select. > And just for consistency use the same appropriate ordered/unordered > comparisons > for the old opcodes as well. > --- > src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 81 > +++- > 1 file changed, 79 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > index f461661..86c3249 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > @@ -1094,6 +1094,70 @@ f2u_emit_cpu( > emit_data->args[0]); > } > > +/* TGSI_OPCODE_FSET Helper (CPU Only) */ > +static void > +fset_emit_cpu( > + const struct lp_build_tgsi_action * action, > + struct lp_build_tgsi_context * bld_base, > + struct lp_build_emit_data * emit_data, > + unsigned pipe_func) > +{ > + LLVMValueRef cond; > + > + if (pipe_func != PIPE_FUNC_NOTEQUAL) { > + cond = lp_build_cmp_ordered(&bld_base->base, pipe_func, > + emit_data->args[0], emit_data->args[1]); > + } > + else { > + cond = lp_build_cmp(&bld_base->base, pipe_func, > + emit_data->args[0], emit_data->args[1]); > + > + } > + emit_data->output[emit_data->chan] = cond; > +} > + > + > +/* TGSI_OPCODE_FSEQ (CPU Only) */ > +static void > +fseq_emit_cpu( > + const struct lp_build_tgsi_action * action, > + struct lp_build_tgsi_context * bld_base, > + struct lp_build_emit_data * emit_data) > +{ > + fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL); > +} > + > +/* TGSI_OPCODE_ISGE (CPU Only) */ > +static void > +fsge_emit_cpu( > + const struct lp_build_tgsi_action * action, > + struct lp_build_tgsi_context * bld_base, > + struct lp_build_emit_data * emit_data) > +{ > + fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL); > +} > + > +/* TGSI_OPCODE_ISLT (CPU Only) */ > +static void > 
+fslt_emit_cpu( > + const struct lp_build_tgsi_action * action, > + struct lp_build_tgsi_context * bld_base, > + struct lp_build_emit_data * emit_data) > +{ > + fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS); > +} > + > +/* TGSI_OPCODE_USNE (CPU Only) */ > + > +static void > +fsne_emit_cpu( > + const struct lp_build_tgsi_action * action, > + struct lp_build_tgsi_context * bld_base, > + struct lp_build_emit_data * emit_data) > +{ > + fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL); > +} > + > /* TGSI_OPCODE_FLR (CPU Only) */ > > static void > @@ -1396,8 +1460,17 @@ set_emit_cpu( > struct lp_build_emit_data * emit_data, > unsigned pipe_func) > { > - LLVMValueRef cond = lp_build_cmp(&bld_base->base, pipe_func, > -emit_data->args[0], emit_data->args[1]); > + LLVMValueRef cond; > + > + if (pipe_func != PIPE_FUNC_NOTEQUAL) { > + cond = lp_build_cmp_ordered(&bld_base->base, pipe_func, > + emit_data->args[0], emit_data->args[1]); > + } > + else { > + cond = lp_build_cmp(&bld_base->base, pipe_func, > + emit_data->args[0], emit_data->args[1]); > + > + } > emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base, >cond, >bld_base->base.one, > @@ -1716,6 +1789,10 @@ lp_set_default_actions_cpu( > bld_base->op_actions[TGSI_OPCODE_F2I].emit = f2i_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu; > + bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = fseq_emit_cpu; > + bld_base->op_actions[TGSI_OPCODE_FSGE].emit = fsge_emit_cpu; > + bld_base->op_actions[TGSI_OPCODE_FSLT].emit = fslt_emit_cpu; > + bld_base->op_actions[TGSI_OPCODE_FSNE].emit = fsne_emit_cpu; > > bld_base->op_actions[TGSI_OPCODE_I2F].emit = i2f_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_IABS].emit = iabs_emit_cpu; > -- > 1.7.9.5 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] draw: make sure that the stages setup outputs
Calling the prepare outputs cleans up the slot assignments for outputs, unfortunately aapoint and aaline didn't have code to reset their slots after the initial setup, this was messing up our slot assignments. The unfilled stage was just missing the initial assignment of the face slot. This fixes all of the reported piglit failures. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_context.c |2 + src/gallium/auxiliary/draw/draw_pipe.h |5 +- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 27 --- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 56 ++- src/gallium/auxiliary/draw/draw_pipe_unfilled.c |2 + 5 files changed, 62 insertions(+), 30 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 2d4843e..d1fac0c 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -564,6 +564,8 @@ draw_prepare_shader_outputs(struct draw_context *draw) draw_remove_extra_vertex_attribs(draw); draw_prim_assembler_prepare_outputs(draw->ia); draw_unfilled_prepare_outputs(draw, draw->pipeline.unfilled); + draw_aapoint_prepare_outputs(draw, draw->pipeline.aapoint); + draw_aaline_prepare_outputs(draw, draw->pipeline.aaline); } /** diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h index 7c9ed6c..ad3165f 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.h +++ b/src/gallium/auxiliary/draw/draw_pipe.h @@ -101,7 +101,10 @@ void draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *hea void draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header); void draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header); - +void draw_aapoint_prepare_outputs(struct draw_context *context, + struct draw_stage *stage); +void draw_aaline_prepare_outputs(struct draw_context *context, + struct draw_stage *stage); void draw_unfilled_prepare_outputs(struct draw_context *context, struct 
draw_stage *stage); diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index aa88459..c44c236 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -692,13 +692,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) return; } - /* update vertex attrib info */ - aaline->pos_slot = draw_current_shader_position_output(draw);; - - /* allocate the extra post-transformed vertex attribute */ - aaline->tex_slot = draw_alloc_extra_vertex_attrib(draw, - TGSI_SEMANTIC_GENERIC, - aaline->fs->generic_attrib); + draw_aaline_prepare_outputs(draw, draw->pipeline.aaline); /* how many samplers? */ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ @@ -953,6 +947,25 @@ aaline_set_sampler_views(struct pipe_context *pipe, } +void +draw_aaline_prepare_outputs(struct draw_context *draw, +struct draw_stage *stage) +{ + struct aaline_stage *aaline = aaline_stage(stage); + const struct pipe_rasterizer_state *rast = draw->rasterizer; + + /* update vertex attrib info */ + aaline->pos_slot = draw_current_shader_position_output(draw);; + + if (!rast->line_smooth) + return; + + /* allocate the extra post-transformed vertex attribute */ + aaline->tex_slot = draw_alloc_extra_vertex_attrib(draw, + TGSI_SEMANTIC_GENERIC, + aaline->fs->generic_attrib); +} + /** * Called by drivers that want to install this AA line prim stage * into the draw module's pipeline. 
This will not be used if the diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 0d7b88e..7ae1ddd 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -696,28 +696,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) */ bind_aapoint_fragment_shader(aapoint); - /* update vertex attrib info */ - aapoint->pos_slot = draw_current_shader_position_output(draw); - - /* allocate the extra post-transformed vertex attribute */ - aapoint->tex_slot = draw_alloc_extra_vertex_attrib(draw, - TGSI_SEMANTIC_GENERIC, - aapoint->fs->generic_attrib); - assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ - - /* find psize slot i
[Mesa-dev] [PATCH] llvmpipe: fix pipeline statistics with a null ps
If the fragment shader is null then pixel shader invocations have to be equal to zero. And if we're running a null ps then clipper invocations and primitives should be equal to zero but only if both stencil and depth testing are disabled. Signed-off-by: Zack Rusin --- src/gallium/drivers/llvmpipe/lp_query.c | 30 ++ 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index cea2d07..fb24c36 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -32,6 +32,7 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" +#include "tgsi/tgsi_scan.h" #include "util/u_memory.h" #include "os/os_time.h" #include "lp_context.h" @@ -95,6 +96,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe, union pipe_query_result *vresult) { struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen); + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); unsigned num_threads = MAX2(1, screen->num_threads); struct llvmpipe_query *pq = llvmpipe_query(q); uint64_t *result = (uint64_t *)vresult; @@ -166,11 +168,31 @@ llvmpipe_get_query_result(struct pipe_context *pipe, case PIPE_QUERY_PIPELINE_STATISTICS: { struct pipe_query_data_pipeline_statistics *stats = (struct pipe_query_data_pipeline_statistics *)vresult; - /* only ps_invocations come from binned query */ - for (i = 0; i < num_threads; i++) { - pq->stats.ps_invocations += pq->end[i]; + /* If we're running on what's considered a null fragment + * shader, i.e. 
fragment shader consisting of a single + * END opcode or if the fragment shader is null then + * the number of ps_invocations should be zero */ + if (llvmpipe->fs && llvmpipe->fs->info.base.num_tokens > 1) { + /* only ps_invocations come from binned query */ + for (i = 0; i < num_threads; i++) { +pq->stats.ps_invocations += pq->end[i]; + } + pq->stats.ps_invocations *= +LP_RASTER_BLOCK_SIZE * LP_RASTER_BLOCK_SIZE; + } else { + /* + * Clipper primitives and invocations are equal to zero + * if we're running a null fragment shader but only + * if both stencil and depth testing are disabled. + */ + if (!llvmpipe->depth_stencil->depth.enabled && + !llvmpipe->depth_stencil->stencil[0].enabled && + !llvmpipe->depth_stencil->stencil[1].enabled) { +pq->stats.c_primitives = 0; +pq->stats.c_invocations = 0; + } + pq->stats.ps_invocations = 0; } - pq->stats.ps_invocations *= LP_RASTER_BLOCK_SIZE * LP_RASTER_BLOCK_SIZE; *stats = pq->stats; } break; -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] llvmpipe: fix pipeline statistics with a null ps
If the fragment shader is null then pixel shader invocations have to be equal to zero. And if we're running a null ps then clipper invocations and primitives should be equal to zero but only if both stancil and depth testing are disabled. Signed-off-by: Zack Rusin --- src/gallium/drivers/llvmpipe/lp_rast.c|3 ++- src/gallium/drivers/llvmpipe/lp_rast_priv.h |3 ++- src/gallium/drivers/llvmpipe/lp_setup_line.c |3 ++- src/gallium/drivers/llvmpipe/lp_setup_point.c |3 ++- src/gallium/drivers/llvmpipe/lp_setup_tri.c |3 ++- src/gallium/drivers/llvmpipe/lp_setup_vbuf.c |9 +++-- src/gallium/drivers/llvmpipe/lp_state_fs.c| 24 +++- src/gallium/drivers/llvmpipe/lp_state_fs.h|4 8 files changed, 44 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 49cdbfe..af661e9 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -35,6 +35,7 @@ #include "os/os_time.h" #include "lp_scene_queue.h" +#include "lp_context.h" #include "lp_debug.h" #include "lp_fence.h" #include "lp_perf.h" @@ -459,7 +460,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { /* not very accurate would need a popcount on the mask */ /* always count this not worth bothering? 
*/ - task->ps_invocations++; + task->ps_invocations += 1 * variant->ps_inv_multiplier; /* run shader on 4x4 block */ BEGIN_JIT_CALL(state, task); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index b8bc99c..41fe097 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -100,6 +100,7 @@ struct lp_rasterizer_task /* occlude counter for visible pixels */ struct lp_jit_thread_data thread_data; uint64_t ps_invocations; + uint8_t ps_inv_multiplier; pipe_semaphore work_ready; pipe_semaphore work_done; @@ -308,7 +309,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { /* not very accurate would need a popcount on the mask */ /* always count this not worth bothering? */ - task->ps_invocations++; + task->ps_invocations += 1 * variant->ps_inv_multiplier; /* run shader on 4x4 block */ BEGIN_JIT_CALL(state, task); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index a25a6b0..e1686ea 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -600,7 +600,8 @@ try_setup_line( struct lp_setup_context *setup, LP_COUNT(nr_tris); - if (lp_context->active_statistics_queries) { + if (lp_context->active_statistics_queries && + !llvmpipe_rasterization_disabled(lp_context)) { lp_context->pipeline_statistics.c_primitives++; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index cbcc8d4..45068ec 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -384,7 +384,8 @@ try_setup_point( struct lp_setup_context *setup, LP_COUNT(nr_tris); - if (lp_context->active_statistics_queries) { + if (lp_context->active_statistics_queries && + !llvmpipe_rasterization_disabled(lp_context)) { 
lp_context->pipeline_statistics.c_primitives++; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 579f351..23bc6e2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -340,7 +340,8 @@ do_triangle_ccw(struct lp_setup_context *setup, LP_COUNT(nr_tris); - if (lp_context->active_statistics_queries) { + if (lp_context->active_statistics_queries && + !llvmpipe_rasterization_disabled(lp_context)) { lp_context->pipeline_statistics.c_primitives++; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index 8173994..bf9f7e7 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -565,8 +565,13 @@ lp_setup_pipeline_statistics( stats->gs_invocations; llvmpipe->pipeline_statistics.gs_primitives += stats->gs_primitives; - llvmpipe->pipeline_statistics.c_invocations += - stats->c_invocations; + if (!llvmpipe_rasterization_disabled(llvmpipe)) { + llvmpipe->pipeline_statistics.c_invocations +=
Re: [Mesa-dev] [PATCH] gallivm: already pass coords in the right place in the sampler interface
I have to admit that I don't know the sampling code, but the patches look good to me. z - Original Message - > From: Roland Scheidegger > > This makes things a bit nicer, and more importantly it fixes an issue > where a "downgraded" array texture (due to view reduced to 1 layer and > addressed with (non-array) samplec instruction) would use the wrong > coord as shadow reference value. (This could also be fixed by passing > target through the sampler interface much the same way as is done for > size queries, might do this eventually anyway.) > And if we'd ever want to support (shadow) cube map arrays, we'd need > 5 coords in any case. > > v2: fix bugs (texel fetch using wrong layer coord for 1d, shadow tex > using wrong shadow coord for 2d...). Plus need to project the shadow > coord, and just for fun keep projecting the layer coord too. > --- > src/gallium/auxiliary/gallivm/lp_bld_sample.h |2 + > src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 28 +--- > src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 159 > +++-- > 3 files changed, 90 insertions(+), 99 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h > b/src/gallium/auxiliary/gallivm/lp_bld_sample.h > index c25d171..6d8fe88 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h > +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h > @@ -335,7 +335,9 @@ texture_dims(enum pipe_texture_target tex) > case PIPE_TEXTURE_2D_ARRAY: > case PIPE_TEXTURE_RECT: > case PIPE_TEXTURE_CUBE: > + return 2; > case PIPE_TEXTURE_CUBE_ARRAY: > + assert(0); >return 2; > case PIPE_TEXTURE_3D: >return 3; > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > index 07ed48e..c312922 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > @@ -1574,7 +1574,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm, > unsigned target = static_texture_state->target; > unsigned 
dims = texture_dims(target); > unsigned num_quads = type.length / 4; > - unsigned mip_filter; > + unsigned mip_filter, i; > struct lp_build_sample_context bld; > struct lp_static_sampler_state derived_sampler_state = > *static_sampler_state; > LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); > @@ -1726,30 +1726,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm, >} > } > > - /* > -* always use the same coords for layer, shadow cmp, should probably > -* put that into gallivm sampler interface I get real tired shuffling > -* coordinates. > -*/ > - newcoords[0] = coords[0]; /* 1st coord */ > - newcoords[1] = coords[1]; /* 2nd coord */ > - newcoords[2] = coords[2]; /* 3rd coord (for cube, 3d and layer) */ > - newcoords[3] = coords[3]; /* 4th coord (intended for cube array layer) */ > - newcoords[4] = coords[2]; /* shadow cmp coord */ > - if (target == PIPE_TEXTURE_1D_ARRAY) { > - newcoords[2] = coords[1]; /* layer coord */ > - /* FIXME: shadow cmp coord can be wrong if we don't take target from > shader decl. 
*/ > - } > - else if (target == PIPE_TEXTURE_2D_ARRAY) { > - newcoords[2] = coords[2]; > - newcoords[4] = coords[3]; > - } > - else if (target == PIPE_TEXTURE_CUBE) { > - newcoords[4] = coords[3]; > - } > - else if (target == PIPE_TEXTURE_CUBE_ARRAY) { > - assert(0); /* not handled */ > - // layer coord is ok but shadow coord is impossible */ > + for (i = 0; i < 5; i++) { > + newcoords[i] = coords[i]; > } > > if (0) { > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > index db8e997..cab53df 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > @@ -1614,13 +1614,14 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, > unsigned unit; > LLVMValueRef lod_bias, explicit_lod; > LLVMValueRef oow = NULL; > - LLVMValueRef coords[4]; > + LLVMValueRef coords[5]; > LLVMValueRef offsets[3] = { NULL }; > struct lp_derivatives derivs; > struct lp_derivatives *deriv_ptr = NULL; > boolean scalar_lod; > - unsigned num_coords, num_derivs, num_offsets; > - unsigned i; > + unsigned num_derivs, num_offsets, i; > + unsigned shadow_coord = 0; > + unsigned layer_coord = 0; > > if (!bld->sampler) { >_debug_printf("warning: found texture instruction but no sampler >generator supplied\n"); > @@ -1631,55 +1632,58 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, > } > > switch (inst->Texture.Texture) { > - case TGSI_TEXTURE_1D: > - num_coords = 1; > - num_offsets = 1; > - num_derivs = 1; > - break; > case TGSI_TEXTURE_1D_ARRAY: > - num_coords = 2; > + layer_coord = 1; > + /* fallthrou
Re: [Mesa-dev] [PATCH] gallivm: do per-sample depth comparison instead of doing it post-filter
> - lp_build_sample_compare(&bld, newcoords[4], texel_out); > + if (0) > + lp_build_sample_compare(&bld, newcoords[4], texel_out); > } What does this do? The rest looks good to me! Reviewed-by: Zack Rusin ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] llvmpipe: fix stencil bug if we have both stencil and depth tests
- Original Message - > From: Roland Scheidegger > > This is a very well hidden bug found by accident (only the fixed glean > tstencil2 test so far seems to hit it). > We must use new mask with combined s_pass values and orig_mask values > for zpass/zfail stencil ops, otherwise both the sfail op and one of > zpass/zfail op are applied (probably not hit in most tests because > some of the ops tend to be KEEP usually). > > Note: this is a candidate for the 9.2 branch. Looks good ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] draw: handle nan clipdistance
If clipdistance for one of the vertices is nan (or inf) then the entire primitive should be discarded. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_cliptest_tmp.h |2 +- src/gallium/auxiliary/draw/draw_llvm.c |3 ++ src/gallium/auxiliary/draw/draw_pipe_clip.c| 13 +- src/gallium/auxiliary/gallivm/lp_bld_arit.c| 53 src/gallium/auxiliary/gallivm/lp_bld_arit.h| 11 + 5 files changed, 79 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_cliptest_tmp.h b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h index e4500db..fc54810 100644 --- a/src/gallium/auxiliary/draw/draw_cliptest_tmp.h +++ b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h @@ -140,7 +140,7 @@ static boolean TAG(do_cliptest)( struct pt_post_vs *pvs, clipdist = out->data[cd[0]][i]; else clipdist = out->data[cd[1]][i-4]; - if (clipdist < 0) + if (clipdist < 0 || util_is_inf_or_nan(clipdist)) mask |= 1 << plane_idx; } else { if (dot4(clipvertex, plane[plane_idx]) < 0) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 84e3392..1e9eadb 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1261,6 +1261,7 @@ generate_clipmask(struct draw_llvm *llvm, if (clip_user) { LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr); LLVMValueRef indices[3]; + LLVMValueRef is_nan; /* userclip planes */ while (ucp_enable) { @@ -1280,6 +1281,8 @@ generate_clipmask(struct draw_llvm *llvm, clipdist = LLVMBuildLoad(builder, outputs[cd[1]][i-4], ""); } test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist); +is_nan = lp_build_is_inf_or_nan(gallivm, vs_type, clipdist); +test = LLVMBuildOr(builder, test, is_nan, ""); temp = lp_build_const_int_vec(gallivm, i32_type, 1 << plane_idx); test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c 
b/src/gallium/auxiliary/draw/draw_pipe_clip.c index b76e9a5..2f2aadb 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -104,7 +104,7 @@ static void interp_attr( float dst[4], float t, const float in[4], const float out[4] ) -{ +{ dst[0] = LINTERP( t, out[0], in[0] ); dst[1] = LINTERP( t, out[1], in[1] ); dst[2] = LINTERP( t, out[2], in[2] ); @@ -380,6 +380,9 @@ do_clip_tri( struct draw_stage *stage, dp_prev = getclipdist(clipper, vert_prev, plane_idx); clipmask &= ~(1<= MAX_CLIPPED_VERTICES) return; @@ -392,6 +395,9 @@ do_clip_tri( struct draw_stage *stage, float dp = getclipdist(clipper, vert, plane_idx); + if (util_is_inf_or_nan(dp)) +return; //discard nan + if (!IS_NEGATIVE(dp_prev)) { assert(outcount < MAX_CLIPPED_VERTICES); if (outcount >= MAX_CLIPPED_VERTICES) @@ -522,6 +528,9 @@ do_clip_line( struct draw_stage *stage, const float dp0 = getclipdist(clipper, v0, plane_idx); const float dp1 = getclipdist(clipper, v1, plane_idx); + if (util_is_inf_or_nan(dp0) || util_is_inf_or_nan(dp1)) + return; //discard nan + if (dp1 < 0.0F) { float t = dp1 / (dp1 - dp0); t1 = MAX2(t1, t); @@ -594,7 +603,7 @@ clip_tri( struct draw_stage *stage, unsigned clipmask = (header->v[0]->clipmask | header->v[1]->clipmask | header->v[2]->clipmask); - + if (clipmask == 0) { /* no clipping needed */ stage->next->tri( stage->next, header ); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 98409c3..72b563e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -3671,3 +3671,56 @@ lp_build_isfinite(struct lp_build_context *bld, return lp_build_compare(bld->gallivm, int_type, PIPE_FUNC_NOTEQUAL, intx, infornan32); } + +/* + * Returns true if the number is nan or inf or false otherwise. + * The input has to be a floating point vector. 
+ */ +LLVMValueRef +lp_build_is_inf_or_nan(struct gallivm_state *gallivm, + const struct lp_type type, + LLVMValueRef x) +{ + LLVMBuilderRef builder = gallivm->builder; + struct lp_type int_type = lp_int_type(type); + LLVMVal
Re: [Mesa-dev] [PATCH] draw: handle nan clipdistance
> I realize this function isn't used but it looks unnecessarily > complicated - two constants one AND plus one comparison when you could > simply do a single comparison (compare x with x with unordered not > equal). This is actually doubly bad with AVX because the int comparison > is going to use 4 instructions instead of 1 (extract/2 cmp/1 insert), > well if this runs 8-wide at least. I'm going to kill that function, we already have lp_build_isnan that does the correct thing. > Otherwise looks good. Though I'm not sure you really need to kill the > prims if the clip distances are infinite? The d3d10 spec says "Coordinates coming in to clipping with infinites at x, y, z may or may not result in a discarded primitive.". I liked handling them the same way as nan, otherwise we're just generating pointless primitives. I don't have a strong opinion though, wlk doesn't seem to test infinites. z ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] draw: fix PIPE_MAX_SAMPLER/PIPE_MAX_SHADER_SAMPLER_VIEWS issues
Looks good. Reviewed-by: Zack Rusin - Original Message - > From: Roland Scheidegger > > pstipple/aaline stages used PIPE_MAX_SAMPLER instead of > PIPE_MAX_SHADER_SAMPLER_VIEWS when dealing with sampler views. > Now these stages can't actually handle sampler_unit != texture_unit anyway > (they cannot work with d3d10 shaders at all due to using tex not sample > opcodes as "mixed mode" shaders are impossible) but this leads to crashes if > a driver just installs these stages and then more than PIPE_MAX_SAMPLER views > are set even if the stages aren't even used. > --- > src/gallium/auxiliary/draw/draw_pipe_aaline.c |6 +++--- > src/gallium/auxiliary/draw/draw_pipe_pstipple.c |6 +++--- > 2 files changed, 6 insertions(+), 6 deletions(-) > > diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c > b/src/gallium/auxiliary/draw/draw_pipe_aaline.c > index c44c236..8483bd7 100644 > --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c > +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c > @@ -107,7 +107,7 @@ struct aaline_stage > struct aaline_fragment_shader *fs; > struct { >void *sampler[PIPE_MAX_SAMPLERS]; > - struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; > + struct pipe_sampler_view > *sampler_views[PIPE_MAX_SHADER_SAMPLER_VIEWS]; > } state; > > /* > @@ -763,7 +763,7 @@ aaline_destroy(struct draw_stage *stage) > struct pipe_context *pipe = stage->draw->pipe; > uint i; > > - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { > + for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) { >pipe_sampler_view_reference(&aaline->state.sampler_views[i], NULL); > } > > @@ -937,7 +937,7 @@ aaline_set_sampler_views(struct pipe_context *pipe, > for (i = 0; i < num; i++) { >pipe_sampler_view_reference(&aaline->state.sampler_views[i], >views[i]); > } > - for ( ; i < PIPE_MAX_SAMPLERS; i++) { > + for ( ; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) { >pipe_sampler_view_reference(&aaline->state.sampler_views[i], NULL); > } > aaline->num_sampler_views = num; > diff --git 
a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c > b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c > index 51f5a86..f38addd 100644 > --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c > +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c > @@ -87,7 +87,7 @@ struct pstip_stage > struct pstip_fragment_shader *fs; > struct { >void *samplers[PIPE_MAX_SAMPLERS]; > - struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; > + struct pipe_sampler_view > *sampler_views[PIPE_MAX_SHADER_SAMPLER_VIEWS]; >const struct pipe_poly_stipple *stipple; > } state; > > @@ -592,7 +592,7 @@ pstip_destroy(struct draw_stage *stage) > struct pstip_stage *pstip = pstip_stage(stage); > uint i; > > - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { > + for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) { >pipe_sampler_view_reference(&pstip->state.sampler_views[i], NULL); > } > > @@ -731,7 +731,7 @@ pstip_set_sampler_views(struct pipe_context *pipe, > for (i = 0; i < num; i++) { >pipe_sampler_view_reference(&pstip->state.sampler_views[i], views[i]); > } > - for (; i < PIPE_MAX_SAMPLERS; i++) { > + for (; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) { >pipe_sampler_view_reference(&pstip->state.sampler_views[i], NULL); > } > > -- > 1.7.9.5 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] gallivm: handle unbound textures in texture sampling / texture queries
Same here. - Original Message - > Series LGTM. > > Jose > > - Original Message - > > From: Roland Scheidegger > > > > Turns out we don't need to do much extra work for detecting this case, > > since we are guaranteed to get a empty static texture state in this case, > > hence just rely on format being 0 and return all zero then. > > Previously needed dummy textures (would just have crashed on format being 0 > > otherwise) which cannot return the correct result for size queries and when > > sampling textures with wrap modes using border. > > As a bonus should hugely increase performance when sampling unbound > > textures > > - > > too bad it isn't a useful feature :-). > > --- > > src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 26 > > + > > 1 file changed, 26 insertions(+) > > > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > > b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > > index db5e366..e0d3dd2 100644 > > --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > > +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > > @@ -2088,6 +2088,19 @@ lp_build_sample_soa(struct gallivm_state *gallivm, > >debug_printf("Sample from %s\n", util_format_name(fmt)); > > } > > > > + if (static_texture_state->format == PIPE_FORMAT_NONE) { > > + /* > > + * If there's nothing bound, format is NONE, and we must return > > + * all zero as mandated by d3d10 in this case. 
> > + */ > > + unsigned chan; > > + LLVMValueRef zero = lp_build_const_vec(gallivm, type, 0.0F); > > + for (chan = 0; chan < 4; chan++) { > > + texel_out[chan] = zero; > > + } > > + return; > > + } > > + > > assert(type.floating); > > > > /* Setup our build context */ > > @@ -2517,6 +2530,19 @@ lp_build_size_query_soa(struct gallivm_state > > *gallivm, > > unsigned num_lods = 1; > > struct lp_build_context bld_int_vec4; > > > > + if (static_state->format == PIPE_FORMAT_NONE) { > > + /* > > + * If there's nothing bound, format is NONE, and we must return > > + * all zero as mandated by d3d10 in this case. > > + */ > > + unsigned chan; > > + LLVMValueRef zero = lp_build_const_vec(gallivm, int_type, 0.0F); > > + for (chan = 0; chan < 4; chan++) { > > + sizes_out[chan] = zero; > > + } > > + return; > > + } > > + > > /* > > * Do some sanity verification about bound texture and shader dcl > > target. > > * Not entirely sure what's possible but assume array/non-array > > -- > > 1.7.9.5 > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] gallivm: support indirect registers on both dimensions
We support indirect addressing only on the vertex index, but some shaders also use indirect addressing on attributes. This patch adds support for indirect addressing on both dimensions inside gs arrays. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_llvm.c | 23 +-- src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 3 ++- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 4 +++- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 820d6b0..03668d9 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1360,8 +1360,9 @@ clipmask_booli32(struct gallivm_state *gallivm, static LLVMValueRef draw_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface, struct lp_build_tgsi_context * bld_base, - boolean is_indirect, + boolean is_vindex_indirect, LLVMValueRef vertex_index, + boolean is_aindex_indirect, LLVMValueRef attrib_index, LLVMValueRef swizzle_index) { @@ -1372,18 +1373,28 @@ draw_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface, LLVMValueRef res; struct lp_type type = bld_base->base.type; - if (is_indirect) { + if (is_vindex_indirect || is_aindex_indirect) { int i; res = bld_base->base.zero; for (i = 0; i < type.length; ++i) { LLVMValueRef idx = lp_build_const_int32(gallivm, i); - LLVMValueRef vert_chan_index = LLVMBuildExtractElement(builder, -vertex_index, idx, ""); + LLVMValueRef vert_chan_index = vertex_index; + LLVMValueRef attr_chan_index = attrib_index; LLVMValueRef channel_vec, value; + + if (is_vindex_indirect) { +vert_chan_index = LLVMBuildExtractElement(builder, + vertex_index, idx, ""); + } + if (is_aindex_indirect) { +attr_chan_index = LLVMBuildExtractElement(builder, + attrib_index, idx, ""); + } + indices[0] = vert_chan_index; - indices[1] = attrib_index; + indices[1] = attr_chan_index; indices[2] = swizzle_index; - + channel_vec = LLVMBuildGEP(builder, gs->input, indices, 3, 
""); channel_vec = LLVMBuildLoad(builder, channel_vec, ""); value = LLVMBuildExtractElement(builder, channel_vec, idx, ""); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 522302e..8bcdbc8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -395,8 +395,9 @@ struct lp_build_tgsi_gs_iface { LLVMValueRef (*fetch_input)(const struct lp_build_tgsi_gs_iface *gs_iface, struct lp_build_tgsi_context * bld_base, - boolean is_indirect, + boolean is_vindex_indirect, LLVMValueRef vertex_index, + boolean is_aindex_indirect, LLVMValueRef attrib_index, LLVMValueRef swizzle_index); void (*emit_vertex)(const struct lp_build_tgsi_gs_iface *gs_iface, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 4c6b6ec..e50f1d1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -1135,7 +1135,9 @@ emit_fetch_gs_input( res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base, reg->Dimension.Indirect, -vertex_index, attrib_index, +vertex_index, +reg->Register.Indirect, +attrib_index, swizzle_index); assert(res); -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Revert "draw: cleanup the extra attribs"
> This reverts commit 57cd3267782fcf92d1e7d772760956516d4367df. > > This fixes piglit regressions with additional draw stages on > llvmpipe, softpipe and i915g. The attributes can't be cleared at > this point because they might be in use by the additional draw > stages. The attributes have to be cleared but the interface for looking them up has to be exactly the same as in llvmpipe (i.e. only llvmpipe does it correctly). > https://bugs.freedesktop.org/show_bug.cgi?id=67963 > https://bugs.freedesktop.org/show_bug.cgi?id=67965 > https://bugs.freedesktop.org/show_bug.cgi?id=67966 All of which have been fixed for a long time, just no one had the time to verify and close. In other words please don't revert, if you don't feel like changing the shader output lookup just remove the prepare_shader_outputs call, like I mentioned, and that should get you the old behavior back. z ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] draw: cleanup the extra attribs
Hi, Stéphane. No, we should not revert to the old behavior. The old behavior was incorrect. Consider this: -- setup state that draws a wireframe -> draw should inject frontface -- the driver needs to be able to find the injected wireframe output -- draw -- setup state that draws solid fill with fragment shader using primid input -> draw should inject primid but not frontface -- driver needs to be able to find the injected primid but not frontface info -- draw Without cleaning the attributes before the second draw the draw will keep the frontface id in the extra attribs, incorrectly pointing the driver to a non-existing output, leading to a crash. That's why the attribs need to be cleaned before rendering. i915g simply shouldn't call draw_prepare_shader_outputs because it doesn't know what to do with the injected front-face or primid anyway. That part I'd suggest you remove. It will get you back to the old behavior. z - Original Message - > Hi Zack, > This change regresses a bunch of point sprite piglit tests on i915g. Should > we revert back to the old behaviour? As far as I can see, it was correct (it > was keeping the attributes in case another stage is using them). > Stéphane > On Thu, Aug 8, 2013 at 12:46 PM, Zack Rusin < za...@vmware.com > wrote: > > Before inserting new front face and prim id outputs cleanup > > > the old extra outputs, otherwise our cache will use previous > > > output slots which will break as soon as outputs of the current > > > shader don't match the last. 
> > > Signed-off-by: Zack Rusin < za...@vmware.com > > > > --- > > > src/gallium/auxiliary/draw/draw_context.c | 1 + > > > 1 file changed, 1 insertion(+) > > > diff --git a/src/gallium/auxiliary/draw/draw_context.c > > b/src/gallium/auxiliary/draw/draw_context.c > > > index af9caee..2dc6772 100644 > > > --- a/src/gallium/auxiliary/draw/draw_context.c > > > +++ b/src/gallium/auxiliary/draw/draw_context.c > > > @@ -555,6 +555,7 @@ draw_get_shader_info(const struct draw_context *draw) > > > void > > > draw_prepare_shader_outputs(struct draw_context *draw) > > > { > > > + draw_remove_extra_vertex_attribs(draw); > > > draw_ia_prepare_outputs(draw, draw->pipeline.ia); > > > draw_unfilled_prepare_outputs(draw, draw->pipeline.unfilled); > > > } > > > -- > > > 1.7.10.4 > > > ___ > > > mesa-dev mailing list > > > mesa-dev@lists.freedesktop.org > > > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] util/u_blit: Implement util_blit_pixels via pipe_context::blit.
The entire series looks good to me. Reviewed-by: Zack Rusin - Original Message - > From: José Fonseca > > This removes a lot of code, but not everything, as util_blit_pixels_tex > is still useful when one needs to override pipe_sampler_view::swizzle_?. > --- > src/gallium/auxiliary/util/u_blit.c | 447 > +++- > 1 file changed, 37 insertions(+), 410 deletions(-) > > diff --git a/src/gallium/auxiliary/util/u_blit.c > b/src/gallium/auxiliary/util/u_blit.c > index e9bec4a..4ba71b9 100644 > --- a/src/gallium/auxiliary/util/u_blit.c > +++ b/src/gallium/auxiliary/util/u_blit.c > @@ -57,29 +57,20 @@ struct blit_state > struct pipe_context *pipe; > struct cso_context *cso; > > - struct pipe_blend_state blend_write_color, blend_keep_color; > + struct pipe_blend_state blend_write_color; > struct pipe_depth_stencil_alpha_state dsa_keep_depthstencil; > - struct pipe_depth_stencil_alpha_state dsa_write_depthstencil; > - struct pipe_depth_stencil_alpha_state dsa_write_depth; > - struct pipe_depth_stencil_alpha_state dsa_write_stencil; > struct pipe_rasterizer_state rasterizer; > struct pipe_sampler_state sampler; > struct pipe_viewport_state viewport; > struct pipe_vertex_element velem[2]; > - enum pipe_texture_target internal_target; > > void *vs; > void *fs[PIPE_MAX_TEXTURE_TYPES][TGSI_WRITEMASK_XYZW + 1]; > - void *fs_depthstencil[PIPE_MAX_TEXTURE_TYPES]; > - void *fs_depth[PIPE_MAX_TEXTURE_TYPES]; > - void *fs_stencil[PIPE_MAX_TEXTURE_TYPES]; > > struct pipe_resource *vbuf; /**< quad vertices */ > unsigned vbuf_slot; > > float vertices[4][2][4]; /**< vertex/texcoords for quad */ > - > - boolean has_stencil_export; > }; > > > @@ -103,20 +94,6 @@ util_create_blit(struct pipe_context *pipe, struct > cso_context *cso) > /* disabled blending/masking */ > ctx->blend_write_color.rt[0].colormask = PIPE_MASK_RGBA; > > - /* depth stencil states */ > - ctx->dsa_write_depth.depth.enabled = 1; > - ctx->dsa_write_depth.depth.writemask = 1; > - ctx->dsa_write_depth.depth.func = 
PIPE_FUNC_ALWAYS; > - ctx->dsa_write_stencil.stencil[0].enabled = 1; > - ctx->dsa_write_stencil.stencil[0].func = PIPE_FUNC_ALWAYS; > - ctx->dsa_write_stencil.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE; > - ctx->dsa_write_stencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; > - ctx->dsa_write_stencil.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE; > - ctx->dsa_write_stencil.stencil[0].valuemask = 0xff; > - ctx->dsa_write_stencil.stencil[0].writemask = 0xff; > - ctx->dsa_write_depthstencil.depth = ctx->dsa_write_depth.depth; > - ctx->dsa_write_depthstencil.stencil[0] = > ctx->dsa_write_stencil.stencil[0]; > - > /* rasterizer */ > ctx->rasterizer.cull_face = PIPE_FACE_NONE; > ctx->rasterizer.half_pixel_center = 1; > @@ -147,14 +124,6 @@ util_create_blit(struct pipe_context *pipe, struct > cso_context *cso) >ctx->vertices[i][1][3] = 1.0f; /* q */ > } > > - if(pipe->screen->get_param(pipe->screen, PIPE_CAP_NPOT_TEXTURES)) > - ctx->internal_target = PIPE_TEXTURE_2D; > - else > - ctx->internal_target = PIPE_TEXTURE_RECT; > - > - ctx->has_stencil_export = > - pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT); > - > return ctx; > } > > @@ -178,18 +147,6 @@ util_destroy_blit(struct blit_state *ctx) >} > } > > - for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) { > - if (ctx->fs_depthstencil[i]) { > - pipe->delete_fs_state(pipe, ctx->fs_depthstencil[i]); > - } > - if (ctx->fs_depth[i]) { > - pipe->delete_fs_state(pipe, ctx->fs_depth[i]); > - } > - if (ctx->fs_stencil[i]) { > - pipe->delete_fs_state(pipe, ctx->fs_stencil[i]); > - } > - } > - > pipe_resource_reference(&ctx->vbuf, NULL); > > FREE(ctx); > @@ -217,63 +174,6 @@ set_fragment_shader(struct blit_state *ctx, uint > writemask, > > > /** > - * Helper function to set the shader which writes depth and stencil. 
> - */ > -static INLINE void > -set_depthstencil_fragment_shader(struct blit_state *ctx, > - enum pipe_texture_target pipe_tex) > -{ > - if (!ctx->fs_depthstencil[pipe_tex]) { > - unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(pipe_tex, 0); > - > - ctx->fs_depthstencil[pipe_tex] = > - util_make_fragment_tex_shader_writedepthstencil(ct
[Mesa-dev] [PATCH 1/3] llvmpipe: count c_primitives before discarding null prims
We need to count the clipper primitives before the rasterizer discards one it considers to be null. Signed-off-by: Zack Rusin --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 23bc6e2..e61efd4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -252,7 +252,6 @@ do_triangle_ccw(struct lp_setup_context *setup, const float (*v2)[4], boolean frontfacing ) { - struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; struct lp_scene *scene = setup->scene; const struct lp_setup_variant_key *key = &setup->setup.variant->key; struct lp_rast_triangle *tri; @@ -340,11 +339,6 @@ do_triangle_ccw(struct lp_setup_context *setup, LP_COUNT(nr_tris); - if (lp_context->active_statistics_queries && - !llvmpipe_rasterization_disabled(lp_context)) { - lp_context->pipeline_statistics.c_primitives++; - } - /* Setup parameter interpolants: */ setup->setup.variant->jit_function( v0, @@ -803,7 +797,6 @@ static void retry_triangle_ccw( struct lp_setup_context *setup, } } - /** * Calculate fixed position data for a triangle */ @@ -1102,11 +1095,17 @@ static void triangle_both( struct lp_setup_context *setup, const float (*v2)[4] ) { struct fixed_position position; + struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; if (setup->subdivide_large_triangles && check_subdivide_triangle(setup, v0, v1, v2, triangle_both)) return; + if (lp_context->active_statistics_queries && + !llvmpipe_rasterization_disabled(lp_context)) { + lp_context->pipeline_statistics.c_primitives++; + } + calc_fixed_position(setup, &position, v0, v1, v2); if (0) { -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] llvmpipe: increase number of subpixel bits to eight
Unfortunately d3d10 requires a lot higher precision (e.g. wgf11clipping tests for it). The smallest number of precision bits with which it passes is 8. That means that we need to decrease the maximum length of an edge that we can handle without subdivision by 4 bits. Abstracted the code a bit to make it easier to change once to switch to 64bit rasterization. Signed-off-by: Zack Rusin --- src/gallium/drivers/llvmpipe/lp_rast.h | 12 +++- src/gallium/drivers/llvmpipe/lp_setup.c | 14 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index c57f2ea..b72be55 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -46,10 +46,20 @@ struct lp_scene; struct lp_fence; struct cmd_bin; +#define FIXED_TYPE_WIDTH 32 /** For sub-pixel positioning */ -#define FIXED_ORDER 4 +#define FIXED_ORDER 8 #define FIXED_ONE (1<draw_regions[i]); } } - /* If the framebuffer is large we have to think about fixed-point - * integer overflow. For 2K by 2K images, coordinates need 15 bits - * (2^11 + 4 subpixel bits). The product of two such numbers would - * use 30 bits. Any larger and we could overflow a 32-bit int. - * - * To cope with this problem we check if triangles are large and - * subdivide them if needed. + /* + * Subdivide triangles if the framebuffer is larger than our + * MAX_FIXED_LENGTH cab accomodate. 
*/ - setup->subdivide_large_triangles = (setup->fb.width > 2048 && - setup->fb.height > 2048); + setup->subdivide_large_triangles = (setup->fb.width > MAX_FIXED_LENGTH && + setup->fb.height > MAX_FIXED_LENGTH); } setup->dirty = 0; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index e61efd4..ee30a64 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -988,7 +988,7 @@ check_subdivide_triangle(struct lp_setup_context *setup, const float (*v2)[4], triangle_func_t tri) { - const float maxLen = 2048.0f; /* longest permissible edge, in pixels */ + const float maxLen = MAX_FIXED_LENGTH; /* longest permissible edge, in pixels */ float dx10, dy10, len10; float dx21, dy21, len21; float dx02, dy02, len02; -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] draw/clip: don't emit so many empty triangles
Compress empty triangles (don't emit more than one in a row) and never emit empty triangles if we already generated a triangle covering a non-null area. We can't skip all null-triangles because c_primitives expects ones that were generated from vertices exactly at the clipping-plane, to be emitted. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_pipe_clip.c | 39 + 1 file changed, 39 insertions(+) diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 0f90bfd..2d6df81 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -209,6 +209,29 @@ static void interp( const struct clip_stage *clip, } } +/** + * Checks whether the specifed triangle is empty and if it is returns + * true, otherwise returns false. + * Triangle is considered null/empty if it's area is qual to zero. + */ +static INLINE boolean +is_tri_null(struct draw_context *draw, const struct prim_header *header) +{ + const unsigned pos_attr = draw_current_shader_position_output(draw); + float x1 = header->v[1]->data[pos_attr][0] - header->v[0]->data[pos_attr][0]; + float y1 = header->v[1]->data[pos_attr][1] - header->v[0]->data[pos_attr][1]; + float z1 = header->v[1]->data[pos_attr][2] - header->v[0]->data[pos_attr][2]; + + float x2 = header->v[2]->data[pos_attr][0] - header->v[0]->data[pos_attr][0]; + float y2 = header->v[2]->data[pos_attr][1] - header->v[0]->data[pos_attr][1]; + float z2 = header->v[2]->data[pos_attr][2] - header->v[0]->data[pos_attr][2]; + + float vx = y1 * z2 - z1 * y2; + float vy = x1 * z2 - z1 * x2; + float vz = x1 * y2 - y1 * x2; + + return (vx*vx + vy*vy + vz*vz) == 0.f; +} /** * Emit a post-clip polygon to the next pipeline stage. 
The polygon @@ -223,6 +246,8 @@ static void emit_poly( struct draw_stage *stage, struct prim_header header; unsigned i; ushort edge_first, edge_middle, edge_last; + boolean last_tri_was_null = FALSE; + boolean tri_was_not_null = FALSE; if (stage->draw->rasterizer->flatshade_first) { edge_first = DRAW_PIPE_EDGE_FLAG_0; @@ -244,6 +269,7 @@ static void emit_poly( struct draw_stage *stage, header.pad = 0; for (i = 2; i < n; i++, header.flags = edge_middle) { + boolean tri_null; /* order the triangle verts to respect the provoking vertex mode */ if (stage->draw->rasterizer->flatshade_first) { header.v[0] = inlist[0]; /* the provoking vertex */ @@ -256,6 +282,19 @@ static void emit_poly( struct draw_stage *stage, header.v[2] = inlist[0]; /* the provoking vertex */ } + tri_null = is_tri_null(stage->draw, &header); + /* If we generated a triangle with an area, aka. non-null triangle, + * or if the previous triangle was also null then skip all subsequent + * null triangles */ + if ((tri_was_not_null && tri_null) || (last_tri_was_null && tri_null)) { + last_tri_was_null = tri_null; + continue; + } + last_tri_was_null = tri_null; + if (!tri_null) { + tri_was_not_null = TRUE; + } + if (!edgeflags[i-1]) { header.flags &= ~edge_middle; } -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] llvmpipe: align the array used for subdivided vertices
When subdividing a triangle we're using a temporary array to store the new coordinates for the subdivided triangles. Unfortunately the array used for that was not aligned properly, causing random crashes in the llvm jit code which was trying to load vectors from it. Signed-off-by: Zack Rusin --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 8b0fcd0..cf67f29 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -909,7 +909,7 @@ subdiv_tri(struct lp_setup_context *setup, unsigned n = setup->fs.current.variant->shader->info.base.num_inputs + 1; const struct lp_shader_input *inputs = setup->fs.current.variant->shader->inputs; - float vmid[PIPE_MAX_ATTRIBS][4]; + PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) float vmid[PIPE_MAX_ATTRIBS][4]; const float (*vm)[4] = (const float (*)[4]) vmid; unsigned i; float w0, w1, wm; -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] llvmpipe: we need to subdivide if fb is bigger in either direction
We need to subdivide triangles if either of the dimensions is larger than the max edge length, not when both of them are larger. Signed-off-by: Zack Rusin --- src/gallium/drivers/llvmpipe/lp_setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 5fde01f..c8199b4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -1015,7 +1015,7 @@ try_update_scene_state( struct lp_setup_context *setup ) * To cope with this problem we check if triangles are large and * subdivide them if needed. */ - setup->subdivide_large_triangles = (setup->fb.width > 2048 && + setup->subdivide_large_triangles = (setup->fb.width > 2048 || setup->fb.height > 2048); } -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] DRI2: Don't disable GLX_INTEL_swap_event unconditionally
GLX_INTEL_swap_event is broken on the server side, where it's currently unconditionally enabled. This completely breaks systems running on drivers which don't support that extension. There's no way to test for its presence on this side, so instead of disabling it uncondtionally, just disable it for drivers which are known to not support it. It makes sense because most drivers do support it right now. We'll be able to remove this once Xserver properly advertises GLX_INTEL_swap_event. Note: This is a candidate for the 9.0 branch. Signed-off-by: Zack Rusin --- src/glx/dri2_glx.c | 15 --- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c index 46a92fd..05808d4 100644 --- a/src/glx/dri2_glx.c +++ b/src/glx/dri2_glx.c @@ -1051,7 +1051,8 @@ static const struct glx_context_vtable dri2_context_vtable = { }; static void -dri2BindExtensions(struct dri2_screen *psc, const __DRIextension **extensions) +dri2BindExtensions(struct dri2_screen *psc, const __DRIextension **extensions, + const char *driverName) { int i; @@ -1060,7 +1061,15 @@ dri2BindExtensions(struct dri2_screen *psc, const __DRIextension **extensions) __glXEnableDirectExtension(&psc->base, "GLX_MESA_swap_control"); __glXEnableDirectExtension(&psc->base, "GLX_SGI_make_current_read"); - if (psc->dri2->base.version >= 4) { + /* +* GLX_INTEL_swap_event is broken on the server side, where it's +* currently unconditionally enabled. This completely breaks +* systems running on drivers which don't support that extension. +* There's no way to test for its presence on this side, so instead +* of disabling it uncondtionally, just disable it for drivers +* which are known to not support it. 
+*/ + if (strcmp(driverName, "vmwgfx") != 0) { __glXEnableDirectExtension(&psc->base, "GLX_INTEL_swap_event"); } @@ -1204,7 +1213,7 @@ dri2CreateScreen(int screen, struct glx_display * priv) } extensions = psc->core->getExtensions(psc->driScreen); - dri2BindExtensions(psc, extensions); + dri2BindExtensions(psc, extensions, driverName); configs = driConvertConfigs(psc->core, psc->base.configs, driver_configs); visuals = driConvertConfigs(psc->core, psc->base.visuals, driver_configs); -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] DRI2: Don't disable GLX_INTEL_swap_event unconditionally
> Seems like we should also fix the server to not advertise the extension > if the driver doesn't have the appropriate hooks implemented. But I > have no problem with doing this on the client side too. I've sent a patch last night to xorg-devel to handle it. But since it's likely to only be included in 1.14 it's very unlikely that it will help the currently broken distros, so that's why I wanted to have a stable Mesa release that works with either server. > Reviewed-by: Jesse Barnes Thanks! z ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] DRI2: Don't disable GLX_INTEL_swap_event unconditionally
> Should we also test for the swrast driver? That is actually handled by a hack in the Xserver. http://cgit.freedesktop.org/xorg/xserver/commit/glx?id=988d7ace19a009991a4528e783d1a94c2444c66a The extension was manually removed from the list of extensions that are at all available to the software rasterizer. It just hid the real problem. > Otherwise, looks good. > > Reviewed-by: Brian Paul Thanks! ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/4] Fix geometry shaders in the draw module
This is a merge of Bryan's gs patches plus some work on top of them that fixes the known issues with geometry shaders in the draw module. I had to fix the llvm paths in the "account for separate shader objects" commit because it broke them. With this both softpipe and llvmpipe have working geometry shaders. Bryan Cain (3): draw/gs: fix allocation of buffer for GS output vertices draw: account for separate shader objects in geometry shader code draw: use geometry shader info in clip_init_state if appropriate Zack Rusin (1): draw/llvm: fix inputs to the geometry shader src/gallium/auxiliary/draw/draw_gs.c | 32 +--- src/gallium/auxiliary/draw/draw_gs.h |2 ++ src/gallium/auxiliary/draw/draw_llvm.c | 15 ++--- src/gallium/auxiliary/draw/draw_pipe_clip.c| 15 + .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c |1 + .../draw/draw_pt_fetch_shade_pipeline_llvm.c |2 ++ 6 files changed, 52 insertions(+), 15 deletions(-) -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/4] draw/gs: fix allocation of buffer for GS output vertices
From: Bryan Cain Before, it accounted for the size of the vertices but not the other fields in the vertex_header struct, which caused memory corruption. Reviewed-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_gs.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 5c55523..2ce1a2a 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -401,7 +401,8 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, output_verts->vertex_size = input_verts->vertex_size; output_verts->stride = input_verts->vertex_size; output_verts->verts = - (struct vertex_header *)MALLOC(input_verts->vertex_size * + (struct vertex_header *)MALLOC(sizeof(struct vertex_header) + + input_verts->vertex_size * num_in_primitives * shader->max_output_vertices); -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/4] draw: account for separate shader objects in geometry shader code
From: Bryan Cain The geometry shader code seems to have been originally written with the assumptions that there are the same number of VS outputs as GS outputs and that VS outputs are in the same order as their corresponding GS inputs. Since TGSI uses separate shader objects, these are both wrong assumptions. This was causing several valid vertex/geometry shader combinations to either render incorrectly or trigger an assertion. Conflicts: src/gallium/auxiliary/draw/draw_gs.c Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_gs.c | 31 +--- src/gallium/auxiliary/draw/draw_gs.h |2 ++ .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c |1 + .../draw/draw_pt_fetch_shade_pipeline_llvm.c |2 ++ 4 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 2ce1a2a..5247917 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -148,6 +148,22 @@ void draw_delete_geometry_shader(struct draw_context *draw, FREE(dgs); } +static INLINE int +draw_gs_get_input_index(int semantic, int index, +const struct tgsi_shader_info *input_info) +{ + int i; + const ubyte *input_semantic_names = input_info->output_semantic_name; + const ubyte *input_semantic_indices = input_info->output_semantic_index; + for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { + if (input_semantic_names[i] == semantic && + input_semantic_indices[i] == index) + return i; + } + debug_assert(0); + return -1; +} + /*#define DEBUG_OUTPUTS 1*/ static INLINE void draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, @@ -228,6 +244,10 @@ static void draw_fetch_gs_input(struct draw_geometry_shader *shader, machine->Inputs[idx].xyzw[3].f[prim_idx] = (float)shader->in_prim_idx; } else { +vs_slot = draw_gs_get_input_index( +shader->info.input_semantic_name[slot], +shader->info.input_semantic_index[slot], +shader->input_info); #if DEBUG_INPUTS debug_printf("\tSlot = %d, vs_slot = %d, idx = 
%d:\n", slot, vs_slot, idx); @@ -381,12 +401,14 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], const struct draw_vertex_info *input_verts, const struct draw_prim_info *input_prim, + const struct tgsi_shader_info *input_info, struct draw_vertex_info *output_verts, struct draw_prim_info *output_prims ) { const float (*input)[4] = (const float (*)[4])input_verts->verts->data; unsigned input_stride = input_verts->vertex_size; - unsigned vertex_size = input_verts->vertex_size; + unsigned num_outputs = shader->info.num_outputs; + unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float); struct tgsi_exec_machine *machine = shader->machine; unsigned num_input_verts = input_prim->linear ? input_verts->count : @@ -398,11 +420,11 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, shader->max_output_vertices) * num_in_primitives; - output_verts->vertex_size = input_verts->vertex_size; - output_verts->stride = input_verts->vertex_size; + output_verts->vertex_size = vertex_size; + output_verts->stride = output_verts->vertex_size; output_verts->verts = (struct vertex_header *)MALLOC(sizeof(struct vertex_header) + - input_verts->vertex_size * + output_verts->vertex_size * num_in_primitives * shader->max_output_vertices); @@ -426,6 +448,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, shader->in_prim_idx = 0; shader->input_vertex_stride = input_stride; shader->input = input; + shader->input_info = input_info; FREE(shader->primitive_lengths); shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned)); diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h index bfac02c..5d10d0d 100644 --- a/src/gallium/auxiliary/draw/draw_gs.h +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -64,6 +64,7 @@ struct draw_geometry_shader { unsigned in_prim_idx; unsigned input_vertex_stride; const float (*input)[4]; + 
const struct tgs
[Mesa-dev] [PATCH 3/4] draw: use geometry shader info in clip_init_state if appropriate
From: Bryan Cain Reviewed-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_pipe_clip.c | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 3110809..eeaaf41 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -40,6 +40,7 @@ #include "draw_vs.h" #include "draw_pipe.h" #include "draw_fs.h" +#include "draw_gs.h" /** Set to 1 to enable printing of coords before/after clipping */ @@ -596,8 +597,10 @@ clip_init_state( struct draw_stage *stage ) { struct clip_stage *clipper = clip_stage( stage ); const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; + const struct draw_geometry_shader *gs = stage->draw->gs.geometry_shader; const struct draw_fragment_shader *fs = stage->draw->fs.fragment_shader; uint i; + struct tgsi_shader_info *vs_info = gs ? &gs->info : &vs->info; /* We need to know for each attribute what kind of interpolation is * done on it (flat, smooth or noperspective). But the information @@ -640,16 +643,16 @@ clip_init_state( struct draw_stage *stage ) clipper->num_flat_attribs = 0; memset(clipper->noperspective_attribs, 0, sizeof(clipper->noperspective_attribs)); - for (i = 0; i < vs->info.num_outputs; i++) { + for (i = 0; i < vs_info->num_outputs; i++) { /* Find the interpolation mode for a specific attribute */ int interp; /* If it's gl_{Front,Back}{,Secondary}Color, pick up the mode * from the array we've filled before. 
*/ - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR || - vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { - interp = indexed_interp[vs->info.output_semantic_index[i]]; + if (vs_info->output_semantic_name[i] == TGSI_SEMANTIC_COLOR || + vs_info->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + interp = indexed_interp[vs_info->output_semantic_index[i]]; } else { /* Otherwise, search in the FS inputs, with a decent default * if we don't find it. @@ -658,8 +661,8 @@ clip_init_state( struct draw_stage *stage ) interp = TGSI_INTERPOLATE_PERSPECTIVE; if (fs) { for (j = 0; j < fs->info.num_inputs; j++) { - if (vs->info.output_semantic_name[i] == fs->info.input_semantic_name[j] && - vs->info.output_semantic_index[i] == fs->info.input_semantic_index[j]) { + if (vs_info->output_semantic_name[i] == fs->info.input_semantic_name[j] && + vs_info->output_semantic_index[i] == fs->info.input_semantic_index[j]) { interp = fs->info.input_interpolate[j]; break; } -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/4] draw/llvm: fix inputs to the geometry shader
We can't clip and viewport transform the vertices before we let the geometry shader process them. Let's make sure the generated vertex shader has both disabled if a geometry shader is present. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_llvm.c | 15 ++- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 8e46687..ff38a11 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1176,11 +1176,16 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; LLVMValueRef fetch_max; struct lp_build_sampler_soa *sampler = 0; - LLVMValueRef ret, clipmask_bool_ptr; - const boolean bypass_viewport = variant->key.bypass_viewport; - const boolean enable_cliptest = variant->key.clip_xy || - variant->key.clip_z || - variant->key.clip_user; + LLVMValueRef ret, clipmask_bool_ptr; + const struct draw_geometry_shader *gs = draw->gs.geometry_shader; + /* If geometry shader is present we need to skip both the viewport +* transformation and clipping otherwise the inputs to the geometry +* shader will be incorrect. +*/ + const boolean bypass_viewport = gs || variant->key.bypass_viewport; + const boolean enable_cliptest = !gs && (variant->key.clip_xy || + variant->key.clip_z || + variant->key.clip_user); LLVMValueRef variant_func; const unsigned pos = draw_current_shader_position_output(llvm->draw); const unsigned cv = draw_current_shader_clipvertex_output(llvm->draw); -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] tgsi/exec: Correctly reset NumOutputs before parsing the shader
Whenever we're binding the shaders we're incrementing NumOutputs, assuming the parser spots an output declaration, but we were never resetting the variable. That means that each subsequent bind of a geometry shader would add its number of outputs to the number of outputs bound by all previously run shaders and our indexes would get completely messed up. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 6a74ef3..17ee079 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -681,6 +681,7 @@ tgsi_exec_machine_bind_shader( mach->Processor = parse.FullHeader.Processor.Processor; mach->ImmLimit = 0; + mach->NumOutputs = 0; if (mach->Processor == TGSI_PROCESSOR_GEOMETRY && !mach->UsedGeometryShader) { @@ -1484,12 +1485,15 @@ store_dest(struct tgsi_exec_machine *mach, + reg->Register.Index; dst = &mach->Outputs[offset + index].xyzw[chan_index]; #if 0 + debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n", + mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0], + reg->Register.Index); if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { - fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask); + debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask); for (i = 0; i < TGSI_QUAD_SIZE; i++) if (execmask & (1 << i)) - fprintf(stderr, "%f, ", chan->f[i]); - fprintf(stderr, ")\n"); + debug_printf("%f, ", chan->f[i]); + debug_printf(")\n"); } #endif break; -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] draw/gs: Correctly iterate the emitted primitives
We were assuming that each emitted primitive had the same number of vertices. That is incorrect. Emitted primitives can have an arbitrary number of vertices. Simply increment the index on each iteration to fix it. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_gs.c |8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 99335af..e605965 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -172,6 +172,7 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, { struct tgsi_exec_machine *machine = shader->machine; unsigned prim_idx, j, slot; + unsigned current_idx = 0; float (*output)[4]; output = *p_output; @@ -184,9 +185,8 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, shader->primitive_lengths[prim_idx + shader->emitted_primitives] = machine->Primitives[prim_idx]; shader->emitted_vertices += num_verts_per_prim; - for (j = 0; j < num_verts_per_prim; j++) { - int idx = (prim_idx * num_verts_per_prim + j) * - shader->info.num_outputs; + for (j = 0; j < num_verts_per_prim; j++, current_idx++) { + int idx = current_idx * shader->info.num_outputs; #ifdef DEBUG_OUTPUTS debug_printf("%d) Output vert:\n", idx / shader->info.num_outputs); #endif @@ -208,7 +208,7 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, } } *p_output = output; - shader->emitted_primitives += num_primitives; + shader->emitted_primitives += num_primitives; } /*#define DEBUG_INPUTS 1*/ -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] draw/so: Fix stream output with geometry shaders
If geometry shader is present its stream output info should be used instead of the vs and we shouldn't use the pre-clipped corrdinates. Signed-off-by: Zack Rusin --- .../draw/draw_pt_fetch_shade_pipeline_llvm.c |2 +- src/gallium/auxiliary/draw/draw_pt_so_emit.c | 37 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 5fc9436..288b524 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -102,7 +102,7 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, (boolean)draw->rasterizer->gl_rasterization_rules, (draw->vs.edgeflag_output ? TRUE : FALSE) ); - draw_pt_so_emit_prepare( fpme->so_emit, TRUE ); + draw_pt_so_emit_prepare( fpme->so_emit, gs == NULL ); if (!(opt & PT_PIPELINE)) { draw_pt_emit_prepare( fpme->emit, diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c index 13d8470..64ed544 100644 --- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c @@ -27,6 +27,7 @@ #include "draw/draw_private.h" #include "draw/draw_vs.h" +#include "draw/draw_gs.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" @@ -51,13 +52,37 @@ struct pt_so_emit { unsigned generated_primitives; }; +static const struct pipe_stream_output_info * +draw_so_info(const struct draw_context *draw) +{ + const struct pipe_stream_output_info *state = NULL; + + if (draw->gs.geometry_shader) { + state = &draw->gs.geometry_shader->state.stream_output; + } else { + state = &draw->vs.vertex_shader->state.stream_output; + } + + return state; +} + +static INLINE boolean +draw_has_so(const struct draw_context *draw) +{ + const struct pipe_stream_output_info *state = draw_so_info(draw); + + if (state && state->num_outputs > 0) + 
return TRUE; + + return FALSE; +} void draw_pt_so_emit_prepare(struct pt_so_emit *emit, boolean use_pre_clip_pos) { struct draw_context *draw = emit->draw; emit->use_pre_clip_pos = use_pre_clip_pos; - emit->has_so = (draw->vs.vertex_shader->state.stream_output.num_outputs > 0); + emit->has_so = draw_has_so(draw); if (use_pre_clip_pos) emit->pos_idx = draw_current_shader_position_output(draw); @@ -92,8 +117,7 @@ static void so_emit_prim(struct pt_so_emit *so, struct draw_context *draw = so->draw; const float (*input_ptr)[4]; const float (*pcp_ptr)[4] = NULL; - const struct pipe_stream_output_info *state = - &draw->vs.vertex_shader->state.stream_output; + const struct pipe_stream_output_info *state = draw_so_info(draw); float *buffer; int buffer_total_bytes[PIPE_MAX_SO_BUFFERS]; @@ -125,9 +149,8 @@ static void so_emit_prim(struct pt_so_emit *so, for (i = 0; i < num_vertices; ++i) { const float (*input)[4]; const float (*pre_clip_pos)[4]; - unsigned total_written_compos = 0; int ob; - /*debug_printf("%d) vertex index = %d (prim idx = %d)\n", i, indices[i], prim_idx);*/ + input = (const float (*)[4])( (const char *)input_ptr + (indices[i] * input_vertex_stride)); @@ -145,11 +168,11 @@ static void so_emit_prim(struct pt_so_emit *so, buffer = (float *)((char *)draw->so.targets[ob]->mapping + draw->so.targets[ob]->target.buffer_offset + draw->so.targets[ob]->internal_offset) + state->output[slot].dst_offset; + if (idx == so->pos_idx && pcp_ptr) -memcpy(buffer, &pre_clip_pos[start_comp], num_comps * sizeof(float)); +memcpy(buffer, &pre_clip_pos[idx][start_comp], num_comps * sizeof(float)); else memcpy(buffer, &input[idx][start_comp], num_comps * sizeof(float)); - total_written_compos += num_comps; } for (ob = 0; ob < draw->so.num_targets; ++ob) draw->so.targets[ob]->internal_offset += state->stride[ob] * sizeof(float); -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] DRI2: don't advertise GLX_INTEL_swap_event if it can't
> If ddx does not support swap, don't advertise it. We might also be > able to get rid of the vmwgfx check (I'm not quite sure the purpose of > that check vs. just checking dri2Minor. No, not really. GLX_INTEL_swap_event doesn't have any hooks. You're checking for presence of generic swap extension. Also unfortunately dri2minor has no correlation to glx_intel_swap_event. There's no way to check for presence of GLX_INTEL_swap_event on this side. z ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] DRI2: don't advertise GLX_INTEL_swap_event if it can't
> well, from what I can tell, if you advertise this extension > applications will expect a swap event. Which will never come if > dri/glx on client side remaps scheduleswap to copyregion. > > So maybe there are other conditions where we should not advertise this > extension. But if we know we will never get events, we should not > advertise this extension. The issue isn't on this side, it's on the Xserver side. We don't advertise extensions that aren't advertised by the server, unfortunately Xserver unconditionally enables this extension. I've sent a patch to xorg-devel at least limiting exposure ( http://lists.x.org/archives/xorg-devel/2013-February/035449.html ) but it hasn't been applied. The only reason for the vmwgfx hack is that we have a shipping driver that badly broke with the new Xserver so instead of leaving our users with broken systems we disable the extension on the client side. That isn't the correct approach though, in fact it's wrong, but it keeps those systems working until fixed xserver is out. I'd prefer to keep more hacks to fix this situation out of mesa. z ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] DRI2: don't advertise GLX_INTEL_swap_event if it can't
> hmm, well, I think my fix is not incorrect.. we can tell from dri2 > proto version that the xserver does not support ScheduleSwap. Maybe > there should be other conditions where we also don't advertise this > extension, but this patch still improves things. If we absolutely > know from the dri2 proto version that ScheduleSwap is not supported, > then we should not advertise this extension. And GLX_MESA_swap_control and GLX_SGI_make_current_read and the same for every extension which should be checked and advertised correctly by the Xserver. The issue is that you shouldn't worry about those because Xserver should check and advertise correctly what it supports. The issue with using swap_control symbols with checking for swap_event is that it creates an arbitrary distinction between those two extensions on the client side only because Xserver does the correct thing for one of them and not the other. > Without this, gnome-shell (and mutter/clutter) on freedreno is broken. > I'd rather not filter out based on the driver name, because when I > eventually have a display driver where I can support swap, and bump > the dri2 version #, I'd like this extension to be advertised. TBH, I don't think you need this check at all, you just need a fixed xserver which doesn't advertise intel_swap_event if it doesn't support it. Until freedreno is shipped you don't have to worry about Xserver breaking the extension strings because you control the environment. And just to be clear, I'm not nacking this patch, I just think it's silly to keep fixing Xserver bugs in mesa, but if you really hate the check for names, then please remove the strcmp vmwgfx and fix the comment above the check so that we have one master hack for this extension instead of accumulating a number of them. z ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] DRI2: don't advertise GLX_INTEL_swap_event if it can't
> well, I'm more familiar w/ EGL where we don't have the xserver > advertising anything, and it is all on the client side.. but when it > is an inexpensive check, it seems reasonable to want mesa to do the > right thing where possible. It's simply silly. In the same sense that adding yet another if (ptr) to "if (ptr) if (ptr) FREE(ptr);" while not technically wrong is simply silly. Like I said we already check whether those extensions are advertised by the server and don't advertise the ones that aren't. > Probably there are other cases where we > should do the same thing. I can update my patch to also exclude other > extensions No, the point is that we don't want to do that. It's fundamentally broken and you know that it's broken because you'll notice that this extension is still advertised by the server (for our sake that's all required to fix Clutter, but it's still broken). It's a weird thing for an extension which is implemented by the server to be advertised by the server and yet having a client which is essentially not involved at all, not be advertising it. The only reason we have to worry about this is that the server is broken. So while we might want to make things easier on us by not forcing users to keep repatching the Xserver we shouldn't have any illusions about what this is: it's a nasty hack required by a bug in the Xserver. As such that code has only two requirements: 1) That all drivers requiring that hack go through the same codepath and that it's as minimal as possible so it's trivial to remove it once a fixed Xserver gets into most distros. 2) That it's clearly documented as hack thanks to which anyone reading this code will immediately understand what's the purpose of the weird code and what are the prerequisites for removing it. Everything else is of no consequence in this case.
So whether you'll decide to use names or some any number of other extensions that came after dri2inforec version 4 to check for makes no difference as long as it fulfills the two above goals. > true, it is not shipping in any distro yet, so anyone who wants to try > it gets to try git master of mesa, which runs into problems because of > advertising the INTEL_swap extension. Asking everyone to rebuild > xserver with some extra patch which is not merged yet is a big pita. Sure, but at the same time adding hacks to shared mesa code to make it easier to try a dev driver doesn't make terribly convincing argument. In the end though, at least in this case, the bug is severe enough that a hack in mesa makes sense and we've spent too much time discussing a very simple issue, so whatever you do just please make sure to fulfill the two requirements above and everything will be ok. z ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/6] graw/gs: add missing max output vertices to all tests
A few tests were missing this crucial property. Signed-off-by: Zack Rusin --- src/gallium/tests/graw/geometry-shader/add-mix.txt |1 + src/gallium/tests/graw/geometry-shader/add.txt |1 + src/gallium/tests/graw/geometry-shader/mov-cb-2d.txt |1 + src/gallium/tests/graw/geometry-shader/mov.txt |1 + 4 files changed, 4 insertions(+) diff --git a/src/gallium/tests/graw/geometry-shader/add-mix.txt b/src/gallium/tests/graw/geometry-shader/add-mix.txt index 63e689a..bbe4164 100644 --- a/src/gallium/tests/graw/geometry-shader/add-mix.txt +++ b/src/gallium/tests/graw/geometry-shader/add-mix.txt @@ -1,6 +1,7 @@ GEOM PROPERTY GS_INPUT_PRIMITIVE TRIANGLES PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP +PROPERTY GS_MAX_OUTPUT_VERTICES 3 DCL IN[][0], POSITION, CONSTANT DCL IN[][1], COLOR, CONSTANT DCL OUT[0], POSITION, CONSTANT diff --git a/src/gallium/tests/graw/geometry-shader/add.txt b/src/gallium/tests/graw/geometry-shader/add.txt index d8c7c41..8373dac 100644 --- a/src/gallium/tests/graw/geometry-shader/add.txt +++ b/src/gallium/tests/graw/geometry-shader/add.txt @@ -1,6 +1,7 @@ GEOM PROPERTY GS_INPUT_PRIMITIVE TRIANGLES PROPERTY GS_OUTPUT_PRIMITIVE LINE_STRIP +PROPERTY GS_MAX_OUTPUT_VERTICES 3 DCL IN[][0], POSITION, CONSTANT DCL IN[][1], COLOR, CONSTANT DCL OUT[0], POSITION, CONSTANT diff --git a/src/gallium/tests/graw/geometry-shader/mov-cb-2d.txt b/src/gallium/tests/graw/geometry-shader/mov-cb-2d.txt index 058acfb..339fd1e 100644 --- a/src/gallium/tests/graw/geometry-shader/mov-cb-2d.txt +++ b/src/gallium/tests/graw/geometry-shader/mov-cb-2d.txt @@ -1,6 +1,7 @@ GEOM PROPERTY GS_INPUT_PRIMITIVE TRIANGLES PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP +PROPERTY GS_MAX_OUTPUT_VERTICES 3 DCL IN[][0], POSITION, CONSTANT DCL IN[][1], COLOR, CONSTANT DCL OUT[0], POSITION, CONSTANT diff --git a/src/gallium/tests/graw/geometry-shader/mov.txt b/src/gallium/tests/graw/geometry-shader/mov.txt index 97150a5..ad141f1 100644 --- a/src/gallium/tests/graw/geometry-shader/mov.txt +++ 
b/src/gallium/tests/graw/geometry-shader/mov.txt @@ -1,6 +1,7 @@ GEOM PROPERTY GS_INPUT_PRIMITIVE TRIANGLES PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP +PROPERTY GS_MAX_OUTPUT_VERTICES 3 DCL IN[][0], POSITION, CONSTANT DCL IN[][1], COLOR, CONSTANT DCL OUT[0], POSITION, CONSTANT -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] gallivm: implement breakc and implicit primitive flushing
We were missing an implementation of the breakc instruction, and our TGSI semantics currently require an implicit endprim at the end of a GS if none is present. This patch implements both. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/gallivm/lp_bld_tgsi.h|6 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |1 + src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c| 38 3 files changed, 45 insertions(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 4c6456e..4acc592 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -392,6 +392,12 @@ struct lp_build_tgsi_soa_context LLVMValueRef emitted_prims_vec; LLVMValueRef total_emitted_vertices_vec; LLVMValueRef emitted_vertices_vec; + /* if a shader doesn't have ENDPRIM instruction but it has +* a number of EMIT instructions it means the END instruction +* implicitly invokes ENDPRIM. handle this via a flag here +* in the future maybe we can enforce TGSI to always have +* an explicit ENDPRIM */ + boolean pending_end_primitive; LLVMValueRef consts_ptr; const LLVMValueRef *pos; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c index 41ddd99..55bb8e3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c @@ -867,6 +867,7 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base) bld_base->op_actions[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args; bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args; bld_base->op_actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args; + bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args; bld_base->op_actions[TGSI_OPCODE_KIL].fetch_args = kil_fetch_args; bld_base->op_actions[TGSI_OPCODE_KILP].fetch_args = kilp_fetch_args; bld_base->op_actions[TGSI_OPCODE_RCP].fetch_args = 
scalar_unary_fetch_args; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 95633ab..36e49ac 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -213,6 +213,23 @@ static void lp_exec_break(struct lp_exec_mask *mask) lp_exec_mask_update(mask); } + +static void lp_exec_break_condition(struct lp_exec_mask *mask, LLVMValueRef cond) +{ + LLVMBuilderRef builder = mask->bld->gallivm->builder; + LLVMValueRef exec_mask = LLVMBuildNot(builder, + mask->exec_mask, + "break"); + + exec_mask = LLVMBuildAnd(builder, exec_mask, cond, ""); + + mask->break_mask = LLVMBuildAnd(builder, + mask->break_mask, + exec_mask, "break_full"); + + lp_exec_mask_update(mask); +} + static void lp_exec_continue(struct lp_exec_mask *mask) { LLVMBuilderRef builder = mask->bld->gallivm->builder; @@ -2190,6 +2207,7 @@ emit_vertex( LLVMBuildAdd(builder, bld->emitted_vertices_vec, masked_ones, ""); bld->total_emitted_vertices_vec = LLVMBuildAdd(builder, bld->total_emitted_vertices_vec, masked_ones, ""); + bld->pending_end_primitive = TRUE; } } @@ -2212,6 +2230,7 @@ end_primitive( bld->emitted_prims_vec = LLVMBuildAdd(builder, bld->emitted_prims_vec, masked_ones, ""); bld->emitted_vertices_vec = bld_base->uint_bld.zero; + bld->pending_end_primitive = FALSE; } } @@ -2250,6 +2269,17 @@ brk_emit( } static void +breakc_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + + lp_exec_break_condition(&bld->exec_mask, emit_data->args[0]); +} + +static void if_emit( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, @@ -2504,6 +2534,12 @@ static void emit_epilogue(struct lp_build_tgsi_context * bld_base) /* If we have indirect addressing in outputs we need to copy our alloca array * to the outputs 
slots specified by the caller */ if (bld->gs_args) { + /* flush the accumulated vertices as a primitive */ + if (bld->pending_end_primitive) { + end_primitive(NULL, bld_base, NULL); + bld->pending_end_primitive = FALSE; + } + bld->gs_args->gs_epilogue(&bld->bld_base, bld->total_emitted_vertices_vec, bld->emitted_p
[Mesa-dev] [PATCH 0/6] Gallivm GS and related cleanups
This set implements code generation of geometry shaders in the LLVM paths. There are some cleanups that will follow (e.g. changing the input array to handle lp_native vectors and not TGSI_NUM_CHANNELS), but all the simple examples are working and, as far as I can tell, there are no regressions in the common code, so we can push it now. Zack Rusin (6): graw/gs: add missing max output vertices to all tests draw/llvm: Remove unused gs_constants from jit_context draw/gs: Abstract the portions of GS that are tgsi specific draw/gs: Fetch more than one primitive per invocation gallium/llvm: implement geometry shaders in the llvm paths gallivm: implement breakc and implicit primitive flushing src/gallium/auxiliary/draw/draw_context.c | 17 +- src/gallium/auxiliary/draw/draw_context.h |5 + src/gallium/auxiliary/draw/draw_gs.c | 585 +++- src/gallium/auxiliary/draw/draw_gs.h | 41 +- src/gallium/auxiliary/draw/draw_llvm.c | 584 +-- src/gallium/auxiliary/draw/draw_llvm.h | 176 +- .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 77 ++- src/gallium/auxiliary/gallivm/lp_bld_tgsi.h| 33 +- src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |1 + src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c| 209 ++- src/gallium/drivers/llvmpipe/lp_state_fs.c |4 +- src/gallium/tests/graw/geometry-shader/add-mix.txt |1 + src/gallium/tests/graw/geometry-shader/add.txt |1 + .../tests/graw/geometry-shader/mov-cb-2d.txt |1 + src/gallium/tests/graw/geometry-shader/mov.txt |1 + 15 files changed, 1511 insertions(+), 225 deletions(-) -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/6] draw/llvm: Remove unused gs_constants from jit_context
The member was never used and we'll need to handle it differently because gs will also need samplers/textures setup. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_llvm.c| 17 +++-- src/gallium/auxiliary/draw/draw_llvm.h| 15 --- .../draw/draw_pt_fetch_shade_pipeline_llvm.c |4 3 files changed, 11 insertions(+), 25 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 20c9b79..602839d 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -189,18 +189,17 @@ create_jit_context_type(struct gallivm_state *gallivm, { LLVMTargetDataRef target = gallivm->target; LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); - LLVMTypeRef elem_types[6]; + LLVMTypeRef elem_types[5]; LLVMTypeRef context_type; elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* vs_constants */ LP_MAX_TGSI_CONST_BUFFERS); - elem_types[1] = elem_types[0]; /* gs_constants */ - elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), + elem_types[1] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), DRAW_TOTAL_CLIP_PLANES), 0); - elem_types[3] = LLVMPointerType(float_type, 0); /* viewport */ - elem_types[4] = LLVMArrayType(texture_type, + elem_types[2] = LLVMPointerType(float_type, 0); /* viewport */ + elem_types[3] = LLVMArrayType(texture_type, PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */ - elem_types[5] = LLVMArrayType(sampler_type, + elem_types[4] = LLVMArrayType(sampler_type, PIPE_MAX_SAMPLERS); /* samplers */ context_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); @@ -212,12 +211,10 @@ create_jit_context_type(struct gallivm_state *gallivm, LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, target, context_type, 0); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants, - target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes, - target, 
context_type, 2); + target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, viewport, - target, context_type, 3); + target, context_type, 2); LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, target, context_type, DRAW_JIT_CTX_TEXTURES); diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index c9f125b..c03c69e 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -106,9 +106,6 @@ enum { DRAW_JIT_VERTEX_DATA }; -#define DRAW_JIT_CTX_TEXTURES 4 -#define DRAW_JIT_CTX_SAMPLERS 5 - /** * This structure is passed directly to the generated vertex shader. * @@ -123,7 +120,6 @@ enum { struct draw_jit_context { const float *vs_constants[LP_MAX_TGSI_CONST_BUFFERS]; - const float *gs_constants[LP_MAX_TGSI_CONST_BUFFERS]; float (*planes) [DRAW_TOTAL_CLIP_PLANES][4]; float *viewport; @@ -135,17 +131,14 @@ struct draw_jit_context #define draw_jit_context_vs_constants(_gallivm, _ptr) \ lp_build_struct_get_ptr(_gallivm, _ptr, 0, "vs_constants") -#define draw_jit_context_gs_constants(_gallivm, _ptr) \ - lp_build_struct_get_ptr(_gallivm, _ptr, 1, "gs_constants") - #define draw_jit_context_planes(_gallivm, _ptr) \ - lp_build_struct_get(_gallivm, _ptr, 2, "planes") + lp_build_struct_get(_gallivm, _ptr, 1, "planes") #define draw_jit_context_viewport(_gallivm, _ptr) \ - lp_build_struct_get(_gallivm, _ptr, 3, "viewport") + lp_build_struct_get(_gallivm, _ptr, 2, "viewport") -#define DRAW_JIT_CTX_TEXTURES 4 -#define DRAW_JIT_CTX_SAMPLERS 5 +#define DRAW_JIT_CTX_TEXTURES 3 +#define DRAW_JIT_CTX_SAMPLERS 4 #define draw_jit_context_textures(_gallivm, _ptr) \ lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_CTX_TEXTURES, "textures") diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 288b524..fab168c 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ 
b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -200,10 +200,6 @@ llvm_middle_end_bind_parameters(struct draw_pt_middle_end *middle) fpme->llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i]; } - for (i = 0; i < Elements(fpme->llvm->jit_conte
[Mesa-dev] [PATCH 3/6] draw/gs: Abstract the portions of GS that are tgsi specific
To be able to add llvm paths later on we need to have some common interface for them. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_gs.c | 270 ++ src/gallium/auxiliary/draw/draw_gs.h | 14 ++ 2 files changed, 156 insertions(+), 128 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index e605965..81d9140 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -42,112 +42,6 @@ /* fixme: move it from here */ #define MAX_PRIMITIVES 64 -boolean -draw_gs_init( struct draw_context *draw ) -{ - draw->gs.tgsi.machine = tgsi_exec_machine_create(); - if (!draw->gs.tgsi.machine) - return FALSE; - - draw->gs.tgsi.machine->Primitives = align_malloc( - MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16); - if (!draw->gs.tgsi.machine->Primitives) - return FALSE; - memset(draw->gs.tgsi.machine->Primitives, 0, - MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector)); - - return TRUE; -} - -void draw_gs_destroy( struct draw_context *draw ) -{ - if (!draw->gs.tgsi.machine) - return; - - align_free(draw->gs.tgsi.machine->Primitives); - - tgsi_exec_machine_destroy(draw->gs.tgsi.machine); -} - -struct draw_geometry_shader * -draw_create_geometry_shader(struct draw_context *draw, -const struct pipe_shader_state *state) -{ - struct draw_geometry_shader *gs; - unsigned i; - - gs = CALLOC_STRUCT(draw_geometry_shader); - - if (!gs) - return NULL; - - gs->draw = draw; - gs->state = *state; - gs->state.tokens = tgsi_dup_tokens(state->tokens); - if (!gs->state.tokens) { - FREE(gs); - return NULL; - } - - tgsi_scan_shader(state->tokens, &gs->info); - - /* setup the defaults */ - gs->input_primitive = PIPE_PRIM_TRIANGLES; - gs->output_primitive = PIPE_PRIM_TRIANGLE_STRIP; - gs->max_output_vertices = 32; - - for (i = 0; i < gs->info.num_properties; ++i) { - if (gs->info.properties[i].name == - TGSI_PROPERTY_GS_INPUT_PRIM) - gs->input_primitive = gs->info.properties[i].data[0]; - else if 
(gs->info.properties[i].name == - TGSI_PROPERTY_GS_OUTPUT_PRIM) - gs->output_primitive = gs->info.properties[i].data[0]; - else if (gs->info.properties[i].name == - TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) - gs->max_output_vertices = gs->info.properties[i].data[0]; - } - - gs->machine = draw->gs.tgsi.machine; - - if (gs) - { - uint i; - for (i = 0; i < gs->info.num_outputs; i++) { - if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && - gs->info.output_semantic_index[i] == 0) -gs->position_output = i; - } - } - - return gs; -} - -void draw_bind_geometry_shader(struct draw_context *draw, - struct draw_geometry_shader *dgs) -{ - draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); - - if (dgs) { - draw->gs.geometry_shader = dgs; - draw->gs.num_gs_outputs = dgs->info.num_outputs; - draw->gs.position_output = dgs->position_output; - draw_geometry_shader_prepare(dgs, draw); - } - else { - draw->gs.geometry_shader = NULL; - draw->gs.num_gs_outputs = 0; - } -} - -void draw_delete_geometry_shader(struct draw_context *draw, - struct draw_geometry_shader *dgs) -{ - FREE(dgs->primitive_lengths); - FREE((void*) dgs->state.tokens); - FREE(dgs); -} - static INLINE int draw_gs_get_input_index(int semantic, int index, const struct tgsi_shader_info *input_info) @@ -165,10 +59,10 @@ draw_gs_get_input_index(int semantic, int index, } /*#define DEBUG_OUTPUTS 1*/ -static INLINE void -draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, -unsigned num_primitives, -float (**p_output)[4]) +static void +tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader, + unsigned num_primitives, + float (**p_output)[4]) { struct tgsi_exec_machine *machine = shader->machine; unsigned prim_idx, j, slot; @@ -212,7 +106,7 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, } /*#define DEBUG_INPUTS 1*/ -static void draw_fetch_gs_input(struct draw_geometry_shader *shader, +static void tgsi_fetch_gs_input(struct draw_geometry_shader *shader, unsigned *indices, unsigned 
num_vertices, unsigned prim_idx) @@ -275,14 +169,20 @@ static void draw_fetch_gs_input(struct draw_geometry_shader *shader, } } -static void gs_flush(struct draw_geometry_shader *shader, -
[Mesa-dev] [PATCH 4/6] draw/gs: Fetch more than one primitive per invocation
Allows executing gs on up to 4 primitives at a time. Will also be required by the llvm code because there we definitely don't want to flush with just a single primitive. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_gs.c | 54 ++ src/gallium/auxiliary/draw/draw_gs.h |1 + 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 81d9140..2b50c9c 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -58,6 +58,12 @@ draw_gs_get_input_index(int semantic, int index, return -1; } +static INLINE boolean +draw_gs_should_flush(struct draw_geometry_shader *shader) +{ + return (shader->fetched_prim_count == 4); +} + /*#define DEBUG_OUTPUTS 1*/ static void tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader, @@ -197,13 +203,14 @@ static unsigned tgsi_gs_run(struct draw_geometry_shader *shader, machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0]; } -static void gs_flush(struct draw_geometry_shader *shader, - unsigned input_primitives) +static void gs_flush(struct draw_geometry_shader *shader) { unsigned out_prim_count; + unsigned input_primitives = shader->fetched_prim_count; + debug_assert(input_primitives > 0 && -input_primitives < 4); +input_primitives <= 4); out_prim_count = shader->run(shader, input_primitives); #if 0 @@ -213,6 +220,7 @@ static void gs_flush(struct draw_geometry_shader *shader, #endif shader->fetch_outputs(shader, out_prim_count, &shader->tmp_output); + shader->fetched_prim_count = 0; } static void gs_point(struct draw_geometry_shader *shader, @@ -222,10 +230,12 @@ static void gs_point(struct draw_geometry_shader *shader, indices[0] = idx; - shader->fetch_inputs(shader, indices, 1, 0); + shader->fetch_inputs(shader, indices, 1, +shader->fetched_prim_count); ++shader->in_prim_idx; + ++shader->fetched_prim_count; - gs_flush(shader, 1); + gs_flush(shader); } static void 
gs_line(struct draw_geometry_shader *shader, @@ -236,10 +246,12 @@ static void gs_line(struct draw_geometry_shader *shader, indices[0] = i0; indices[1] = i1; - shader->fetch_inputs(shader, indices, 2, 0); + shader->fetch_inputs(shader, indices, 2, +shader->fetched_prim_count); ++shader->in_prim_idx; + ++shader->fetched_prim_count; - gs_flush(shader, 1); + gs_flush(shader); } static void gs_line_adj(struct draw_geometry_shader *shader, @@ -252,10 +264,12 @@ static void gs_line_adj(struct draw_geometry_shader *shader, indices[2] = i2; indices[3] = i3; - shader->fetch_inputs(shader, indices, 4, 0); + shader->fetch_inputs(shader, indices, 4, +shader->fetched_prim_count); ++shader->in_prim_idx; + ++shader->fetched_prim_count; - gs_flush(shader, 1); + gs_flush(shader); } static void gs_tri(struct draw_geometry_shader *shader, @@ -267,10 +281,12 @@ static void gs_tri(struct draw_geometry_shader *shader, indices[1] = i1; indices[2] = i2; - shader->fetch_inputs(shader, indices, 3, 0); + shader->fetch_inputs(shader, indices, 3, +shader->fetched_prim_count); ++shader->in_prim_idx; + ++shader->fetched_prim_count; - gs_flush(shader, 1); + gs_flush(shader); } static void gs_tri_adj(struct draw_geometry_shader *shader, @@ -286,10 +302,12 @@ static void gs_tri_adj(struct draw_geometry_shader *shader, indices[4] = i4; indices[5] = i5; - shader->fetch_inputs(shader, indices, 6, 0); + shader->fetch_inputs(shader, indices, 6, +shader->fetched_prim_count); ++shader->in_prim_idx; + ++shader->fetched_prim_count; - gs_flush(shader, 1); + gs_flush(shader); } #define FUNC gs_run @@ -354,6 +372,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, shader->vertex_size = vertex_size; shader->tmp_output = (float (*)[4])output_verts->verts->data; shader->in_prim_idx = 0; + shader->fetched_prim_count = 0; shader->input_vertex_stride = input_stride; shader->input = input; shader->input_info = input_info; @@ -369,6 +388,15 @@ int draw_geometry_shader_run(struct draw_geometry_shader 
*shader, gs_run_elts(shader, input_prim, input_verts, output_prims, output_verts); + /* Flush the remaining primitives. Will happen if +* num_input_primitives % 4 != 0 +*/ + if (shader->fetched_prim_count > 0) { + gs_flush(shader); + } + + debug_assert(shader->fetched_prim_count == 0); + /* Update prim_info:
[Mesa-dev] [PATCH 0/3] A few fixes for the llvm geometry shaders
A few cleanups, plus fixes for texture sampling for the llvm geometry shaders. This is on top of the previous patchset. Zack Rusin (3): draw: Allocate the output buffer for output primitives draw/llvm: Cleanup the store debugging code llvmpipe/draw: Fix texture sampling in geometry shaders src/gallium/auxiliary/draw/draw_context.c |4 +- src/gallium/auxiliary/draw/draw_gs.c|3 +- src/gallium/auxiliary/draw/draw_llvm.c | 96 - src/gallium/auxiliary/draw/draw_llvm.h | 31 +++--- src/gallium/drivers/llvmpipe/lp_context.c |4 + src/gallium/drivers/llvmpipe/lp_context.h |1 + src/gallium/drivers/llvmpipe/lp_draw_arrays.c |4 + src/gallium/drivers/llvmpipe/lp_state.h |8 ++ src/gallium/drivers/llvmpipe/lp_state_sampler.c | 127 +++ 9 files changed, 211 insertions(+), 67 deletions(-) -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] draw: Allocate the output buffer for output primitives
We were allocating the output buffer but sizing it by the number of input primitives. We need to allocate that buffer using the maximum number of output, not input, primitives. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_gs.c |3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index c1e1f56..85ea04f 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -500,10 +500,9 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, output_verts->stride = output_verts->vertex_size; output_verts->verts = (struct vertex_header *)MALLOC(output_verts->vertex_size * - num_in_primitives * + max_out_prims * shader->max_output_vertices); - #if 0 debug_printf("%s count = %d (in prims # = %d)\n", __FUNCTION__, num_input_verts, num_in_primitives); -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] draw/llvm: Cleanup the store debugging code
Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_llvm.c | 13 + 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 1b0b1b9..f857183 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -716,13 +716,13 @@ store_aos(struct gallivm_state *gallivm, indices[1] = index; indices[2] = lp_build_const_int32(gallivm, 0); + data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, ""); + data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, ""); + #if DEBUG_STORE lp_build_printf(gallivm, " %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); #endif - data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, ""); - data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, ""); - /* Unaligned store due to the vertex header */ lp_set_store_alignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float)); } @@ -826,7 +826,7 @@ store_aos_array(struct gallivm_state *gallivm, val = adjust_mask(gallivm, val); LLVMBuildStore(builder, val, id_ptr); #if DEBUG_STORE - lp_build_printf(gallivm, "io = %p, index %d\n, clipmask = %x\n", + lp_build_printf(gallivm, "io = %p, index %d, clipmask = %x\n", io_ptrs[i], inds[i], val); #endif } @@ -1290,9 +1290,6 @@ draw_gs_llvm_end_primitive(struct lp_build_tgsi_context * bld_base, LLVMValueRef num_vertices = LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, ""); - /*lp_build_printf(gallivm, " emitting vertices, %d\n\n", -num_vertices);*/ - store_ptr = LLVMBuildGEP(builder, prim_lengts_ptr, &prims_emitted, 1, ""); store_ptr = LLVMBuildLoad(builder, store_ptr, ""); store_ptr = LLVMBuildGEP(builder, store_ptr, &ind, 1, ""); @@ -1318,7 +1315,7 @@ draw_gs_llvm_epilogue(struct lp_build_tgsi_context * bld_base, emitted_verts_ptr = LLVMBuildGEP(builder, emitted_verts_ptr, &zero, 0, ""); emitted_prims_ptr = LLVMBuildGEP(builder, emitted_prims_ptr, &zero, 0, ""); - + 
LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr); LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr); } -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] llvmpipe/draw: Fix texture sampling in geometry shaders
We weren't correctly propagating the samplers and sampler views when they were related to geometry shaders. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_context.c |4 +- src/gallium/auxiliary/draw/draw_llvm.c | 83 --- src/gallium/auxiliary/draw/draw_llvm.h | 31 +++--- src/gallium/drivers/llvmpipe/lp_context.c |4 + src/gallium/drivers/llvmpipe/lp_context.h |1 + src/gallium/drivers/llvmpipe/lp_draw_arrays.c |4 + src/gallium/drivers/llvmpipe/lp_state.h |8 ++ src/gallium/drivers/llvmpipe/lp_state_sampler.c | 127 +++ 8 files changed, 205 insertions(+), 57 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index d64b82b..ceb74df 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -792,8 +792,8 @@ draw_set_samplers(struct draw_context *draw, draw->num_samplers[shader_stage] = num; #ifdef HAVE_LLVM - if (draw->llvm && shader_stage == PIPE_SHADER_VERTEX) - draw_llvm_set_sampler_state(draw); + if (draw->llvm) + draw_llvm_set_sampler_state(draw, shader_stage); #endif } diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index f857183..3e47452 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -249,17 +249,17 @@ create_gs_jit_context_type(struct gallivm_state *gallivm, elem_types[1] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), DRAW_TOTAL_CLIP_PLANES), 0); elem_types[2] = LLVMPointerType(float_type, 0); /* viewport */ - - elem_types[3] = LLVMPointerType(LLVMPointerType(int_type, 0), 0); - elem_types[4] = LLVMPointerType(LLVMVectorType(int_type, - vector_length), 0); - elem_types[5] = LLVMPointerType(LLVMVectorType(int_type, - vector_length), 0); - elem_types[6] = LLVMArrayType(texture_type, + elem_types[3] = LLVMArrayType(texture_type, PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */ - elem_types[7] = LLVMArrayType(sampler_type, + elem_types[4] = 
LLVMArrayType(sampler_type, PIPE_MAX_SAMPLERS); /* samplers */ + + elem_types[5] = LLVMPointerType(LLVMPointerType(int_type, 0), 0); + elem_types[6] = LLVMPointerType(LLVMVectorType(int_type, + vector_length), 0); + elem_types[7] = LLVMPointerType(LLVMVectorType(int_type, + vector_length), 0); context_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); @@ -275,18 +275,18 @@ create_gs_jit_context_type(struct gallivm_state *gallivm, target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewport, target, context_type, 2); - LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths, - target, context_type, 3); - LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices, - target, context_type, 4); - LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims, - target, context_type, 5); LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, textures, target, context_type, - DRAW_GS_JIT_CTX_TEXTURES); + DRAW_JIT_CTX_TEXTURES); LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, samplers, target, context_type, - DRAW_GS_JIT_CTX_SAMPLERS); + DRAW_JIT_CTX_SAMPLERS); + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths, + target, context_type, 5); + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices, + target, context_type, 6); + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims, + target, context_type, 7); LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context, target, context_type); @@ -1721,33 +1721,36 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, void -draw_llvm_set_sampler_state(struct draw_context *draw) +draw_llvm_set_sampler_state(struct draw_context *draw, +unsigned shader_type) { unsigned i; - for (i = 0; i < draw->num_samplers[PIPE_SHADER_VERTEX]; i++) { - struct draw_jit_sampler *jit_sam = &draw->llvm->jit_context.samplers[i]; - - if (draw->samplers[i]) { - const struct pipe_sa