from:"Zack Rusin"

[Mesa-dev] [PATCH 1/3] gallivm: support printing of 64 bit integers

2013-10-08 Thread Zack Rusin

Only 8 and 32 bit integers were supported before this change.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/gallivm/lp_bld_printf.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.c 
b/src/gallium/auxiliary/gallivm/lp_bld_printf.c
index 1324da2..d06209a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_printf.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.c
@@ -106,7 +106,11 @@ lp_build_print_value(struct gallivm_state *gallivm,
   type_fmt[4] = 'g';
   type_fmt[5] = '\0';
} else if (type_kind == LLVMIntegerTypeKind) {
-  if (LLVMGetIntTypeWidth(type_ref) == 8) {
+  if (LLVMGetIntTypeWidth(type_ref) == 64) {
+ type_fmt[2] = 'l';
+ type_fmt[3] = 'd';
+ type_fmt[4] = '\0';
+  } else if (LLVMGetIntTypeWidth(type_ref) == 8) {
  type_fmt[2] = 'u';
   } else {
  type_fmt[2] = 'i';
-- 
1.8.1.2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] gallium: Add support for 32x32 muls with 64 bit results

2013-10-08 Thread Zack Rusin

The code introduces two new 32bit integer multiplication opcodes which
can be used to produce correct 64 bit results. GLSL, OpenCL and D3D10+
require them. We use two separate opcodes, because they match the
behavior of GLSL and OpenCL, are a lot easier to add than a single
opcode with multiple destinations and because there's not much (any)
difference wrt code-generation.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 34 ++
 src/gallium/auxiliary/tgsi/tgsi_info.c |  6 
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h   |  3 ++
 src/gallium/auxiliary/tgsi/tgsi_util.c |  2 ++
 src/gallium/docs/source/tgsi.rst   | 30 +++
 src/gallium/include/pipe/p_shader_tokens.h |  5 +++-
 .../tests/graw/vertex-shader/vert-imul_hi.sh   | 13 +
 .../tests/graw/vertex-shader/vert-umul_hi.sh   | 11 +++
 8 files changed, 103 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/tests/graw/vertex-shader/vert-imul_hi.sh
 create mode 100644 src/gallium/tests/graw/vertex-shader/vert-umul_hi.sh

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 0750a50..6db1238 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3478,6 +3478,32 @@ micro_umul(union tgsi_exec_channel *dst,
 }
 
 static void
+micro_imul_hi(union tgsi_exec_channel *dst,
+  const union tgsi_exec_channel *src0,
+  const union tgsi_exec_channel *src1)
+{
+#define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32)
+   dst->i[0] = I64M(src0->i[0], src1->i[0]);
+   dst->i[1] = I64M(src0->i[1], src1->i[1]);
+   dst->i[2] = I64M(src0->i[2], src1->i[2]);
+   dst->i[3] = I64M(src0->i[3], src1->i[3]);
+#undef I64M
+}
+
+static void
+micro_umul_hi(union tgsi_exec_channel *dst,
+  const union tgsi_exec_channel *src0,
+  const union tgsi_exec_channel *src1)
+{
+#define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32)
+   dst->u[0] = U64M(src0->u[0], src1->u[0]);
+   dst->u[1] = U64M(src0->u[1], src1->u[1]);
+   dst->u[2] = U64M(src0->u[2], src1->u[2]);
+   dst->u[3] = U64M(src0->u[3], src1->u[3]);
+#undef U64M
+}
+
+static void
 micro_useq(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src0,
const union tgsi_exec_channel *src1)
@@ -4277,6 +4303,14 @@ exec_instruction(
   exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, 
TGSI_EXEC_DATA_UINT);
   break;
 
+   case TGSI_OPCODE_IMUL_HI:
+  exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT, 
TGSI_EXEC_DATA_INT);
+  break;
+
+   case TGSI_OPCODE_UMUL_HI:
+  exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT, 
TGSI_EXEC_DATA_UINT);
+  break;
+
case TGSI_OPCODE_USEQ:
   exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, 
TGSI_EXEC_DATA_UINT);
   break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 7a5d18f..0beef44 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -219,6 +219,8 @@ static const struct tgsi_opcode_info 
opcode_info[TGSI_OPCODE_LAST] =
{ 1, 3, 1, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 },
{ 1, 3, 1, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 },
{ 1, 3, 1, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 },
+   { 1, 2, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI },
+   { 1, 2, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI },
 };
 
 const struct tgsi_opcode_info *
@@ -297,6 +299,7 @@ tgsi_opcode_infer_type( uint opcode )
case TGSI_OPCODE_USLT:
case TGSI_OPCODE_USNE:
case TGSI_OPCODE_SVIEWINFO:
+   case TGSI_OPCODE_UMUL_HI:
   return TGSI_TYPE_UNSIGNED;
case TGSI_OPCODE_ARL:
case TGSI_OPCODE_ARR:
@@ -317,6 +320,7 @@ tgsi_opcode_infer_type( uint opcode )
case TGSI_OPCODE_UARL:
case TGSI_OPCODE_IABS:
case TGSI_OPCODE_ISSG:
+   case TGSI_OPCODE_IMUL_HI:
   return TGSI_TYPE_SIGNED;
default:
   return TGSI_TYPE_FLOAT;
@@ -339,7 +343,9 @@ tgsi_opcode_infer_src_type( uint opcode )
case TGSI_OPCODE_CASE:
case TGSI_OPCODE_SAMPLE_I:
case TGSI_OPCODE_SAMPLE_I_MS:
+   case TGSI_OPCODE_UMUL_HI:
   return TGSI_TYPE_UNSIGNED;
+   case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_I2F:
   return TGSI_TYPE_SIGNED;
case TGSI_OPCODE_ARL:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h 
b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index b8144a8..1ef78dd 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -204,6 +204,9 @@ OP12(SAMPLE_INFO)
 
 OP13(UCMP)
 
+OP12(IMUL_HI)
+OP12(UMUL_HI)
+
 #undef OP00
 #undef OP01
 #undef OP10
diff --git a/src/

[Mesa-dev] [PATCH 3/3] llvmpipe: implement 64 bit mul opcodes in llvmpipe

2013-10-08 Thread Zack Rusin

Both the imul_hi and umul_hi are working with this patch.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 60 ++
 1 file changed, 60 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 1cfaf78..8caaf83 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -763,6 +763,64 @@ umul_emit(
emit_data->args[0], emit_data->args[1]);
 }
 
+/* TGSI_OPCODE_IMUL_HI */
+static void
+imul_hi_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   struct lp_build_context *int_bld = &bld_base->int_bld;
+   struct lp_type type = int_bld->type;
+   LLVMValueRef src0, src1;
+   LLVMValueRef dst64;
+   LLVMTypeRef typeRef;
+
+   assert(type.width == 32);
+   type.width = 64;
+   typeRef = lp_build_vec_type(bld_base->base.gallivm, type);
+   src0 = LLVMBuildSExt(builder, emit_data->args[0], typeRef, "");
+   src1 = LLVMBuildSExt(builder, emit_data->args[1], typeRef, "");
+   dst64 = LLVMBuildMul(builder, src0, src1, "");
+   dst64 = LLVMBuildAShr(
+builder, dst64,
+lp_build_const_vec(bld_base->base.gallivm, type, 32), "");
+   type.width = 32;
+   typeRef = lp_build_vec_type(bld_base->base.gallivm, type);
+   emit_data->output[emit_data->chan] =
+ LLVMBuildTrunc(builder, dst64, typeRef, "");
+}
+
+/* TGSI_OPCODE_UMUL_HI */
+static void
+umul_hi_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   struct lp_type type = uint_bld->type;
+   LLVMValueRef src0, src1;
+   LLVMValueRef dst64;
+   LLVMTypeRef typeRef;
+
+   assert(type.width == 32);
+   type.width = 64;
+   typeRef = lp_build_vec_type(bld_base->base.gallivm, type);
+   src0 = LLVMBuildZExt(builder, emit_data->args[0], typeRef, "");
+   src1 = LLVMBuildZExt(builder, emit_data->args[1], typeRef, "");
+   dst64 = LLVMBuildMul(builder, src0, src1, "");
+   dst64 = LLVMBuildLShr(
+builder, dst64,
+lp_build_const_vec(bld_base->base.gallivm, type, 32), "");
+   type.width = 32;
+   typeRef = lp_build_vec_type(bld_base->base.gallivm, type);
+   emit_data->output[emit_data->chan] =
+ LLVMBuildTrunc(builder, dst64, typeRef, "");
+}
+
 /* TGSI_OPCODE_MAX */
 static void fmax_emit(
const struct lp_build_tgsi_action * action,
@@ -894,6 +952,8 @@ lp_set_default_actions(struct lp_build_tgsi_context * 
bld_base)
bld_base->op_actions[TGSI_OPCODE_U2F].emit = u2f_emit;
bld_base->op_actions[TGSI_OPCODE_UMAD].emit = umad_emit;
bld_base->op_actions[TGSI_OPCODE_UMUL].emit = umul_emit;
+   bld_base->op_actions[TGSI_OPCODE_IMUL_HI].emit = imul_hi_emit;
+   bld_base->op_actions[TGSI_OPCODE_UMUL_HI].emit = umul_hi_emit;
 
bld_base->op_actions[TGSI_OPCODE_MAX].emit = fmax_emit;
bld_base->op_actions[TGSI_OPCODE_MIN].emit = fmin_emit;
-- 
1.8.1.2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] llvmpipe: abstract the code to set number of subpixel bits

2013-10-08 Thread Zack Rusin

As we're moving towards expanding the number of subpixel
bits and the width of the variables used in the computations
we need to make this code a bit more centralized.

Signed-off-by: Zack Rusin 
---
 src/gallium/drivers/llvmpipe/lp_rast.h  |  9 +
 src/gallium/drivers/llvmpipe/lp_setup.c | 14 +-
 src/gallium/drivers/llvmpipe/lp_setup_tri.c |  2 +-
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h 
b/src/gallium/drivers/llvmpipe/lp_rast.h
index c57f2ea..43c598d 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -46,9 +46,18 @@ struct lp_scene;
 struct lp_fence;
 struct cmd_bin;
 
+#define FIXED_TYPE_WIDTH 32
 /** For sub-pixel positioning */
 #define FIXED_ORDER 4
 #define FIXED_ONE (1<draw_regions[i]);
  }
   }
-  /* If the framebuffer is large we have to think about fixed-point
-   * integer overflow.  For 2K by 2K images, coordinates need 15 bits
-   * (2^11 + 4 subpixel bits).  The product of two such numbers would
-   * use 30 bits.  Any larger and we could overflow a 32-bit int.
-   *
-   * To cope with this problem we check if triangles are large and
-   * subdivide them if needed.
+  /*
+   * Subdivide triangles if the framebuffer is larger than the
+   * MAX_FIXED_LENGTH.
*/
-  setup->subdivide_large_triangles = (setup->fb.width > 2048 ||
-  setup->fb.height > 2048);
+  setup->subdivide_large_triangles = (setup->fb.width > MAX_FIXED_LENGTH ||
+  setup->fb.height > MAX_FIXED_LENGTH);
}
   
setup->dirty = 0;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c 
b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 051ffa0..9cc81e9 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -988,7 +988,7 @@ check_subdivide_triangle(struct lp_setup_context *setup,
  const float (*v2)[4],
  triangle_func_t tri)
 {
-   const float maxLen = 2048.0f;  /* longest permissible edge, in pixels */
+   const float maxLen = MAX_FIXED_LENGTH;  /* longest permissible edge, in 
pixels */
float dx10, dy10, len10;
float dx21, dy21, len21;
float dx02, dy02, len02;
-- 
1.8.1.2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] graw: add a test rendering a huge triangle

2013-10-24 Thread Zack Rusin

Used to test rasterization, because we often break down on
subdivision of triangles with long edges.

Signed-off-by: Zack Rusin 
---
 src/gallium/tests/graw/SConscript  |   1 +
 src/gallium/tests/graw/tri-large.c | 173 +
 2 files changed, 174 insertions(+)
 create mode 100644 src/gallium/tests/graw/tri-large.c

diff --git a/src/gallium/tests/graw/SConscript 
b/src/gallium/tests/graw/SConscript
index 8740ff3..8723807 100644
--- a/src/gallium/tests/graw/SConscript
+++ b/src/gallium/tests/graw/SConscript
@@ -29,6 +29,7 @@ progs = [
 'tex-srgb',
 'tex-swizzle',
 'tri',
+'tri-large',
 'tri-gs',
 'tri-instanced',
 'vs-test',
diff --git a/src/gallium/tests/graw/tri-large.c 
b/src/gallium/tests/graw/tri-large.c
new file mode 100644
index 000..3fbbfb3
--- /dev/null
+++ b/src/gallium/tests/graw/tri-large.c
@@ -0,0 +1,173 @@
+/* Display a cleared blue window.  This demo has no dependencies on
+ * any utility code, just the graw interface and gallium.
+ */
+
+#include "graw_util.h"
+#include "util/u_debug.h"
+
+#include 
+
+static struct graw_info info;
+
+static const int WIDTH = 4*2048;
+static const int HEIGHT = 4*2048;
+
+
+struct vertex {
+   float position[4];
+   float color[4];
+};
+
+static boolean FlatShade = FALSE;
+
+
+static struct vertex vertices[3] =
+{
+   {
+  { -1.0f, -1.0f, 0.0f, 1.0f },
+  { 1.0f, 0.0f, 0.0f, 1.0f }
+   },
+   {
+  { -1.0f, 1.0f, 0.0f, 1.0f },
+  { 0.0f, 1.0f, 0.0f, 1.0f }
+   },
+   {
+  { 1.0f, 1.0f, 0.0f, 1.0f },
+  { 0.0f, 0.0f, 1.0f, 1.0f }
+   }
+};
+
+
+static void set_vertices( void )
+{
+   struct pipe_vertex_element ve[2];
+   struct pipe_vertex_buffer vbuf;
+   void *handle;
+
+   memset(ve, 0, sizeof ve);
+
+   ve[0].src_offset = Offset(struct vertex, position);
+   ve[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   ve[1].src_offset = Offset(struct vertex, color);
+   ve[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+
+   handle = info.ctx->create_vertex_elements_state(info.ctx, 2, ve);
+   info.ctx->bind_vertex_elements_state(info.ctx, handle);
+
+   memset(&vbuf, 0, sizeof vbuf);
+
+   vbuf.stride = sizeof( struct vertex );
+   vbuf.buffer_offset = 0;
+   vbuf.buffer = pipe_buffer_create_with_data(info.ctx,
+  PIPE_BIND_VERTEX_BUFFER,
+  PIPE_USAGE_STATIC,
+  sizeof(vertices),
+  vertices);
+
+   info.ctx->set_vertex_buffers(info.ctx, 0, 1, &vbuf);
+}
+
+
+static void set_vertex_shader( void )
+{
+   void *handle;
+   const char *text =
+  "VERT\n"
+  "DCL IN[0]\n"
+  "DCL IN[1]\n"
+  "DCL OUT[0], POSITION\n"
+  "DCL OUT[1], COLOR\n"
+  "  0: MOV OUT[1], IN[1]\n"
+  "  1: MOV OUT[0], IN[0]\n"
+  "  2: END\n";
+
+   handle = graw_parse_vertex_shader(info.ctx, text);
+   info.ctx->bind_vs_state(info.ctx, handle);
+}
+
+
+static void set_fragment_shader( void )
+{
+   void *handle;
+   const char *text =
+  "FRAG\n"
+  "DCL IN[0], COLOR, LINEAR\n"
+  "DCL OUT[0], COLOR\n"
+  "  0: MOV OUT[0], IN[0]\n"
+  "  1: END\n";
+
+   handle = graw_parse_fragment_shader(info.ctx, text);
+   info.ctx->bind_fs_state(info.ctx, handle);
+}
+
+
+static void draw( void )
+{
+   union pipe_color_union clear_color = { {1,0,1,1} };
+
+   info.ctx->clear(info.ctx, PIPE_CLEAR_COLOR, &clear_color, 0, 0);
+   util_draw_arrays(info.ctx, PIPE_PRIM_TRIANGLES, 0, 3);
+   info.ctx->flush(info.ctx, NULL, 0);
+
+   graw_save_surface_to_file(info.ctx, info.color_surf[0], NULL);
+
+   graw_util_flush_front(&info);
+}
+
+
+static void init( void )
+{
+   if (!graw_util_create_window(&info, WIDTH, HEIGHT, 1, FALSE))
+  exit(1);
+
+   graw_util_default_state(&info, FALSE);
+
+   {
+  struct pipe_rasterizer_state rasterizer;
+  void *handle;
+  memset(&rasterizer, 0, sizeof rasterizer);
+  rasterizer.cull_face = PIPE_FACE_NONE;
+  rasterizer.half_pixel_center = 1;
+  rasterizer.bottom_edge_rule = 1;
+  rasterizer.flatshade = FlatShade;
+  rasterizer.depth_clip = 1;
+  handle = info.ctx->create_rasterizer_state(info.ctx, &rasterizer);
+  info.ctx->bind_rasterizer_state(info.ctx, handle);
+   }
+
+
+   graw_util_viewport(&info, 0, 0, WIDTH, HEIGHT, 30, 1000);
+
+   set_vertices();
+   set_vertex_shader();
+   set_fragment_shader();
+}
+
+static void args(int argc, char *argv[])
+{
+   int i;
+
+   for (i = 1; i < argc; ) {
+  if (graw_parse_args(&i, argc, argv)) {
+ /* ok */
+  }
+  else if (strcmp(argv[i], "-f") == 0

Re: [Mesa-dev] [PATCH] gallivm: deduplicate some indirect register address code

2013-11-06 Thread Zack Rusin

Looks good.

Reviewed-by: Zack Rusin 

- Original Message -
> From: Roland Scheidegger 
> 
> There's only one minor functional change, for immediates the pixel offsets
> are no longer added since the values are all the same for all elements in
> any case (it might be better if those weren't stored as soa vectors in the
> first place maybe).
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c |  253
>  +--
>  1 file changed, 96 insertions(+), 157 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 75f6def..5f81066 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -898,6 +898,39 @@ stype_to_fetch(struct lp_build_tgsi_context * bld_base,
>  }
>  
>  static LLVMValueRef
> +get_soa_array_offsets(struct lp_build_context *uint_bld,
> +  LLVMValueRef indirect_index,
> +  unsigned chan_index,
> +  boolean need_perelement_offset)
> +{
> +   struct gallivm_state *gallivm = uint_bld->gallivm;
> +   LLVMValueRef chan_vec =
> +  lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
> +   LLVMValueRef length_vec =
> +  lp_build_const_int_vec(gallivm, uint_bld->type,
> uint_bld->type.length);
> +   LLVMValueRef index_vec;
> +
> +   /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
> +   index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
> +   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
> +   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
> +
> +   if (need_perelement_offset) {
> +  LLVMValueRef pixel_offsets;
> +  int i;
> + /* build pixel offset vector: {0, 1, 2, 3, ...} */
> +  pixel_offsets = uint_bld->undef;
> +  for (i = 0; i < uint_bld->type.length; i++) {
> + LLVMValueRef ii = lp_build_const_int32(gallivm, i);
> + pixel_offsets = LLVMBuildInsertElement(gallivm->builder,
> pixel_offsets,
> +ii, ii, "");
> +  }
> +  index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
> +   }
> +   return index_vec;
> +}
> +
> +static LLVMValueRef
>  emit_fetch_constant(
> struct lp_build_tgsi_context * bld_base,
> const struct tgsi_full_src_register * reg,
> @@ -908,7 +941,6 @@ emit_fetch_constant(
> struct gallivm_state *gallivm = bld_base->base.gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> struct lp_build_context *uint_bld = &bld_base->uint_bld;
> -   LLVMValueRef indirect_index = NULL;
> unsigned dimension = 0;
> LLVMValueRef dimension_index;
> LLVMValueRef consts_ptr;
> @@ -927,16 +959,15 @@ emit_fetch_constant(
> consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr,
> dimension_index);
>  
> if (reg->Register.Indirect) {
> +  LLVMValueRef indirect_index;
> +  LLVMValueRef swizzle_vec =
> + lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
> +  LLVMValueRef index_vec;  /* index into the const buffer */
> +
>indirect_index = get_indirect_index(bld,
>reg->Register.File,
>reg->Register.Index,
>&reg->Indirect);
> -   }
> -
> -   if (reg->Register.Indirect) {
> -  LLVMValueRef swizzle_vec =
> - lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
> swizzle);
> -  LLVMValueRef index_vec;  /* index into the const buffer */
>  
>/* index_vec = indirect_index * 4 + swizzle */
>index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
> @@ -949,7 +980,7 @@ emit_fetch_constant(
>LLVMValueRef index;  /* index into the const buffer */
>LLVMValueRef scalar, scalar_ptr;
>  
> -  index = lp_build_const_int32(gallivm, reg->Register.Index*4 +
> swizzle);
> +  index = lp_build_const_int32(gallivm, reg->Register.Index * 4 +
> swizzle);
>  
>scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
>  &index, 1, "");
> @@ -974,49 +1005,32 @@ emit_fetch_immediate(
> struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
> struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> -   struct lp_build_context *uint_bld = &bld_base->uint_bld;
> -   struct lp_build_context *fl

Re: [Mesa-dev] [PATCH] gallivm: Compile flag to debug TGSI execution through printfs.

2013-11-13 Thread Zack Rusin

That's very nice Jose! Looks good to me.


- Original Message -
> From: José Fonseca 
> 
> It is similar to tgsi_exec.c's DEBUG_EXECUTION compile flag.
> 
> I had prototyped this for a while while debugging an issue, but finally
> cleaned this up and added a few more bells and whistles.
> 
> Here is a sample output.
> 
> CONST[0]:
>   X: 0.006250 0.006250 0.006250 0.006250
>   Y: -0.007143 -0.007143 -0.007143 -0.007143
>   Z: -1.00 -1.00 -1.00 -1.00
>   W: 1.00 1.00 1.00 1.00
> IN[0]:
>   X: 143.50 175.50 175.50 143.50
>   Y: 123.50 123.50 155.50 155.50
>   Z: 0.00 0.00 0.00 0.00
>   W: 1.00 1.00 1.00 1.00
> >   1: RCP TEMP[0].w, IN[0].
> TEMP[0].w =  1 1 1 1
> >   2: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw
> TEMP[0].x =  -0.103124976 0.0968750715 0.0968750715 -0.103124976
> TEMP[0].y =  0.117857158 0.117857158 -0.110714316 -0.110714316
> >   3: MUL OUT[0].xy, TEMP[0], TEMP[0].
> OUT[0].x =  -0.103124976 0.0968750715 0.0968750715 -0.103124976
> OUT[0].y =  0.117857158 0.117857158 -0.110714316 -0.110714316
> >   4: MUL OUT[0].z, IN[0]., TEMP[0].
> OUT[0].z =  0 0 0 0
> >   5: MOV OUT[0].w, TEMP[0]
> OUT[0].w =  1 1 1 1
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 158
>  +++-
>  src/gallium/auxiliary/tgsi/tgsi_dump.c  |  23 
>  src/gallium/auxiliary/tgsi/tgsi_dump.h  |   7 ++
>  3 files changed, 159 insertions(+), 29 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 5f81066..917826d 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -47,6 +47,7 @@
>  #include "tgsi/tgsi_parse.h"
>  #include "tgsi/tgsi_util.h"
>  #include "tgsi/tgsi_scan.h"
> +#include "tgsi/tgsi_strings.h"
>  #include "lp_bld_tgsi_action.h"
>  #include "lp_bld_type.h"
>  #include "lp_bld_const.h"
> @@ -67,6 +68,17 @@
>  
>  #define DUMP_GS_EMITS 0
>  
> +/*
> + * If non-zero, the generated LLVM IR will print intermediate results on
> every TGSI
> + * instruction.
> + *
> + * TODO:
> + * - take execution masks in consideration
> + * - debug control-flow instructions
> + */
> +#define DEBUG_EXECUTION 0
> +
> +
>  static void lp_exec_mask_init(struct lp_exec_mask *mask, struct
>  lp_build_context *bld)
>  {
> LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
> @@ -664,6 +676,43 @@ static void lp_exec_mask_endsub(struct lp_exec_mask
> *mask, int *pc)
>  }
>  
>  
> +static LLVMValueRef
> +get_file_ptr(struct lp_build_tgsi_soa_context *bld,
> + unsigned file,
> + unsigned index,
> + unsigned chan)
> +{
> +   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> +   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
> +   LLVMValueRef var_of_array;
> +
> +   switch (file) {
> +   case TGSI_FILE_TEMPORARY:
> +  array_of_vars = bld->temps;
> +  var_of_array = bld->temps_array;
> +  break;
> +   case TGSI_FILE_OUTPUT:
> +  array_of_vars = bld->outputs;
> +  var_of_array = bld->outputs_array;
> +  break;
> +   default:
> +  assert(0);
> +  return NULL;
> +   }
> +
> +   assert(chan < 4);
> +
> +   if (bld->indirect_files & (1 << file)) {
> +  LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
> index * 4 + chan);
> +  return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
> +   }
> +   else {
> +  assert(index <= bld->bld_base.info->file_max[file]);
> +  return array_of_vars[index][chan];
> +   }
> +}
> +
> +
>  /**
>   * Return pointer to a temporary register channel (src or dest).
>   * Note that indirect addressing cannot be handled here.
> @@ -675,15 +724,7 @@ lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context
> *bld,
>   unsigned index,
>   unsigned chan)
>  {
> -   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> -   assert(chan < 4);
> -   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
> -  LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
> index * 4 + chan);
> -  return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
> -   }
> -   else {
> -  return bld->temps[index][chan];
> -   }
> +   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
>  }
>  
>  /**
> @@ -697,16 +738,7 @@ lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
> unsigned index,
> unsigned chan)
>  {
> -   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> -   assert(chan < 4);
> -   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
> -  LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
> - index * 4 + chan);
> -  return LLVMBuildGEP(builder, bld-

[Mesa-dev] [PATCH] llvmpipe: support 8bit subpixel precision

2013-11-20 Thread Zack Rusin

8 bit precision is required by d3d10 but unfortunately
requires 64 bit rasterizer. This commit implements
64 bit rasterization with full support for 8bit subpixel
precision. It's a combination of all individual commits
from the llvmpipe-rast-64 branch.

Signed-off-by: Zack Rusin 
---
 src/gallium/drivers/llvmpipe/lp_rast.c |  11 ++
 src/gallium/drivers/llvmpipe/lp_rast.h |  47 +--
 src/gallium/drivers/llvmpipe/lp_rast_debug.c   |   6 +-
 src/gallium/drivers/llvmpipe/lp_rast_priv.h|  27 
 src/gallium/drivers/llvmpipe/lp_rast_tri.c | 173 +
 src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h |  56 
 src/gallium/drivers/llvmpipe/lp_setup_line.c   |   2 +-
 src/gallium/drivers/llvmpipe/lp_setup_tri.c| 155 ++
 src/gallium/tests/graw/SConscript  |   1 +
 src/gallium/tests/graw/tri-large.c | 173 +
 10 files changed, 500 insertions(+), 151 deletions(-)
 create mode 100644 src/gallium/tests/graw/tri-large.c

diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c 
b/src/gallium/drivers/llvmpipe/lp_rast.c
index af661e9..0cd62c2 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -589,6 +589,17 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
lp_rast_begin_query,
lp_rast_end_query,
lp_rast_set_state,
+   lp_rast_triangle_32_1,
+   lp_rast_triangle_32_2,
+   lp_rast_triangle_32_3,
+   lp_rast_triangle_32_4,
+   lp_rast_triangle_32_5,
+   lp_rast_triangle_32_6,
+   lp_rast_triangle_32_7,
+   lp_rast_triangle_32_8,
+   lp_rast_triangle_32_3_4,
+   lp_rast_triangle_32_3_16,
+   lp_rast_triangle_32_4_16
 };
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h 
b/src/gallium/drivers/llvmpipe/lp_rast.h
index 43c598d..b81d94f 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -46,10 +46,11 @@ struct lp_scene;
 struct lp_fence;
 struct cmd_bin;
 
-#define FIXED_TYPE_WIDTH 32
+#define FIXED_TYPE_WIDTH 64
 /** For sub-pixel positioning */
-#define FIXED_ORDER 4
+#define FIXED_ORDER 8
 #define FIXED_ONE (1<
+#include "util/u_sse.h"
+
+static INLINE __m128i
+lp_plane_to_m128i(const struct lp_rast_plane *plane)
+{
+   return _mm_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx,
+ (int32_t)plane->dcdy, (int32_t)plane->eo);
+}
+
+#endif
+
 #endif
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c 
b/src/gallium/drivers/llvmpipe/lp_rast_debug.c
index 3bc75aa..587c793 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c
@@ -195,8 +195,8 @@ debug_triangle(int tilex, int tiley,
while (plane_mask) {
   plane[nr_planes] = tri_plane[u_bit_scan(&plane_mask)];
   plane[nr_planes].c = (plane[nr_planes].c +
-plane[nr_planes].dcdy * tiley -
-plane[nr_planes].dcdx * tilex);
+IMUL64(plane[nr_planes].dcdy, tiley) -
+IMUL64(plane[nr_planes].dcdx, tilex));
   nr_planes++;
}
 
@@ -217,7 +217,7 @@ debug_triangle(int tilex, int tiley,
   }
 
   for (i = 0; i < nr_planes; i++) {
- plane[i].c += plane[i].dcdx * TILE_SIZE;
+ plane[i].c += IMUL64(plane[i].dcdx, TILE_SIZE);
  plane[i].c += plane[i].dcdy;
   }
}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h 
b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index 41fe097..77ec329 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -355,6 +355,33 @@ void lp_rast_triangle_3_16( struct lp_rasterizer_task *,
 void lp_rast_triangle_4_16( struct lp_rasterizer_task *, 
 const union lp_rast_cmd_arg );
 
+
+void lp_rast_triangle_32_1( struct lp_rasterizer_task *, 
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_32_2( struct lp_rasterizer_task *, 
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_32_3( struct lp_rasterizer_task *, 
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_32_4( struct lp_rasterizer_task *, 
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_32_5( struct lp_rasterizer_task *, 
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_32_6( struct lp_rasterizer_task *, 
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_32_7( struct lp_rasterizer_task *, 
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_32_8( struct lp_rasterizer_task *, 
+ const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_32_3_4(struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_32_3_16( struct lp_rasterizer_task *

Re: [Mesa-dev] [PATCH] llvmpipe: support 8bit subpixel precision

2013-11-21 Thread Zack Rusin

> For me too, other than the fixed_position members, looks good.  Thanks for
> your perseverance on this Zack!

Thanks! OK, attached is a version that makes position and dx/dy 32-bit again; it 
seems to work great. I have a question for you guys: if you run the piglit test
./bin/triangle-rasterization-overdraw -max_size -seed 0xA8402F24 -count 1 -auto
on master, does it fail for you? It fails for me on master, with and without the 
patch. I'm not sure what to make of it; I might have been looking at 
rasterization for too long. Looking at the rendering, it looks correct.

z

From 55c9a288c7ebc37b32bc75526e6de71a838ccaef Mon Sep 17 00:00:00 2001
From: Zack Rusin 
Date: Thu, 24 Oct 2013 22:05:22 -0400
Subject: [PATCH] llvmpipe: support 8bit subpixel precision

8 bit precision is required by d3d10 but unfortunately
requires 64 bit rasterizer. This commit implements
64 bit rasterization with full support for 8bit subpixel
precision. It's a combination of all individual commits
from the llvmpipe-rast-64 branch.
---
 src/gallium/drivers/llvmpipe/lp_rast.c |  11 ++
 src/gallium/drivers/llvmpipe/lp_rast.h |  47 +--
 src/gallium/drivers/llvmpipe/lp_rast_debug.c   |   6 +-
 src/gallium/drivers/llvmpipe/lp_rast_priv.h|  27 
 src/gallium/drivers/llvmpipe/lp_rast_tri.c | 173 
 src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h |  56 
 src/gallium/drivers/llvmpipe/lp_setup_line.c   |   2 +-
 src/gallium/drivers/llvmpipe/lp_setup_tri.c| 147 +
 src/gallium/tests/graw/SConscript  |   1 +
 src/gallium/tests/graw/tri-large.c | 174 +
 10 files changed, 496 insertions(+), 148 deletions(-)
 create mode 100644 src/gallium/tests/graw/tri-large.c

diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index af661e9..0cd62c2 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -589,6 +589,17 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
lp_rast_begin_query,
lp_rast_end_query,
lp_rast_set_state,
+   lp_rast_triangle_32_1,
+   lp_rast_triangle_32_2,
+   lp_rast_triangle_32_3,
+   lp_rast_triangle_32_4,
+   lp_rast_triangle_32_5,
+   lp_rast_triangle_32_6,
+   lp_rast_triangle_32_7,
+   lp_rast_triangle_32_8,
+   lp_rast_triangle_32_3_4,
+   lp_rast_triangle_32_3_16,
+   lp_rast_triangle_32_4_16
 };
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index 43c598d..b81d94f 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -46,10 +46,11 @@ struct lp_scene;
 struct lp_fence;
 struct cmd_bin;
 
-#define FIXED_TYPE_WIDTH 32
+#define FIXED_TYPE_WIDTH 64
 /** For sub-pixel positioning */
-#define FIXED_ORDER 4
+#define FIXED_ORDER 8
 #define FIXED_ONE (1<
+#include "util/u_sse.h"
+
+static INLINE __m128i
+lp_plane_to_m128i(const struct lp_rast_plane *plane)
+{
+   return _mm_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx,
+ (int32_t)plane->dcdy, (int32_t)plane->eo);
+}
+
+#endif
+
 #endif
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/src/gallium/drivers/llvmpipe/lp_rast_debug.c
index 3bc75aa..587c793 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c
@@ -195,8 +195,8 @@ debug_triangle(int tilex, int tiley,
while (plane_mask) {
   plane[nr_planes] = tri_plane[u_bit_scan(&plane_mask)];
   plane[nr_planes].c = (plane[nr_planes].c +
-plane[nr_planes].dcdy * tiley -
-plane[nr_planes].dcdx * tilex);
+IMUL64(plane[nr_planes].dcdy, tiley) -
+IMUL64(plane[nr_planes].dcdx, tilex));
   nr_planes++;
}
 
@@ -217,7 +217,7 @@ debug_triangle(int tilex, int tiley,
   }
 
   for (i = 0; i < nr_planes; i++) {
- plane[i].c += plane[i].dcdx * TILE_SIZE;
+ plane[i].c += IMUL64(plane[i].dcdx, TILE_SIZE);
  plane[i].c += plane[i].dcdy;
   }
}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index 41fe097..77ec329 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -355,6 +355,33 @@ void lp_rast_triangle_3_16( struct lp_rasterizer_task *,
 void lp_rast_triangle_4_16( struct lp_rasterizer_task *, 
 const union lp_rast_cmd_arg );
 
+
+void lp_rast_triangle_32_1( struct lp_rasterizer_task *, 
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_32_2( struct lp_rasterizer_task *, 
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_32_3( struct lp_rasterizer_task *, 
+ const union lp_rast_cmd_arg );
+void

Re: [Mesa-dev] [PATCH 4/8] draw/gs: fix segfault in glsl-1.50-gs-mismatch-prim-type triangles_adjacency

2014-06-10 Thread Zack Rusin

That looks wrong.  The total number of verts per buffer is the maximum number 
of verts that can be output per invocation (primitive_boundary) times number of 
invocations of geometry shader (num_in_primitives).

It's not maximum number of verts that can be output per invocation 
(primitive_boundary) times maximum number of primitives output by geometry 
shader (max_out_prims).

z

- Original Message -
> From: Dave Airlie 
> 
> This crashes on softpipe due to a lack of output memory allocated,
> 
> it appears we allocate memory for enough primitives, but not vertices
> so convert to number of vertices.
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/auxiliary/draw/draw_gs.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/auxiliary/draw/draw_gs.c
> b/src/gallium/auxiliary/draw/draw_gs.c
> index fc4f697..0a9bf81 100644
> --- a/src/gallium/auxiliary/draw/draw_gs.c
> +++ b/src/gallium/auxiliary/draw/draw_gs.c
> @@ -555,7 +555,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader
> *shader,
> /* we allocate exactly one extra vertex per primitive to allow the GS to
> emit
>  * overflown vertices into some area where they won't harm anyone */
> unsigned total_verts_per_buffer = shader->primitive_boundary *
> -  num_in_primitives;
> +  max_out_prims * u_vertices_per_prim(shader->output_primitive);
>  
> //Assume at least one primitive
> max_out_prims = MAX2(max_out_prims, 1);
> --
> 1.9.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/8] draw/gs: fix segfault in glsl-1.50-gs-mismatch-prim-type triangles_adjacency

2014-06-10 Thread Zack Rusin

I think the code is already correct and something else goes wrong. The tgsi 
geometry shader code was never done properly so it's more than likely that 
tgsi_exec is doing something wonky.

Geometry shaders specify the maximum number of vertices that they can emit. 
That's what draw_geometry_shader::max_output_vertices is. If a geometry shader 
emits more than that, the verts will be ignored. So our primitive_boundary is 
max_output_vertices + 1  because we want to make sure that in SoA we have a 
scratch space where we can keep writing the overflowed vertices. 

So the worst case scenario for our output buffer is: (max_output_vertices + 1) 
* geometry shader invocations. That's what we have there now and that's 
correct. I don't remember what tgsi_exec does, I think I never even implemented 
proper SoA for gs in tgsi_exec, so if there's anything wrong I'd look for the 
bug there.

z

- Original Message -
> On 11 June 2014 00:02, Zack Rusin  wrote:
> > That looks wrong.  The total number of verts per buffer is the maximum
> > number of verts that can be output per invocation (primitive_boundary)
> > times number of invocations of geometry shader (num_in_primitives).
> >
> > It's not maximum number of verts that can be output per invocation
> > (primitive_boundary) times maximum number of primitives output by geometry
> > shader (max_out_prims).
> >
> 
> Okay so just adding * u_vertices_per_prim(shader->output_primitive);
> would suffice?
> 
> Dave
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/8] draw/gs: fix segfault in glsl-1.50-gs-mismatch-prim-type triangles_adjacency

2014-06-10 Thread Zack Rusin

> I'll revisit it today and see if I can spot something else wrong, it
> fails for triangle adj because there are 6 vertices per primitive and
> we have only malloced space for 4.

It has to be something else because that's impossible, in fact it's 2x 
impossible ;)

1) It's illegal and impossible for geometry shader to emit adjacency 
primitives. Only points, lines and triangles can be emitted from gs.

2) The output primitive is irrelevant for the size of the buffer. If a geometry 
shader claims that the max output vertices is four, then it can, at most, emit 
4 points, 2 lines or 1 triangle (incomplete primitives are discarded from 
geometry shader so the extra 4th vertex will be discarded). If a geometry 
shader claims to max emit 4 vertices and you try to emit 100 points, you will 
still get only 4 points (96 will be counted as overflowed but they won't be 
emitted).

My advice would be to check what's in the output buffer with llvmpipe. If 
tgsi_exec doesn't match llvmpipe then there's a bug in tgsi_exec.

z
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] draw: avoid buffer overflows with bad geometry programs.

2014-06-10 Thread Zack Rusin

To be honest I still don't like it. While the tgsi_exec specific paths in 
draw_gs don't matter to me and can be as ugly as they need to be, they can't be 
polluting the draw_pt_emit code, in other words the primitive_lengths can't be 
bogus at that point - prim_info can't lie about the amount of data that it's 
holding.

z

- Original Message -
> From: Dave Airlie 
> 
> One of the mismatched tests has a max output vertices of 3,
> but emits 6 vertices, this means the output buffer is undersized
> and causes problems down the line, so limit things later if we
> have a number of vertices lower than the number required to execute
> a primitive.
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/auxiliary/draw/draw_gs.c  | 4 ++--
>  src/gallium/auxiliary/draw/draw_pt_emit.c | 8 +++-
>  2 files changed, 9 insertions(+), 3 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/draw/draw_gs.c
> b/src/gallium/auxiliary/draw/draw_gs.c
> index fc4f697..d07e88f 100644
> --- a/src/gallium/auxiliary/draw/draw_gs.c
> +++ b/src/gallium/auxiliary/draw/draw_gs.c
> @@ -92,8 +92,8 @@ tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader,
>unsigned num_verts_per_prim = machine->Primitives[prim_idx];
>shader->primitive_lengths[prim_idx +   shader->emitted_primitives] =
>   machine->Primitives[prim_idx];
> -  shader->emitted_vertices += num_verts_per_prim;
> -  for (j = 0; j < num_verts_per_prim; j++, current_idx++) {
> +  shader->emitted_vertices += MIN2(num_verts_per_prim,
> shader->max_output_vertices);
> +  for (j = 0; j < MIN2(num_verts_per_prim, shader->max_output_vertices);
> j++, current_idx++) {
>   int idx = current_idx * shader->info.num_outputs;
>  #ifdef DEBUG_OUTPUTS
>   debug_printf("%d) Output vert:\n", idx / shader->info.num_outputs);
> diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c
> b/src/gallium/auxiliary/draw/draw_pt_emit.c
> index 011efe7..d8e2809 100644
> --- a/src/gallium/auxiliary/draw/draw_pt_emit.c
> +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
> @@ -26,6 +26,7 @@
>   **/
>  
>  #include "util/u_memory.h"
> +#include "util/u_math.h"
>  #include "draw/draw_context.h"
>  #include "draw/draw_private.h"
>  #include "draw/draw_vbuf.h"
> @@ -255,9 +256,14 @@ draw_pt_emit_linear(struct pt_emit *emit,
>  i < prim_info->primitive_count;
>  start += prim_info->primitive_lengths[i], i++)
> {
> +  int len;
> +  if (start > count)
> + continue;
> +  len = MIN2(prim_info->primitive_lengths[i], count);
>render->draw_arrays(render,
>start,
> -  prim_info->primitive_lengths[i]);
> +  len);
> +
> }
> 
> render->release_vertices(render);
> --
> 1.9.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] tgsi/gs: bound max output vertices in shader

2014-06-10 Thread Zack Rusin

Looks great. If I was into diffs I'd make sweet and passionate love to this one.

Reviewed-by: Zack Rusin 

- Original Message -
> From: Dave Airlie 
> 
> This limits the number of emitted vertices to the shaders max output
> vertices, and avoids us writing things into memory that isn't big
> enough for it.
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/auxiliary/tgsi/tgsi_exec.c | 8 
>  src/gallium/auxiliary/tgsi/tgsi_exec.h | 1 +
>  2 files changed, 9 insertions(+)
> 
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c
> b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> index 69d98fd..d848348 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> @@ -789,6 +789,11 @@ tgsi_exec_machine_bind_shader(
>   break;
>  
>case TGSI_TOKEN_TYPE_PROPERTY:
> + if (mach->Processor == TGSI_PROCESSOR_GEOMETRY) {
> +if (parse.FullToken.FullProperty.Property.PropertyName ==
> TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
> +   mach->MaxOutputVertices =
> parse.FullToken.FullProperty.u[0].Data;
> +}
> + }
>   break;
>  
>default:
> @@ -1621,6 +1626,9 @@ emit_vertex(struct tgsi_exec_machine *mach)
>   if ((mach->ExecMask & (1 << i)))
> */
> if (mach->ExecMask) {
> +  if
> (mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]
> >= mach->MaxOutputVertices)
> + return;
> +
>mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] +=
>mach->NumOutputs;
>
> mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
> }
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h
> b/src/gallium/auxiliary/tgsi/tgsi_exec.h
> index 7a82f69..d53c4ba 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
> @@ -297,6 +297,7 @@ struct tgsi_exec_machine
> unsigned  *Primitives;
> unsigned   NumOutputs;
> unsigned   MaxGeometryShaderOutputs;
> +   unsigned   MaxOutputVertices;
>  
> /* FRAGMENT processor only. */
> const struct tgsi_interp_coef *InterpCoefs;
> --
> 1.9.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] gallivm: handle nan's in min/max

2013-07-16 Thread Zack Rusin

Both D3D10 and OpenCL say that if one of the inputs is nan then
the other should be returned. To preserve that behavior
the patch fixes both the sse and the non-sse paths in both
functions.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c |   60 ---
 1 file changed, 54 insertions(+), 6 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c 
b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index e7955aa..7beb117 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -176,12 +176,36 @@ lp_build_min_simple(struct lp_build_context *bld,
}
 
if(intrinsic) {
-  return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
- type,
- intr_size, a, b);
+  LLVMValueRef bmask, max;
+  /* We need to handle nan's for floating point numbers. If one of the
+   * inputs is nan the other should be returned (required by both D3D10+
+   * and OpenCL)
+   */
+  if (type.floating) {
+ bmask = LLVMBuildFCmp(bld->gallivm->builder, LLVMRealOEQ, b, b, "");
+ max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+   type,
+   intr_size, a, b);
+ return LLVMBuildSelect(bld->gallivm->builder, bmask, max, a, "");
+  } else {
+ return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+type,
+intr_size, a, b);
+  }
}
 
cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+   /* We need to handle nan's for floating point numbers. If one of the
+* inputs is nan the other should be returned (required by both D3D10+
+* and OpenCL)
+*/
+   if (type.floating) {
+  LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
+  LLVMValueRef nanmask = LLVMBuildFCmp(bld->gallivm->builder,
+   LLVMRealOEQ, a, a, "");
+  nanmask = LLVMBuildSExt(bld->gallivm->builder, nanmask, int_vec_type, 
"");
+  cond = LLVMBuildAnd(bld->gallivm->builder, cond, nanmask, "");
+   }
return lp_build_select(bld, cond, a, b);
 }
 
@@ -293,12 +317,36 @@ lp_build_max_simple(struct lp_build_context *bld,
}
 
if(intrinsic) {
-  return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
- type,
- intr_size, a, b);
+  LLVMValueRef bmask, min;
+  /* We need to handle nan's for floating point numbers. If one of the
+   * inputs is nan the other should be returned (required by both D3D10+
+   * and OpenCL)
+   */
+  if (type.floating) {
+ bmask = LLVMBuildFCmp(bld->gallivm->builder, LLVMRealOEQ, b, b, "");
+ min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+   type,
+   intr_size, a, b);
+ return LLVMBuildSelect(bld->gallivm->builder, bmask, min, a, "");
+  } else {
+ return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+type,
+intr_size, a, b);
+  }
}
 
cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+   /* We need to handle nan's for floating point numbers. If one of the
+* inputs is nan the other should be returned (required by both D3D10+
+* and OpenCL)
+*/
+   if (type.floating) {
+  LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
+  LLVMValueRef nanmask = LLVMBuildFCmp(bld->gallivm->builder,
+   LLVMRealOEQ, a, a, "");
+  nanmask = LLVMBuildSExt(bld->gallivm->builder, nanmask, int_vec_type, 
"");
+  cond = LLVMBuildAnd(bld->gallivm->builder, cond, nanmask, "");
+   }
return lp_build_select(bld, cond, a, b);
 }
 
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] gallivm: fix edge cases in exp2

2013-07-16 Thread Zack Rusin

exp2(0) needs to be exactly 1, if exp2(src) overflows then it has
to be equal to infinity and exp2(nan) has to be equal to a nan.
The patch fixes all three cases.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c |   16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c 
b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 7beb117..467cbc6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -3004,7 +3004,7 @@ lp_build_polynomial(struct lp_build_context *bld,
  */
 const double lp_build_exp2_polynomial[] = {
 #if EXP_POLY_DEGREE == 5
-   0.99925063526176901,
+   1.0,
0.693153073200168932794,
0.240153617044375388211,
0.0558263180532956664775,
@@ -3046,6 +3046,8 @@ lp_build_exp2_approx(struct lp_build_context *bld,
LLVMValueRef expipart = NULL;
LLVMValueRef expfpart = NULL;
LLVMValueRef res = NULL;
+   LLVMValueRef infmask = bld->zero;
+   LLVMValueRef nanmask = bld->zero;
 
assert(lp_check_value(bld->type, x));
 
@@ -3059,6 +3061,10 @@ lp_build_exp2_approx(struct lp_build_context *bld,
 
   assert(type.floating && type.width == 32);
 
+  /* We need to handle both inf and nan inputs */
+  infmask = lp_build_cmp(bld, PIPE_FUNC_GREATER, x,
+ lp_build_const_vec(bld->gallivm, type,  129.0));
+  nanmask = LLVMBuildFCmp(bld->gallivm->builder, LLVMRealOEQ, x, x, "");
   x = lp_build_min(bld, x, lp_build_const_vec(bld->gallivm, type,  129.0));
   x = lp_build_max(bld, x, lp_build_const_vec(bld->gallivm, type, 
-126.9));
 
@@ -3081,6 +3087,14 @@ lp_build_exp2_approx(struct lp_build_context *bld,
  Elements(lp_build_exp2_polynomial));
 
   res = LLVMBuildFMul(builder, expipart, expfpart, "");
+  /* If the input would overflow make sure return is correctly an inf */
+  res = lp_build_select(bld, infmask,
+lp_build_const_vec(bld->gallivm, type,  INFINITY),
+res);
+  /* If the input was a nan make sure the return is also a nan */
+  res = LLVMBuildSelect(bld->gallivm->builder, nanmask, res,
+lp_build_const_vec(bld->gallivm, type,  NAN),
+"");
}
 
if(p_exp2_int_part)
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] llvmpipe: fix blending with SRC_ALPHA_SATURATE with some formats without alpha

2013-07-17 Thread Zack Rusin

Looks good to me.

- Original Message -
> From: Roland Scheidegger 
> 
> We were fixing up the blend factor to ZERO, however this only works correctly
> with fixed point render buffers where the input values are clamped to 0/1
> (because src_alpha_saturate is min(As, 1-Ad) so can be negative with
> unclamped
> inputs). Haven't seen any failure anywhere due to that with fixed point SNORM
> buffers (which clamp inputs to -1/1) but it should apply there as well (snorm
> blending is rare, even opengl 4.3 doesn't require snorm rendertargets at all,
> d3d10 requires them but they are not blendable).
> Doesn't look like piglit hits this though (some internal testing hits the
> float case at least). (With legacy OpenGL we could theoretically still use
> the
> fixup to zero if the fragment color clamp is enabled, but we can't detect
> that
> easily since we don't support native clamping hence it gets baked into the
> shader.)
> ---
>  src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c |   18 ++
>  src/gallium/drivers/llvmpipe/lp_state_fs.c  |   16 
>  2 files changed, 26 insertions(+), 8 deletions(-)
> 
> diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
> b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
> index c4d04a2..377eaa5 100644
> --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
> +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
> @@ -114,10 +114,20 @@ lp_build_blend_factor_unswizzled(struct
> lp_build_blend_aos_context *bld,
>if(alpha)
>   return bld->base.one;
>else {
> - if(!bld->inv_dst)
> -bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
> - if(!bld->saturate)
> -bld->saturate = lp_build_min(&bld->base, src_alpha,
> bld->inv_dst);
> + /*
> +  * if there's separate src_alpha there's no dst alpha hence the
> complement
> +  * is zero but for unclamped float inputs min can be non-zero
> (negative).
> +  */
> + if (bld->src_alpha) {
> +if (!bld->saturate)
> +   bld->saturate = lp_build_min(&bld->base, src_alpha,
> bld->base.zero);
> + }
> + else {
> +if(!bld->inv_dst)
> +   bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
> +if(!bld->saturate)
> +   bld->saturate = lp_build_min(&bld->base, src_alpha,
> bld->inv_dst);
> + }
>   return bld->saturate;
>}
> case PIPE_BLENDFACTOR_CONST_COLOR:
> diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c
> b/src/gallium/drivers/llvmpipe/lp_state_fs.c
> index afd01e3..a305109 100644
> --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
> +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
> @@ -2607,7 +2607,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
>   * Return the blend factor equivalent to a destination alpha of one.
>   */
>  static INLINE unsigned
> -force_dst_alpha_one(unsigned factor)
> +force_dst_alpha_one(unsigned factor, boolean clamped_zero)
>  {
> switch(factor) {
> case PIPE_BLENDFACTOR_DST_ALPHA:
> @@ -2615,7 +2615,10 @@ force_dst_alpha_one(unsigned factor)
> case PIPE_BLENDFACTOR_INV_DST_ALPHA:
>return PIPE_BLENDFACTOR_ZERO;
> case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
> -  return PIPE_BLENDFACTOR_ZERO;
> +  if (clamped_zero)
> + return PIPE_BLENDFACTOR_ZERO;
> +  else
> + return PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE;
> }
>  
> return factor;
> @@ -2735,8 +2738,13 @@ make_variant_key(struct llvmpipe_context *lp,
> */
>if (format_desc->swizzle[3] > UTIL_FORMAT_SWIZZLE_W ||
>format_desc->swizzle[3] == format_desc->swizzle[0]) {
> - blend_rt->rgb_src_factor   =
> force_dst_alpha_one(blend_rt->rgb_src_factor);
> - blend_rt->rgb_dst_factor   =
> force_dst_alpha_one(blend_rt->rgb_dst_factor);
> + /* Doesn't cover mixed snorm/unorm but can't render to them anyway
> */
> + boolean clamped_zero = !util_format_is_float(format) &&
> +!util_format_is_snorm(format);
> + blend_rt->rgb_src_factor   =
> force_dst_alpha_one(blend_rt->rgb_src_factor,
> +  clamped_zero);
> + blend_rt->rgb_dst_factor   =
> force_dst_alpha_one(blend_rt->rgb_dst_factor,
> +  clamped_zero);
>   blend_rt->alpha_func   = blend_rt->rgb_func;
>   blend_rt->alpha_src_factor = blend_rt->rgb_src_factor;
>   blend_rt->alpha_dst_factor = blend_rt->rgb_dst_factor;
> --
> 1.7.9.5
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: fix rgtc snorm decoding

2013-07-23 Thread Zack Rusin

Reviewed-by: Zack Rusin 

- Original Message -
> From: Roland Scheidegger 
> 
> The codeword must be unsigned (otherwise will shift in 1's from above when
> merging low/high parts so some texels decode wrong).
> This also affects gallium's util/u_format_rgtc.
> ---
>  src/mesa/main/texcompress_rgtc_tmp.h |6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/src/mesa/main/texcompress_rgtc_tmp.h
> b/src/mesa/main/texcompress_rgtc_tmp.h
> index 277d69b..5fa9de6 100644
> --- a/src/mesa/main/texcompress_rgtc_tmp.h
> +++ b/src/mesa/main/texcompress_rgtc_tmp.h
> @@ -37,9 +37,9 @@ static void TAG(fetch_texel_rgtc)(unsigned srcRowStride,
> const TYPE *pixdata,
> const TYPE alpha0 = blksrc[0];
> const TYPE alpha1 = blksrc[1];
> const char bit_pos = ((j&3) * 4 + (i&3)) * 3;
> -   const TYPE acodelow = blksrc[2 + bit_pos / 8];
> -   const TYPE acodehigh = (3 + bit_pos / 8) < 8 ? blksrc[3 + bit_pos / 8] :
> 0;
> -   const TYPE code = (acodelow >> (bit_pos & 0x7) |
> +   const unsigned char acodelow = blksrc[2 + bit_pos / 8];
> +   const unsigned char acodehigh = (3 + bit_pos / 8) < 8 ? blksrc[3 +
> bit_pos / 8] : 0;
> +   const unsigned char code = (acodelow >> (bit_pos & 0x7) |
>(acodehigh  << (8 - (bit_pos & 0x7)))) & 0x7;
>  
> if (code == 0)
> --
> 1.7.9.5
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] draw: cleanup and fix instance id computation

2013-07-23 Thread Zack Rusin

The instance id system value always starts at 0, even if the
specified start instance is larger than 0. Instead of implicitly
setting instance id to instance id plus start instance and then
having to subtract instance id when computing the buffer offsets
let's just set instance id to the proper instance id. This fixes
instance id computation and cleans up buffer offset computation.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_llvm.c  |7 +++
 src/gallium/auxiliary/draw/draw_pt.c|7 ---
 src/gallium/auxiliary/translate/translate_generic.c |3 +--
 src/gallium/auxiliary/translate/translate_sse.c |4 
 4 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 79e7a9b..a3174b4 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -707,15 +707,14 @@ generate_fetch(struct gallivm_state *gallivm,
if (velem->instance_divisor) {
   /* Index is equal to the start instance plus the number of current 
* instance divided by the divisor. In this case we compute it as:
-   * index = start_instance + ((instance_id - start_instance) / divisor)
+   * index = start_instance + (instance_id  / divisor)
*/
   LLVMValueRef current_instance;
   index = lp_build_const_int32(gallivm, draw->start_instance);
-  current_instance = LLVMBuildSub(builder, instance_id, index, "");
-  current_instance = LLVMBuildUDiv(builder, current_instance,
+  current_instance = LLVMBuildUDiv(builder, instance_id,
lp_build_const_int32(gallivm, 
velem->instance_divisor),
"instance_divisor");
-  index = LLVMBuildAdd(builder, index, current_instance, "instance");
+  index = lp_build_uadd_overflow(gallivm, index, current_instance, &ofbit);
}
 
stride = lp_build_umul_overflow(gallivm, vb_stride, index, &ofbit);
diff --git a/src/gallium/auxiliary/draw/draw_pt.c 
b/src/gallium/auxiliary/draw/draw_pt.c
index ccde371..fcc2405 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -542,11 +542,12 @@ draw_vbo(struct draw_context *draw,
 */
 
for (instance = 0; instance < info->instance_count; instance++) {
-  draw->instance_id = instance + info->start_instance;
+  unsigned instance_idx = instance + info->start_instance;
   draw->start_instance = info->start_instance;
+  draw->instance_id = instance;
   /* check for overflow */
-  if (draw->instance_id < instance ||
-  draw->instance_id < info->start_instance) {
+  if (instance_idx < instance ||
+  instance_idx < draw->start_instance) {
  /* if we overflown just set the instance id to the max */
  draw->instance_id = 0xffffffff;
   }
diff --git a/src/gallium/auxiliary/translate/translate_generic.c 
b/src/gallium/auxiliary/translate/translate_generic.c
index 96e35b0..fdab0f3 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -625,8 +625,7 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( 
struct translate_generic *
 
  if (tg->attrib[attr].instance_divisor) {
 index = start_instance;
-index += (instance_id - start_instance) /
-   tg->attrib[attr].instance_divisor;
+index += (instance_id  / tg->attrib[attr].instance_divisor);
 /* XXX we need to clamp the index here too, but to a
  * per-array max value, not the draw->pt.max_index value
  * that's being given to us via translate->set_buffer().
diff --git a/src/gallium/auxiliary/translate/translate_sse.c 
b/src/gallium/auxiliary/translate/translate_sse.c
index a4f7b24..726a9b1 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -1094,10 +1094,6 @@ static boolean init_inputs( struct translate_sse *p,
struct x86_reg tmp_EDX = p->tmp2_EDX;
struct x86_reg tmp_ECX = p->src_ECX;
 
-   /* instance_num = instance_id - start_instance */
-   x86_mov(p->func, tmp_EDX, start_instance);
-   x86_sub(p->func, tmp_EAX, tmp_EDX);
-
/* TODO: Add x86_shr() to rtasm and use it whenever
 *   instance divisor is power of two.
 */
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] draw: fix vertex id computation

2013-07-23 Thread Zack Rusin

vertex id has to be unaffected by the start index (i.e. when calling
draw arrays with start_index = 5, the first vertex_id has to still
be 0, not 5) and it has to be equal to the index when performing
indexed rendering (in which case it has to be unaffected by the
index bias). This fixes our behavior.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_llvm.c|   37 ++---
 src/gallium/auxiliary/draw/draw_private.h |1 +
 src/gallium/auxiliary/draw/draw_pt.c  |1 +
 3 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index a3174b4..adf3941 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1646,22 +1646,19 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant,
 #endif
   system_values.vertex_id = lp_build_zero(gallivm, lp_type_uint_vec(32, 
32*vector_length));
   for (i = 0; i < vector_length; ++i) {
- LLVMValueRef true_index =
+ LLVMValueRef vert_index =
 LLVMBuildAdd(builder,
  lp_loop.counter,
  lp_build_const_int32(gallivm, i), "");
- true_index = LLVMBuildAdd(builder, start, true_index, "");
+ LLVMValueRef true_index =
+LLVMBuildAdd(builder, start, vert_index, "");
+ LLVMValueRef vertex_id;
 
  /* make sure we're not out of bounds which can happen
   * if fetch_count % 4 != 0, because on the last iteration
   * a few of the 4 vertex fetches will be out of bounds */
  true_index = lp_build_min(&bld, true_index, fetch_max);
 
- system_values.vertex_id = LLVMBuildInsertElement(
-gallivm->builder,
-system_values.vertex_id, true_index,
-lp_build_const_int32(gallivm, i), "");
-
  if (elts) {
 LLVMValueRef fetch_ptr;
 LLVMValueRef index_overflowed;
@@ -1673,7 +1670,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant,
 index_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
  true_index, fetch_elt_max,
  "index_overflowed");
-
+
 lp_build_if(&if_ctx, gallivm, index_overflowed);
 {
/* Generate maximum possible index so that
@@ -1697,8 +1694,32 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant,
 }
 lp_build_endif(&if_ctx);
 true_index = LLVMBuildLoad(builder, index_ptr, "true_index");
+/* vertex id has to be unaffected by the index bias and because
+ * indices inside our elements array have already had index
+ * bias applied we need to subtract it here to get back to the
+ * original index.
+ */
+vertex_id = LLVMBuildSub(
+   builder, true_index,
+   lp_build_const_int32(gallivm, draw->pt.user.eltBias), "");
+ } else {
+/* vertex id has to be unaffected by the original start index
+ * and because we abuse the 'start' variable to either represent
+ * the actual start index or the index at which the primitive
+ * was split (we split rendering into chunks of at most
+ * 4095-vertices) we need to back out the original start
+ * index out of our vertex id here.
+ */
+vertex_id = LLVMBuildSub(
+   builder, true_index,
+   lp_build_const_int32(gallivm, draw->start_index), "");
  }
 
+ system_values.vertex_id = LLVMBuildInsertElement(
+gallivm->builder,
+system_values.vertex_id, vertex_id,
+lp_build_const_int32(gallivm, i), "");
+
  for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
 struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
 LLVMValueRef vb_index =
diff --git a/src/gallium/auxiliary/draw/draw_private.h 
b/src/gallium/auxiliary/draw/draw_private.h
index d8cd8eb..868b6c7 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -311,6 +311,7 @@ struct draw_context
 
unsigned instance_id;
unsigned start_instance;
+   unsigned start_index;
 
 #ifdef HAVE_LLVM
struct draw_llvm *llvm;
diff --git a/src/gallium/auxiliary/draw/draw_pt.c 
b/src/gallium/auxiliary/draw/draw_pt.c
index fcc2405..5b16bc7 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -535,6 +535,7 @@ draw_vbo(struct draw_context *draw,
}
 
draw->pt.ma

[Mesa-dev] [PATCH 3/3] draw/llvmpipe: allow indexed rendering without index buffer bound

2013-07-23 Thread Zack Rusin

This is a wonky requirement of d3d10, which expects that if
an indexed rendering call is issued without an index buffer
bound, the rendering should still happen but with all indices
set to 0.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_private.h   |5 +++--
 src/gallium/auxiliary/draw/draw_pt.c|4 +---
 src/gallium/auxiliary/draw/draw_pt_vsplit.c |3 ++-
 src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h |6 +++---
 src/gallium/drivers/llvmpipe/lp_draw_arrays.c   |2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_private.h 
b/src/gallium/auxiliary/draw/draw_private.h
index 868b6c7..dfb71c9 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -478,8 +478,9 @@ draw_stats_clipper_primitives(struct draw_context *draw,
  * If the index buffer would overflow we return the
  * maximum possible index.
  */
-#define DRAW_GET_IDX(_elts, _i)   \
-   (((_i) >= draw->pt.user.eltMax) ? DRAW_MAX_FETCH_IDX : (_elts)[_i])
+#define DRAW_GET_IDX(_elts, _i) \
+   (!_elts ? (0) : \
+(((_i) >= draw->pt.user.eltMax) ? DRAW_MAX_FETCH_IDX : (_elts)[_i]))
 
 /**
  * Return index of the given viewport clamping it
diff --git a/src/gallium/auxiliary/draw/draw_pt.c 
b/src/gallium/auxiliary/draw/draw_pt.c
index 5b16bc7..bf54f85 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -347,7 +347,7 @@ draw_print_arrays(struct draw_context *draw, uint prim, int 
start, uint count)
do { \
   for (j = 0; j < count; j++) {   \
  i = draw_overflow_uadd(start, j, MAX_LOOP_IDX);  \
- if (i < elt_max && elements[i] == info->restart_index) { \
+ if (i < elt_max && (elements && elements[i] == info->restart_index)) 
{ \
 if (cur_count > 0) { \
/* draw elts up to prev pos */ \
draw_pt_arrays(draw, prim, cur_start, cur_count); \
@@ -471,8 +471,6 @@ draw_vbo(struct draw_context *draw,
info = &resolved_info;
 
assert(info->instance_count > 0);
-   if (info->indexed)
-  assert(draw->pt.user.elts);
 
count = info->count;
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit.c 
b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
index 625505d..395a38c 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vsplit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
@@ -116,7 +116,8 @@ vsplit_get_base_idx(struct vsplit_frontend *vsplit,
 unsigned start, unsigned fetch, unsigned *ofbit)
 {
struct draw_context *draw = vsplit->draw;
-   unsigned elt_idx = draw_overflow_uadd(start, fetch, MAX_ELT_IDX);
+   unsigned elt_idx = draw->pt.user.elts ? 
+  draw_overflow_uadd(start, fetch, MAX_ELT_IDX) : 0;
if (ofbit)
   *ofbit = 0;
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h 
b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
index 5d72ac6..4f462c0 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -44,8 +44,8 @@ CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend 
*vsplit,
unsigned fetch_start, fetch_count;
const ushort *draw_elts = NULL;
unsigned i;
-   const unsigned start = istart;
-   const unsigned end = istart + icount;
+   const unsigned start = ib ? istart : 0;
+   const unsigned end = ib ? istart + icount : icount;
 
/* If the index buffer overflows we'll need to run
 * through the normal paths */
@@ -55,7 +55,7 @@ CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend 
*vsplit,
   return FALSE;
 
/* use the ib directly */
-   if (min_index == 0 && sizeof(ib[0]) == sizeof(draw_elts[0])) {
+   if (ib && min_index == 0 && sizeof(ib[0]) == sizeof(draw_elts[0])) {
   if (icount > vsplit->max_vertices)
  return FALSE;
 
diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c 
b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index 4e23904..32d8f60 100644
--- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -83,7 +83,7 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct 
pipe_draw_info *info)
if (info->indexed) {
   unsigned available_space = ~0;
   mapped_indices = lp->index_buffer.user_buffer;
-  if (!mapped_indices) {
+  if (!mapped_indices && lp->index_buffer.buffer) {
  mapped_indices = llvmpipe_resource_data(lp->index_buffer.buffer);
  if (lp->index_buffer.buffer->width0 > lp->index_buffer.offset)
 available_space =
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] draw: always call and move util_cpu_detect() to draw context creation.

2013-07-23 Thread Zack Rusin

Nice catch! Thanks!

- Original Message -
> From: Roland Scheidegger 
> 
> CPU detection is not really x86 specific, the ifdef in particular didn't
> even catch x86_64.
> Also move to draw context creation which seems a lot cleaner, and just
> call it always (which seems like a better idea than rely on drivers doing
> this
> especially if drivers otherwise don't need it).
> This fixes https://bugs.freedesktop.org/show_bug.cgi?id=66806.
> (Because util_cpu_caps wasn't initialized when first calling
> util_fpstate_get()
> hence it returning zero, but it would later get initialized by rtasm
> translate
> code hence when draw call returned it unmasked all exceptions by calling
> util_fpstate_set(). This was happening only with DRAW_USE_LLVM=0 or not
> compiling with llvm, otherwise the llvm init code was calling it on time
> too.)
> ---
>  src/gallium/auxiliary/draw/draw_context.c |5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/draw/draw_context.c
> b/src/gallium/auxiliary/draw/draw_context.c
> index 4a08765..26af984 100644
> --- a/src/gallium/auxiliary/draw/draw_context.c
> +++ b/src/gallium/auxiliary/draw/draw_context.c
> @@ -57,8 +57,7 @@ draw_get_option_use_llvm(void)
>value = debug_get_bool_option("DRAW_USE_LLVM", TRUE);
>  
>  #ifdef PIPE_ARCH_X86
> -  util_cpu_detect();
> -  /* require SSE2 due to LLVM PR6960. */
> +  /* require SSE2 due to LLVM PR6960. XXX Might be fixed by now? */
>if (!util_cpu_caps.has_sse2)
>   value = FALSE;
>  #endif
> @@ -78,6 +77,8 @@ draw_create_context(struct pipe_context *pipe, boolean
> try_llvm)
> if (draw == NULL)
>goto err_out;
>  
> +   util_cpu_detect();
> +
>  #if HAVE_LLVM
> if (try_llvm && draw_get_option_use_llvm()) {
>draw->llvm = draw_llvm_create(draw);
> --
> 1.7.9.5
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] draw: fix vertex id computation

2013-07-24 Thread Zack Rusin

vertex id has to be unaffected by the start index (i.e. when calling
draw arrays with start_index = 5, the first vertex_id has to still
be 0, not 5) and it has to be equal to the index when performing
indexed rendering (in which case it has to be unaffected by the
index bias). This fixes our behavior.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_llvm.c |   36 +++-
 src/gallium/auxiliary/draw/draw_llvm.h |6 ++--
 src/gallium/auxiliary/draw/draw_private.h  |1 +
 src/gallium/auxiliary/draw/draw_pt.c   |1 +
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c   |6 ++--
 5 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index a3174b4..c195a2b 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1486,7 +1486,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant,
struct gallivm_state *gallivm = variant->gallivm;
LLVMContextRef context = gallivm->context;
LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
-   LLVMTypeRef arg_types[9];
+   LLVMTypeRef arg_types[10];
unsigned num_arg_types =
   elts ? Elements(arg_types) : Elements(arg_types) - 1;
LLVMTypeRef func_type;
@@ -1496,6 +1496,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant,
struct lp_type vs_type;
LLVMValueRef end, start;
LLVMValueRef count, fetch_elts, fetch_elt_max, fetch_count;
+   LLVMValueRef vertex_id_offset;
LLVMValueRef stride, step, io_itr;
LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
@@ -1541,6 +1542,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant,
arg_types[i++] = int32_type; /* stride */
arg_types[i++] = get_vb_ptr_type(variant);   /* pipe_vertex_buffer's */
arg_types[i++] = int32_type; /* instance_id */
+   arg_types[i++] = int32_type; /* vertex_id_offset */
 
func_type = LLVMFunctionType(int32_type, arg_types, num_arg_types, 0);
 
@@ -1565,6 +1567,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant,
stride= LLVMGetParam(variant_func, 5 + (elts ? 1 : 0));
vb_ptr= LLVMGetParam(variant_func, 6 + (elts ? 1 : 0));
system_values.instance_id = LLVMGetParam(variant_func, 7 + (elts ? 1 : 0));
+   vertex_id_offset  = LLVMGetParam(variant_func, 8 + (elts ? 1 : 0));
 
lp_build_name(context_ptr, "context");
lp_build_name(io_ptr, "io");
@@ -1572,6 +1575,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant,
lp_build_name(stride, "stride");
lp_build_name(vb_ptr, "vb");
lp_build_name(system_values.instance_id, "instance_id");
+   lp_build_name(vertex_id_offset, "vertex_id_offset");
 
if (elts) {
   fetch_elts= LLVMGetParam(variant_func, 3);
@@ -1646,22 +1650,19 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant,
 #endif
   system_values.vertex_id = lp_build_zero(gallivm, lp_type_uint_vec(32, 
32*vector_length));
   for (i = 0; i < vector_length; ++i) {
- LLVMValueRef true_index =
+ LLVMValueRef vert_index =
 LLVMBuildAdd(builder,
  lp_loop.counter,
  lp_build_const_int32(gallivm, i), "");
- true_index = LLVMBuildAdd(builder, start, true_index, "");
+ LLVMValueRef true_index =
+LLVMBuildAdd(builder, start, vert_index, "");
+ LLVMValueRef vertex_id;
 
  /* make sure we're not out of bounds which can happen
   * if fetch_count % 4 != 0, because on the last iteration
   * a few of the 4 vertex fetches will be out of bounds */
  true_index = lp_build_min(&bld, true_index, fetch_max);
 
- system_values.vertex_id = LLVMBuildInsertElement(
-gallivm->builder,
-system_values.vertex_id, true_index,
-lp_build_const_int32(gallivm, i), "");
-
  if (elts) {
 LLVMValueRef fetch_ptr;
 LLVMValueRef index_overflowed;
@@ -1673,7 +1674,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant,
 index_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
  true_index, fetch_elt_max,
  "index_overflowed");
-
+
 lp_build_if(&if_ctx, gallivm, index_overflowed);
 {
/* Generate maximum possible index so that
@@ -1698,6 +1699,23 @@ draw_llvm_generate(struc

Re: [Mesa-dev] [PATCH 06/34] draw/gs: fix allocation of buffer for GS output vertices

2013-07-29 Thread Zack Rusin

That looks wrong to me. We already account for the "other fields" in the 
vertex_size.

- Original Message -
> From: Bryan Cain 
> 
> Before, it accounted for the size of the vertices but not the other fields
> in the vertex_header struct, which caused memory corruption.
> ---
>  src/gallium/auxiliary/draw/draw_gs.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/auxiliary/draw/draw_gs.c
> b/src/gallium/auxiliary/draw/draw_gs.c
> index cd63e2b..78727c6 100644
> --- a/src/gallium/auxiliary/draw/draw_gs.c
> +++ b/src/gallium/auxiliary/draw/draw_gs.c
> @@ -560,7 +560,8 @@ int draw_geometry_shader_run(struct draw_geometry_shader
> *shader,
> /* we allocate exactly one extra vertex per primitive to allow the GS to
> emit
>  * overflown vertices into some area where they won't harm anyone */
> output_verts->verts =
> -  (struct vertex_header *)MALLOC(output_verts->vertex_size *
> +  (struct vertex_header *)MALLOC(sizeof(struct vertex_header) +
> + output_verts->vertex_size *
>   max_out_prims *
>   shader->primitive_boundary);
>  
> --
> 1.8.3.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 24/34] gallivm: Fix indirect input fetches for gs.

2013-07-29 Thread Zack Rusin

That looks wrong to me as well. What would make get_indirect_index different in 
this case?

- Original Message -
> From: Fabian Bieler 
> 
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index c199385..1d27e81 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -1120,6 +1120,9 @@ emit_fetch_gs_input(
>reg->Register.File,
>reg->Register.Index,
>&reg->Indirect);
> +
> +  LLVMValueRef ind = lp_build_const_int32(gallivm, 0);
> +  attrib_index = LLVMBuildExtractElement(builder, attrib_index, ind,
> "");
> } else {
>attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
> }
> --
> 1.8.3.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] tgsi: add ucmp to the list of opcodes

2013-07-30 Thread Zack Rusin

we forgot to add ucmp to the list of opcodes, so it was never
generated for ureg.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h |2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h 
b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index b87c4b1..93ec0b5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -198,6 +198,8 @@ OP12(SVIEWINFO)
 OP13(SAMPLE_POS)
 OP12(SAMPLE_INFO)
 
+OP13(UCMP)
+
 
 #undef OP00
 #undef OP01
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallivm: obey clarified shift behavior

2013-07-30 Thread Zack Rusin

> From: Roland Scheidegger 
> 
> llvm shifts are undefined for shift counts exceeding (or matching) bit width,
> so need to apply a mask for the tgsi shift instructions.
> 
> v2: only use mask for the tgsi shift instructions, not for the build shift
> helpers. None of the internal callers need this behavior, and while llvm can
> optimize away the masking for constants there are legitimate cases where it
> might not be able to do so even if we know that shift count must be smaller
> than type width (currently all such callers do not use the build shift
> helpers).

Looks good to me
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] llvmpipe: make the front-face behavior match the gallium spec

2013-07-31 Thread Zack Rusin

The spec says that front-face is true if the value is >0 and false
if it's <0. To make sure that we follow the spec, let's just
subtract 0.5 from our value (llvmpipe did 1 for frontface and 0
otherwise), which will get us a positive number for frontface and
a negative one for backface.

Signed-off-by: Zack Rusin 
---
 src/gallium/drivers/llvmpipe/lp_state_setup.c |5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c 
b/src/gallium/drivers/llvmpipe/lp_state_setup.c
index bb5cfc4..cecfbce 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -182,7 +182,10 @@ emit_facing_coef(struct gallivm_state *gallivm,
LLVMValueRef a0_0 = args->facing;
LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, "");
LLVMValueRef zero = lp_build_const_float(gallivm, 0.0);
-   LLVMValueRef a0 = vec4f(gallivm, a0_0f, zero, zero, zero, "facing");
+   LLVMValueRef face_val = LLVMBuildFSub(builder, a0_0f,
+ lp_build_const_float(gallivm, 0.5),
+ "");
+   LLVMValueRef a0 = vec4f(gallivm, face_val, zero, zero, zero, "facing");
LLVMValueRef zerovec = vec4f_from_scalar(gallivm, zero, "zero");
 
store_coef(gallivm, args, slot, a0, zerovec, zerovec);
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] draw: inject frontface info into wireframe outputs

2013-07-31 Thread Zack Rusin

The draw module can decompose primitives into wireframe models, which
is a fancy word for 'lines'. Unfortunately, that decomposition means
that we weren't able to preserve the original front-face info which
could be derived from the original primitives (lines don't have a
'face'). To fix it, allow the draw module to inject a fake face semantic
into outputs from which the backends can figure out the original
frontfacing info of the primitives.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_context.c   |   43 
 src/gallium/auxiliary/draw/draw_context.h   |6 +++
 src/gallium/auxiliary/draw/draw_pipe.h  |3 ++
 src/gallium/auxiliary/draw/draw_pipe_unfilled.c |   49 +++
 src/gallium/drivers/i915/i915_state_derived.c   |2 +
 src/gallium/drivers/llvmpipe/lp_context.h   |3 ++
 src/gallium/drivers/llvmpipe/lp_setup.c |1 +
 src/gallium/drivers/llvmpipe/lp_setup_context.h |1 +
 src/gallium/drivers/llvmpipe/lp_setup_line.c|   14 ++-
 src/gallium/drivers/llvmpipe/lp_state_derived.c |9 +
 src/gallium/drivers/r300/r300_state_derived.c   |1 +
 src/gallium/drivers/softpipe/sp_state_derived.c |2 +
 src/gallium/drivers/svga/svga_swtnl_state.c |1 +
 13 files changed, 133 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_context.c 
b/src/gallium/auxiliary/draw/draw_context.c
index 4a6ba1a..2e95b5c 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -39,6 +39,7 @@
 #include "util/u_helpers.h"
 #include "util/u_prim.h"
 #include "draw_context.h"
+#include "draw_pipe.h"
 #include "draw_vs.h"
 #include "draw_gs.h"
 
@@ -540,6 +541,22 @@ draw_get_shader_info(const struct draw_context *draw)
}
 }
 
+/**
+ * Prepare outputs slots from the draw module
+ *
+ * Certain parts of the draw module can emit additional
+ * outputs that can be quite useful to the backends, a good
+ * example of it is the process of decomposing primitives
+ * into wireframes (aka. lines) which normally would lose
+ * the face-side information, but using this method we can
+ * inject another shader output which passes the original
+ * face side information to the backend.
+ */
+void
+draw_prepare_shader_outputs(struct draw_context *draw)
+{
+   draw_unfilled_prepare_outputs(draw, draw->pipeline.unfilled);
+}
 
 /**
  * Ask the draw module for the location/slot of the given vertex attribute in
@@ -973,3 +990,29 @@ draw_stats_clipper_primitives(struct draw_context *draw,
   }
}
 }
+
+
+/**
+ * Returns true if the draw module will inject the frontface
+ * info into the outputs.
+ *
+ * Given the specified primitive and rasterizer state
+ * the function will figure out if the draw module
+ * will inject the front-face information into shader
+ * outputs. This is done to preserve the front-facing
+ * info when decomposing primitives into wireframes.
+ */
+boolean
+draw_will_inject_frontface(const struct draw_context *draw)
+{
+   unsigned reduced_prim = u_reduced_prim(draw->pt.prim);
+   const struct pipe_rasterizer_state *rast = draw->rasterizer;
+
+   if (reduced_prim != PIPE_PRIM_TRIANGLES) {
+  return FALSE;
+   }
+
+   return (rast &&
+   (rast->fill_front != PIPE_POLYGON_MODE_FILL ||
+rast->fill_back != PIPE_POLYGON_MODE_FILL));
+}
diff --git a/src/gallium/auxiliary/draw/draw_context.h 
b/src/gallium/auxiliary/draw/draw_context.h
index 4a1b27e..0815047 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -126,10 +126,16 @@ draw_install_pstipple_stage(struct draw_context *draw, 
struct pipe_context *pipe
 struct tgsi_shader_info *
 draw_get_shader_info(const struct draw_context *draw);
 
+void
+draw_prepare_shader_outputs(struct draw_context *draw);
+
 int
 draw_find_shader_output(const struct draw_context *draw,
 uint semantic_name, uint semantic_index);
 
+boolean
+draw_will_inject_frontface(const struct draw_context *draw);
+
 uint
 draw_num_shader_outputs(const struct draw_context *draw);
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe.h 
b/src/gallium/auxiliary/draw/draw_pipe.h
index 4792507..2e48b56 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.h
+++ b/src/gallium/auxiliary/draw/draw_pipe.h
@@ -102,6 +102,9 @@ void draw_pipe_passthrough_line(struct draw_stage *stage, 
struct prim_header *he
 void draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header 
*header);
 
 
+void draw_unfilled_prepare_outputs(struct draw_context *context,
+   struct draw_stage *stage);
+
 
 /**
  * Get a writeable copy of a vertex.
diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c 
b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
index d87741b..d8a603f 100644
--- a/src/gallium/a

Re: [Mesa-dev] [PATCH 2/2] draw: inject frontface info into wireframe outputs

2013-07-31 Thread Zack Rusin

> > +   if (draw_will_inject_frontface(lp_context->draw) &&
> I think it's annoying you have to do these calls to determine if there's
> a valid frontface here for each line instead of just per draw call but
> it doesn't seem easy to avoid it.

Yea, there's no trivial way of avoiding it.

> Also, no love for llvmpipe point face? I realize d3d10 doesn't require
> it but OpenGL (and IIRC d3d9) do.

I didn't know of any tests for the points and we care only about lines right 
now. It's just four extra lines of code or so, so I can trivially add it but I 
don't have anything to test it with.

> Looks like quite a heavy interface (and sort of silly to allocate 128
> bits in the vertex data (so actually twice that for one line) for 1 bit
> of information but given all our data passed on to the line/point funcs
> are float4 I don't really see any other easy way neither), but seems all
> necessary unfortunately. I guess another option would be to pass the
> face info always along the vertex data no matter what (which would mean
> all those additional calls for setting up outputs, determining if
> there's a valid frontface etc. could go along with the storage needed)
> for all primitives to the point/line/tri funcs but I'm not really
> thrilled about that idea neither (passing it for tris so it doesn't have
> to be recalculated may or may not be a good idea neither).

Yes, plus then we'd need a brand new pipeline stage that is always run and that 
is largely useless for the vast majority of rendering. It's sort of a lose-lose 
scenario. The only thing that is clear is that we have to pass the data along 
the shader outputs, everything else is a messy glue to make it possible.

z
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/8] tgsi: detect prim id and front face usage in fs

2013-08-01 Thread Zack Rusin

Adding code to detect the usage of prim id and front face
semantics in fragment shaders.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/tgsi/tgsi_scan.c |9 +++--
 src/gallium/auxiliary/tgsi/tgsi_scan.h |1 +
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c 
b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 1fe1a07..e7bf6e6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -166,9 +166,14 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
   info->input_cylindrical_wrap[reg] = 
(ubyte)fulldecl->Interp.CylindricalWrap;
   info->num_inputs++;
 
-  if (procType == TGSI_PROCESSOR_FRAGMENT &&
-  fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION)
+  if (procType == TGSI_PROCESSOR_FRAGMENT) {
+ if (fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION)
 info->reads_position = TRUE;
+ else if (fulldecl->Semantic.Name == TGSI_SEMANTIC_PRIMID)
+info->uses_primid = TRUE;
+ else if (fulldecl->Semantic.Name == TGSI_SEMANTIC_FACE)
+info->uses_frontface = TRUE;
+  }
}
else if (file == TGSI_FILE_SYSTEM_VALUE) {
   unsigned index = fulldecl->Range.First;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h 
b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index cfa2b8e..e2fa73a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -74,6 +74,7 @@ struct tgsi_shader_info
boolean uses_instanceid;
boolean uses_vertexid;
boolean uses_primid;
+   boolean uses_frontface;
boolean origin_lower_left;
boolean pixel_center_integer;
boolean color0_writes_all_cbufs;
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/8] draw: stop crashing with extra shader outputs

2013-08-01 Thread Zack Rusin

Draw sometimes injects extra shader outputs (aa points, lines or
front face). Unfortunately, most of the pipeline and llvm code
didn't handle them at all. It only worked if the number of inputs
happened to be greater than or equal to the number of shader outputs
plus the extra injected outputs. In particular, when running
the pipeline which depends on the vertex_id in the vertex_header,
things were completely broken. The patch adjusts the code to
correctly use the total number of shader outputs (the standard
ones plus the injected ones) to make it all stop crashing and
work.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_context.c  |   43 
 src/gallium/auxiliary/draw/draw_context.h  |5 +++
 src/gallium/auxiliary/draw/draw_gs.c   |2 +-
 src/gallium/auxiliary/draw/draw_llvm.c |3 ++
 src/gallium/auxiliary/draw/draw_llvm.h |4 +-
 src/gallium/auxiliary/draw/draw_pipe.h |3 +-
 .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  |6 +--
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c   |8 +---
 src/gallium/auxiliary/draw/draw_vs_variant.c   |2 +-
 9 files changed, 61 insertions(+), 15 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_context.c 
b/src/gallium/auxiliary/draw/draw_context.c
index 2e95b5c..8bf3596 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -622,6 +622,49 @@ draw_num_shader_outputs(const struct draw_context *draw)
 
 
 /**
+ * Return total number of the vertex shader outputs.  This function
+ * also counts any extra vertex output attributes that may
+ * be filled in by some draw stages (such as AA point, AA line,
+ * front face).
+ */
+uint
+draw_total_vs_shader_outputs(const struct draw_context *draw)
+{
+   const struct tgsi_shader_info *info = &draw->vs.vertex_shader->info;
+   uint count;
+
+   count = info->num_outputs;
+   count += draw->extra_shader_outputs.num;
+
+   return count;
+}
+
+/**
+ * Return total number of the geometry shader outputs. This function
+ * also counts any extra geometry output attributes that may
+ * be filled in by some draw stages (such as AA point, AA line, front
+ * face).
+ */
+uint
+draw_total_gs_shader_outputs(const struct draw_context *draw)
+{
+   
+   const struct tgsi_shader_info *info;
+   uint count;
+
+   if (!draw->gs.geometry_shader)
+  return 0;
+
+   info = &draw->gs.geometry_shader->info;
+
+   count = info->num_outputs;
+   count += draw->extra_shader_outputs.num;
+
+   return count;
+}
+
+
+/**
  * Provide TGSI sampler objects for vertex/geometry shaders that use
  * texture fetches.  This state only needs to be set once per context.
  * This might only be used by software drivers for the time being.
diff --git a/src/gallium/auxiliary/draw/draw_context.h 
b/src/gallium/auxiliary/draw/draw_context.h
index 0815047..e9aa24d 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -139,6 +139,11 @@ draw_will_inject_frontface(const struct draw_context 
*draw);
 uint
 draw_num_shader_outputs(const struct draw_context *draw);
 
+uint
+draw_total_vs_shader_outputs(const struct draw_context *draw);
+
+uint
+draw_total_gs_shader_outputs(const struct draw_context *draw);
 
 void
 draw_texture_sampler(struct draw_context *draw,
diff --git a/src/gallium/auxiliary/draw/draw_gs.c 
b/src/gallium/auxiliary/draw/draw_gs.c
index cd63e2b..32fd91f 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -534,7 +534,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader 
*shader,
 {
const float (*input)[4] = (const float (*)[4])input_verts->verts->data;
unsigned input_stride = input_verts->vertex_size;
-   unsigned num_outputs = shader->info.num_outputs;
+   unsigned num_outputs = draw_total_gs_shader_outputs(shader->draw);
unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * 
sizeof(float);
unsigned num_input_verts = input_prim->linear ?
   input_verts->count :
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index c195a2b..8ecb3e7 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1827,6 +1827,7 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char 
*store)
key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
key->has_gs = llvm->draw->gs.geometry_shader != NULL;
+   key->num_outputs = draw_total_vs_shader_outputs(llvm->draw);
key->pad1 = 0;
 
/* All variants of this shader will have the same value for
@@ -2264,6 +2265,8 @@ draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, 
char *store)
 
key = (struct draw_gs_llvm_variant_key *)store;
 
+

[Mesa-dev] [PATCH 3/8] draw/llvm: add some extra debugging output

2013-08-01 Thread Zack Rusin

when dumping shader outputs it's nice to have the integer
values of the outputs, in particular because some values
are integers.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_llvm.c |6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 8ecb3e7..df0d2ed 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -977,6 +977,12 @@ convert_to_aos(struct gallivm_state *gallivm,
 
LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
  chan, 0));
 lp_build_print_value(gallivm, "val = ", out);
+{
+   LLVMValueRef iv =
+  LLVMBuildBitCast(builder, out, 
lp_build_int_vec_type(gallivm, soa_type), "");
+   
+   lp_build_print_value(gallivm, "  ival = ", iv);
+}
 #endif
 soa[chan] = out;
  }
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/8] draw: make sure clipping works with injected outputs

2013-08-01 Thread Zack Rusin

clipping would drop the extra outputs because it always
used the number of standard vertex shader outputs, without
geometry shader or extra outputs. The commit makes sure
that clipping with geometry shaders which have more outputs
than the current vertex shader and with extra outputs correctly
propagates the entire vertex.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_pipe_clip.c |   89 ---
 1 file changed, 54 insertions(+), 35 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c 
b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index e83586e..b76e9a5 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -136,7 +136,7 @@ static void interp( const struct clip_stage *clip,
const struct vertex_header *in,
 unsigned viewport_index )
 {
-   const unsigned nr_attrs = draw_current_shader_outputs(clip->stage.draw);
+   const unsigned nr_attrs = draw_num_shader_outputs(clip->stage.draw);
const unsigned pos_attr = 
draw_current_shader_position_output(clip->stage.draw);
const unsigned clip_attr = 
draw_current_shader_clipvertex_output(clip->stage.draw);
unsigned j;
@@ -264,7 +264,6 @@ static void emit_poly( struct draw_stage *stage,
  header.flags |= edge_last;
 
   if (DEBUG_CLIP) {
- const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
  uint j, k;
  debug_printf("Clipped tri: (flat-shade-first = %d)\n",
   stage->draw->rasterizer->flatshade_first);
@@ -274,7 +273,7 @@ static void emit_poly( struct draw_stage *stage,
  header.v[j]->clip[1],
  header.v[j]->clip[2],
  header.v[j]->clip[3]);
-for (k = 0; k < vs->info.num_outputs; k++) {
+for (k = 0; k < draw_num_shader_outputs(stage->draw); k++) {
debug_printf("  Vert %d: Attr %d:  %f %f %f %f\n", j, k,
 header.v[j]->data[k][0],
 header.v[j]->data[k][1],
@@ -283,7 +282,6 @@ static void emit_poly( struct draw_stage *stage,
 }
  }
   }
-
   stage->next->tri( stage->next, &header );
}
 }
@@ -609,6 +607,35 @@ clip_tri( struct draw_stage *stage,
 }
 
 
+static int
+find_interp(const struct draw_fragment_shader *fs, int *indexed_interp,
+uint semantic_name, uint semantic_index)
+{
+   int interp;
+   /* If it's gl_{Front,Back}{,Secondary}Color, pick up the mode
+* from the array we've filled before. */
+   if (semantic_name == TGSI_SEMANTIC_COLOR ||
+   semantic_name == TGSI_SEMANTIC_BCOLOR) {
+  interp = indexed_interp[semantic_index];
+   } else {
+  /* Otherwise, search in the FS inputs, with a decent default
+   * if we don't find it.
+   */
+  uint j;
+  interp = TGSI_INTERPOLATE_PERSPECTIVE;
+  if (fs) {
+ for (j = 0; j < fs->info.num_inputs; j++) {
+if (semantic_name == fs->info.input_semantic_name[j] &&
+semantic_index == fs->info.input_semantic_index[j]) {
+   interp = fs->info.input_interpolate[j];
+   break;
+}
+ }
+  }
+   }
+   return interp;
+}
+
 /* Update state.  Could further delay this until we hit the first
  * primitive that really requires clipping.
  */
@@ -616,11 +643,9 @@ static void
 clip_init_state( struct draw_stage *stage )
 {
struct clip_stage *clipper = clip_stage( stage );
-   const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
-   const struct draw_geometry_shader *gs = stage->draw->gs.geometry_shader;
const struct draw_fragment_shader *fs = stage->draw->fs.fragment_shader;
-   uint i;
-   const struct tgsi_shader_info *vs_info = gs ? &gs->info : &vs->info;
+   uint i, j;
+   const struct tgsi_shader_info *info = draw_get_shader_info(stage->draw);
 
/* We need to know for each attribute what kind of interpolation is
 * done on it (flat, smooth or noperspective).  But the information
@@ -663,42 +688,36 @@ clip_init_state( struct draw_stage *stage )
 
clipper->num_flat_attribs = 0;
memset(clipper->noperspective_attribs, 0, 
sizeof(clipper->noperspective_attribs));
-   for (i = 0; i < vs_info->num_outputs; i++) {
-  /* Find the interpolation mode for a specific attribute
-   */
-  int interp;
-
-  /* If it's gl_{Front,Back}{,Secondary}Color, pick up the mode
-   * from the array we've filled before. */
-  if (vs_info->output_semantic_name[i] == TGSI_SEMANTIC_COLOR ||
-  vs_info->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) {
- interp = indexed_interp[vs_info->output_semantic_index[i]];
-  } else {
-

[Mesa-dev] [PATCH 5/8] draw: use the vertex size

2013-08-01 Thread Zack Rusin

Instead of using the magical 4 use the above computed
vertex size. Doesn't change the behavior, just makes the code
a bit cleaner.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_pipe_vbuf.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c 
b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index d3b38eb..092440e 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -250,7 +250,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
}
 
hw_key.nr_elements = vbuf->vinfo->num_attribs;
-   hw_key.output_stride = vbuf->vinfo->size * 4;
+   hw_key.output_stride = vbuf->vertex_size;
 
/* Don't bother with caching at this stage:
 */
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 6/8] draw: fix front face injection

2013-08-01 Thread Zack Rusin

Inject front face only if the fragment shader uses it and
propagate through all channels because otherwise we'll
need to figure out the exact swizzle that the fs expects and
it's just simpler to make sure all the components within
the front face register are correctly set.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_pipe_unfilled.c |   24 ++-
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c 
b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
index d8a603f..f9a31b0 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
@@ -37,6 +37,7 @@
 #include "pipe/p_defines.h"
 #include "draw_private.h"
 #include "draw_pipe.h"
+#include "draw_fs.h"
 
 
 struct unfilled_stage {
@@ -67,18 +68,20 @@ inject_front_face_info(struct draw_stage *stage,
   (stage->draw->rasterizer->front_ccw && ccw) ||
   (!stage->draw->rasterizer->front_ccw && !ccw));
unsigned slot = unfilled->face_slot;
-   struct vertex_header *v0 = header->v[0];
-   struct vertex_header *v1 = header->v[1];
-   struct vertex_header *v2 = header->v[2];
+   unsigned i;
 
/* In case the backend doesn't care about it */
if (slot < 0) {
   return;
}
 
-   v0->data[slot][0] = is_front_face;
-   v1->data[slot][0] = is_front_face;
-   v2->data[slot][0] = is_front_face;
+   for (i = 0; i < 3; ++i) {
+  struct vertex_header *v = header->v[i];
+  v->data[slot][0] = is_front_face;
+  v->data[slot][1] = is_front_face;
+  v->data[slot][2] = is_front_face;
+  v->data[slot][3] = is_front_face;
+   }
 }
 

@@ -231,9 +234,12 @@ draw_unfilled_prepare_outputs( struct draw_context *draw,
 {
struct unfilled_stage *unfilled = unfilled_stage(stage);
const struct pipe_rasterizer_state *rast = draw ? draw->rasterizer : 0;
-   if (rast &&
-   (rast->fill_front != PIPE_POLYGON_MODE_FILL ||
-rast->fill_back != PIPE_POLYGON_MODE_FILL)) {
+   boolean is_unfilled = (rast &&
+  (rast->fill_front != PIPE_POLYGON_MODE_FILL ||
+   rast->fill_back != PIPE_POLYGON_MODE_FILL));
+   const struct draw_fragment_shader *fs = draw->fs.fragment_shader;
+   
+   if (is_unfilled && fs && fs->info.uses_frontface)  {
   unfilled->face_slot = draw_alloc_extra_vertex_attrib(
  stage->draw, TGSI_SEMANTIC_FACE, 0);
} else {
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 7/8] llvmpipe: don't interpolate front face or prim id

2013-08-01 Thread Zack Rusin

The loop was iterating over all the fs inputs and setting them
to perspective interpolation, then after the loop we were
creating extra output slots with the correct interpolation. Instead
of injecting bogus extra outputs, just set the interpolation
on front face and prim id correctly when doing the initial scan
of fs inputs.

Signed-off-by: Zack Rusin 
---
 src/gallium/drivers/llvmpipe/lp_state_derived.c |   30 +++
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c 
b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 5a51b50..7b1e6f6 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -69,8 +69,8 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
vinfo->num_attribs = 0;
 
vs_index = draw_find_shader_output(llvmpipe->draw,
-   TGSI_SEMANTIC_POSITION,
-   0);
+  TGSI_SEMANTIC_POSITION,
+  0);
 
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
 
@@ -89,12 +89,20 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
  llvmpipe->color_slot[idx] = (int)vinfo->num_attribs;
   }
 
-  /*
-   * Emit the requested fs attribute for all but position.
-   */
-  draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
+  if (lpfs->info.base.input_semantic_index[i] == 0 &&
+  lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_FACE) {
+ llvmpipe->face_slot = vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+  } else if (lpfs->info.base.input_semantic_index[i] == 0 &&
+ lpfs->info.base.input_semantic_name[i] == 
TGSI_SEMANTIC_PRIMID) {
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+  } else {
+ /*
+  * Emit the requested fs attribute for all but position.
+  */
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
+  }
}
-
/* Figure out if we need bcolor as well.
 */
for (i = 0; i < 2; i++) {
@@ -140,14 +148,6 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
   llvmpipe->layer_slot = 0;
}
 
-   /* Check for a fake front face for unfilled primitives*/
-   vs_index = draw_find_shader_output(llvmpipe->draw,
-  TGSI_SEMANTIC_FACE, 0);
-   if (vs_index >= 0) {
-  llvmpipe->face_slot = vinfo->num_attribs;
-  draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
-   }
-
draw_compute_vertex_size(vinfo);
lp_setup_set_vertex_info(llvmpipe->setup, vinfo);
 }
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 8/8] draw: implement proper primitive assembler as a pipeline stage

2013-08-01 Thread Zack Rusin

we used to have a face primitive assembler that we ran after if
the gs was missing but we had adjacency primitives in the pipeline,
let's convert it to a pipeline stage, which allows us to use it
to inject outputs (primitive id) into the vertices. it's also
a lot cleaner because the decomposition is already handled for us.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/Makefile.sources |2 +-
 src/gallium/auxiliary/draw/draw_context.c  |1 +
 src/gallium/auxiliary/draw/draw_pipe.c |4 +
 src/gallium/auxiliary/draw/draw_pipe.h |5 +
 src/gallium/auxiliary/draw/draw_pipe_ia.c  |  253 
 src/gallium/auxiliary/draw/draw_pipe_validate.c|   15 +-
 src/gallium/auxiliary/draw/draw_prim_assembler.c   |  225 -
 src/gallium/auxiliary/draw/draw_prim_assembler.h   |   62 -
 .../auxiliary/draw/draw_prim_assembler_tmp.h   |   31 ---
 src/gallium/auxiliary/draw/draw_private.h  |1 +
 .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  |   18 +-
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c   |   18 +-
 12 files changed, 283 insertions(+), 352 deletions(-)
 create mode 100644 src/gallium/auxiliary/draw/draw_pipe_ia.c
 delete mode 100644 src/gallium/auxiliary/draw/draw_prim_assembler.c
 delete mode 100644 src/gallium/auxiliary/draw/draw_prim_assembler.h
 delete mode 100644 src/gallium/auxiliary/draw/draw_prim_assembler_tmp.h

diff --git a/src/gallium/auxiliary/Makefile.sources 
b/src/gallium/auxiliary/Makefile.sources
index acbcef7..ee93e8b 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -13,6 +13,7 @@ C_SOURCES := \
draw/draw_pipe_clip.c \
draw/draw_pipe_cull.c \
draw/draw_pipe_flatshade.c \
+draw/draw_pipe_ia.c \
draw/draw_pipe_offset.c \
draw/draw_pipe_pstipple.c \
draw/draw_pipe_stipple.c \
@@ -23,7 +24,6 @@ C_SOURCES := \
draw/draw_pipe_vbuf.c \
draw/draw_pipe_wide_line.c \
draw/draw_pipe_wide_point.c \
-   draw/draw_prim_assembler.c \
draw/draw_pt.c \
draw/draw_pt_emit.c \
draw/draw_pt_fetch.c \
diff --git a/src/gallium/auxiliary/draw/draw_context.c 
b/src/gallium/auxiliary/draw/draw_context.c
index 8bf3596..bbb2904 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -555,6 +555,7 @@ draw_get_shader_info(const struct draw_context *draw)
 void
 draw_prepare_shader_outputs(struct draw_context *draw)
 {
+   draw_ia_prepare_outputs(draw, draw->pipeline.ia);
draw_unfilled_prepare_outputs(draw, draw->pipeline.unfilled);
 }
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c 
b/src/gallium/auxiliary/draw/draw_pipe.c
index f1ee6cb..8140299 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -49,6 +49,7 @@ boolean draw_pipeline_init( struct draw_context *draw )
draw->pipeline.clip  = draw_clip_stage( draw );
draw->pipeline.flatshade = draw_flatshade_stage( draw );
draw->pipeline.cull  = draw_cull_stage( draw );
+   draw->pipeline.ia= draw_ia_stage( draw );
draw->pipeline.validate  = draw_validate_stage( draw );
draw->pipeline.first = draw->pipeline.validate;
 
@@ -61,6 +62,7 @@ boolean draw_pipeline_init( struct draw_context *draw )
!draw->pipeline.clip ||
!draw->pipeline.flatshade ||
!draw->pipeline.cull ||
+   !draw->pipeline.ia ||
!draw->pipeline.validate)
   return FALSE;
 
@@ -95,6 +97,8 @@ void draw_pipeline_destroy( struct draw_context *draw )
   draw->pipeline.flatshade->destroy( draw->pipeline.flatshade );
if (draw->pipeline.cull)
   draw->pipeline.cull->destroy( draw->pipeline.cull );
+   if (draw->pipeline.ia)
+  draw->pipeline.ia->destroy( draw->pipeline.ia );
if (draw->pipeline.validate)
   draw->pipeline.validate->destroy( draw->pipeline.validate );
if (draw->pipeline.aaline)
diff --git a/src/gallium/auxiliary/draw/draw_pipe.h 
b/src/gallium/auxiliary/draw/draw_pipe.h
index 70c286f..70822a4 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.h
+++ b/src/gallium/auxiliary/draw/draw_pipe.h
@@ -91,7 +91,10 @@ extern struct draw_stage *draw_stipple_stage( struct 
draw_context *context );
 extern struct draw_stage *draw_wide_line_stage( struct draw_context *context );
 extern struct draw_stage *draw_wide_point_stage( struct draw_context *context 
);
 extern struct draw_stage *draw_validate_stage( struct draw_context *context );
+extern struct draw_stage *draw_ia_stage(struct draw_context *context);
 
+boolean draw_ia_stage_required(const struct draw_context *context,
+   unsigned prim);
 
 extern void draw_free_temp_verts( struct draw_stage *stage );
 extern boolean draw_alloc_temp_verts( struc

Re: [Mesa-dev] [PATCH 8/8] draw: implement proper primitive assembler as a pipeline stage

2013-08-02 Thread Zack Rusin

Yea, it's quite bonkers, but that's the way it has to be to make it work right 
now. Personally I'd really like to write a new version of draw, without the 5 
emit paths, 4 different vertex shading paths, with an interface that is capable of 
emitting more than just float[4]'s... For now though this works, even if it is 
very ugly.

z

- Original Message -
> Am 02.08.2013 08:28, schrieb Zack Rusin:
> > we used to have a face primitive assembler that we ran after if
> > the gs was missing but we had adjacency primitives in the pipeline,
> > lets convert it to a pipeline stage, which allows us to use it
> > to inject outputs (primitive id) into the vertices. it's also
> > a lot cleaner because the decomposition is already handled for us.
> > 
> > Signed-off-by: Zack Rusin 
> > ---
> >  src/gallium/auxiliary/Makefile.sources |2 +-
> >  src/gallium/auxiliary/draw/draw_context.c  |1 +
> >  src/gallium/auxiliary/draw/draw_pipe.c |4 +
> >  src/gallium/auxiliary/draw/draw_pipe.h |5 +
> >  src/gallium/auxiliary/draw/draw_pipe_ia.c  |  253
> >  
> >  src/gallium/auxiliary/draw/draw_pipe_validate.c|   15 +-
> >  src/gallium/auxiliary/draw/draw_prim_assembler.c   |  225
> >  -
> >  src/gallium/auxiliary/draw/draw_prim_assembler.h   |   62 -
> >  .../auxiliary/draw/draw_prim_assembler_tmp.h   |   31 ---
> >  src/gallium/auxiliary/draw/draw_private.h  |1 +
> >  .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  |   18 +-
> >  .../draw/draw_pt_fetch_shade_pipeline_llvm.c   |   18 +-
> >  12 files changed, 283 insertions(+), 352 deletions(-)
> >  create mode 100644 src/gallium/auxiliary/draw/draw_pipe_ia.c
> >  delete mode 100644 src/gallium/auxiliary/draw/draw_prim_assembler.c
> >  delete mode 100644 src/gallium/auxiliary/draw/draw_prim_assembler.h
> >  delete mode 100644 src/gallium/auxiliary/draw/draw_prim_assembler_tmp.h
> > 
> > diff --git a/src/gallium/auxiliary/Makefile.sources
> > b/src/gallium/auxiliary/Makefile.sources
> > index acbcef7..ee93e8b 100644
> > --- a/src/gallium/auxiliary/Makefile.sources
> > +++ b/src/gallium/auxiliary/Makefile.sources
> > @@ -13,6 +13,7 @@ C_SOURCES := \
> > draw/draw_pipe_clip.c \
> > draw/draw_pipe_cull.c \
> > draw/draw_pipe_flatshade.c \
> > +draw/draw_pipe_ia.c \
> Formatting looks off here.
> 
> > draw/draw_pipe_offset.c \
> > draw/draw_pipe_pstipple.c \
> > draw/draw_pipe_stipple.c \
> > @@ -23,7 +24,6 @@ C_SOURCES := \
> > draw/draw_pipe_vbuf.c \
> > draw/draw_pipe_wide_line.c \
> > draw/draw_pipe_wide_point.c \
> > -   draw/draw_prim_assembler.c \
> > draw/draw_pt.c \
> > draw/draw_pt_emit.c \
> > draw/draw_pt_fetch.c \
> > diff --git a/src/gallium/auxiliary/draw/draw_context.c
> > b/src/gallium/auxiliary/draw/draw_context.c
> > index 8bf3596..bbb2904 100644
> > --- a/src/gallium/auxiliary/draw/draw_context.c
> > +++ b/src/gallium/auxiliary/draw/draw_context.c
> > @@ -555,6 +555,7 @@ draw_get_shader_info(const struct draw_context *draw)
> >  void
> >  draw_prepare_shader_outputs(struct draw_context *draw)
> >  {
> > +   draw_ia_prepare_outputs(draw, draw->pipeline.ia);
> > draw_unfilled_prepare_outputs(draw, draw->pipeline.unfilled);
> >  }
> >  
> > diff --git a/src/gallium/auxiliary/draw/draw_pipe.c
> > b/src/gallium/auxiliary/draw/draw_pipe.c
> > index f1ee6cb..8140299 100644
> > --- a/src/gallium/auxiliary/draw/draw_pipe.c
> > +++ b/src/gallium/auxiliary/draw/draw_pipe.c
> > @@ -49,6 +49,7 @@ boolean draw_pipeline_init( struct draw_context *draw )
> > draw->pipeline.clip  = draw_clip_stage( draw );
> > draw->pipeline.flatshade = draw_flatshade_stage( draw );
> > draw->pipeline.cull  = draw_cull_stage( draw );
> > +   draw->pipeline.ia= draw_ia_stage( draw );
> > draw->pipeline.validate  = draw_validate_stage( draw );
> > draw->pipeline.first = draw->pipeline.validate;
> >  
> > @@ -61,6 +62,7 @@ boolean draw_pipeline_init( struct draw_context *draw )
> > !draw->pipeline.clip ||
> > !draw->pipeline.flatshade ||
> > !draw->pipeline.cull ||
> > +   !draw->pipeline.ia ||
> > !draw->pipeline.validate)
> >return FALSE;
> >  
> > @@ -95,6 +97,8 @@ void draw_pipeline_destroy( struct draw_context *draw )
> >draw-&

Re: [Mesa-dev] [PATCH] util: implement table-based + linear interpolation linear-to-srgb conversion

2013-08-05 Thread Zack Rusin

Looks good to me. A small comment above the disabled version noting that it's 
disabled because it's a bit slower might be useful for the next person who 
reads the code.

Reviewed-by: Zack Rusin 

- Original Message -
> From: Roland Scheidegger 
> 
> Should be much faster, seems to work in softpipe.
> While here (also it's now disabled) fix up the pow factor - the former value
> is what is in GL core it is however not actually accurate to fp32 standard
> (as it is 1.0/2.4), and if someone would do all the accurate math there's no
> reason to waste 8 mantissa bits or so...
> 
> v2: use real table generating function instead of just printing the values
> (might take a bit longer as it does calculations on some 3+ million floats
> but much more descriptive obviously).
> Also fix up another pow factor (this time in the python code) - wondering
> where the couple one bit errors came from :-(.
> ---
>  src/gallium/auxiliary/util/u_format_srgb.h  |   55
>  +-
>  src/gallium/auxiliary/util/u_format_srgb.py |   57
>  ++-
>  2 files changed, 101 insertions(+), 11 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/util/u_format_srgb.h
> b/src/gallium/auxiliary/util/u_format_srgb.h
> index 82ed957..f3e1b20 100644
> --- a/src/gallium/auxiliary/util/u_format_srgb.h
> +++ b/src/gallium/auxiliary/util/u_format_srgb.h
> @@ -39,6 +39,7 @@
>  
>  
>  #include "pipe/p_compiler.h"
> +#include "u_pack_color.h"
>  #include "u_math.h"
>  
>  
> @@ -51,23 +52,57 @@ util_format_srgb_to_linear_8unorm_table[256];
>  extern const uint8_t
>  util_format_linear_to_srgb_8unorm_table[256];
>  
> +extern const unsigned
> +util_format_linear_to_srgb_helper_table[104];
> +
>  
>  /**
>   * Convert a unclamped linear float to srgb value in the [0,255].
> - * XXX this hasn't been tested (render to srgb surface).
> - * XXX this needs optimization.
>   */
>  static INLINE uint8_t
>  util_format_linear_float_to_srgb_8unorm(float x)
>  {
> -   if (x >= 1.0f)
> -  return 255;
> -   else if (x >= 0.0031308f)
> -  return float_to_ubyte(1.055f * powf(x, 0.41666f) - 0.055f);
> -   else if (x > 0.0f)
> -  return float_to_ubyte(12.92f * x);
> -   else
> -  return 0;
> +   if (0) {
> +  if (x >= 1.0f)
> + return 255;
> +  else if (x >= 0.0031308f)
> + return float_to_ubyte(1.055f * powf(x, 0.41666666f) - 0.055f);
> +  else if (x > 0.0f)
> + return float_to_ubyte(12.92f * x);
> +  else
> + return 0;
> +   }
> +   else {
> +  /*
> +   * This is taken from https://gist.github.com/rygorous/2203834
> +   * Use LUT and do linear interpolation.
> +   */
> +  union fi almostone, minval, f;
> +  unsigned tab, bias, scale, t;
> +
> +  almostone.ui = 0x3f7fffff;
> +  minval.ui = (127-13) << 23;
> +
> +  /*
> +   * Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1,
> respectively.
> +   * The tests are carefully written so that NaNs map to 0, same as in
> the
> +   * reference implementation.
> +   */
> +  if (!(x > minval.f))
> + x = minval.f;
> +  if (x > almostone.f)
> + x = almostone.f;
> +
> +  /* Do the table lookup and unpack bias, scale */
> +  f.f = x;
> +  tab = util_format_linear_to_srgb_helper_table[(f.ui - minval.ui) >>
> 20];
> +  bias = (tab >> 16) << 9;
> +  scale = tab & 0xffff;
> +
> +  /* Grab next-highest mantissa bits and perform linear interpolation */
> +  t = (f.ui >> 12) & 0xff;
> +  return (uint8_t) ((bias + scale*t) >> 16);
> +   }
>  }
>  
>  
> diff --git a/src/gallium/auxiliary/util/u_format_srgb.py
> b/src/gallium/auxiliary/util/u_format_srgb.py
> index cd63ae7..c6c02f0 100644
> --- a/src/gallium/auxiliary/util/u_format_srgb.py
> +++ b/src/gallium/auxiliary/util/u_format_srgb.py
> @@ -40,6 +40,7 @@ CopyRight = '''
>  
>  
>  import math
> +import struct
>  
>  
>  def srgb_to_linear(x):
> @@ -51,10 +52,11 @@ def srgb_to_linear(x):
>  
>  def linear_to_srgb(x):
>  if x >= 0.0031308:
> -return 1.055 * math.pow(x, 0.41666) - 0.055
> +return 1.055 * math.pow(x, 0.41666666) - 0.055
>  else:
>  return 12.92 * x
>  
> +
>  def generate_srgb_tables():
>  print 'const float'
>  print 'util_format_srgb_8unorm_to_linear_float_table[256] = {'
> @@ -84,6 +86,59 @@ def generate_srgb_tables():
>  print '};&#

[Mesa-dev] [PATCH] draw: fix slot detection

2013-08-06 Thread Zack Rusin

Nowadays -1 for slots means that the semantic is not present, so
we need to store it in a signed variable, otherwise <0 comparisons
are pointless. Fixes
http://bugzilla.eng.vmware.com/show_bug.cgi?id=67811 (at least
with softpipe; edgeflags don't work with llvmpipe)

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_pipe_unfilled.c |2 +-
 src/gallium/drivers/llvmpipe/lp_setup_context.h |2 +-
 src/gallium/drivers/llvmpipe/lp_setup_line.c|1 -
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c 
b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
index c6ee95c..68bab72 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
@@ -67,7 +67,7 @@ inject_front_face_info(struct draw_stage *stage,
boolean is_front_face = (
   (stage->draw->rasterizer->front_ccw && ccw) ||
   (!stage->draw->rasterizer->front_ccw && !ccw));
-   unsigned slot = unfilled->face_slot;
+   int slot = unfilled->face_slot;
unsigned i;
 
/* In case the backend doesn't care about it */
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h 
b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index ea1d0d6..44be85f 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -106,7 +106,7 @@ struct lp_setup_context
float psize;
unsigned viewport_index_slot;
unsigned layer_slot;
-   unsigned face_slot;
+   int face_slot;
 
struct pipe_framebuffer_state fb;
struct u_rect framebuffer;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c 
b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index 3b16163..a25a6b0 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -622,7 +622,6 @@ try_setup_line( struct lp_setup_context *setup,
} else {
   line->inputs.frontfacing = TRUE;
}
-   
 
/* Setup parameter interpolants:
 */
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] gallivm: propagate scalar_lod to emit_size_query too

2013-08-07 Thread Zack Rusin

- Original Message -
> From: Roland Scheidegger 
> 
> Clearly the returned values need to be per-element if the lod is per element.
> Does not actually change behavior yet.

Looks good. For the entire series:
Reviewed-by: Zack Rusin 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallivm: honor d3d10 floating point rules for shadow comparisons

2013-08-07 Thread Zack Rusin

- Original Message -
> From: Roland Scheidegger 
> 
> d3d10 specifies ordered comparisons for everything but not_equal which is
> unordered
> (http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx).
> OpenGL probably doesn't care.

This series looks good too. For all three:
Reviewed-by: Zack Rusin 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] draw: cleanup the extra attribs

2013-08-08 Thread Zack Rusin

Before inserting new front face and prim id outputs, clean up
the old extra outputs, otherwise our cache will use previous
output slots which will break as soon as outputs of the current
shader don't match the last.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_context.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/auxiliary/draw/draw_context.c 
b/src/gallium/auxiliary/draw/draw_context.c
index af9caee..2dc6772 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -555,6 +555,7 @@ draw_get_shader_info(const struct draw_context *draw)
 void
 draw_prepare_shader_outputs(struct draw_context *draw)
 {
+   draw_remove_extra_vertex_attribs(draw);
draw_ia_prepare_outputs(draw, draw->pipeline.ia);
draw_unfilled_prepare_outputs(draw, draw->pipeline.unfilled);
 }
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] draw: reset the vertex id when injecting new primitive id

2013-08-08 Thread Zack Rusin

Without resetting the vertex id, with primitives where the same
vertex is used with different primitives (e.g. tri/lines strips)
our vbuf module won't re-emit those vertices with the changed
primitive id. So let's reset the vertex id whenever injecting
new primitive id to make sure that the vertex data is correctly
emitted.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_pipe_ia.c |9 +
 1 file changed, 9 insertions(+)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_ia.c 
b/src/gallium/auxiliary/draw/draw_pipe_ia.c
index ecbb233..d64f19b 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_ia.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_ia.c
@@ -68,6 +68,15 @@ inject_primid(struct draw_stage *stage,
 
for (i = 0; i < num_verts; ++i) {
   struct vertex_header *v = header->v[i];
+  /* We have to reset the vertex_id because it's used by
+   * vbuf to figure out if the vertex had already been
+   * emitted. For line/tri strips the first vertex of
+   * subsequent primitives would already be emitted,
+   * but since we're changing the primitive id on the vertex
+   * we want to make sure it's reemitted with the correct
+   * data.
+   */
+  v->vertex_id = UNDEFINED_VERTEX_ID;
   memcpy(&v->data[slot][0], &primid, sizeof(primid));
   memcpy(&v->data[slot][1], &primid, sizeof(primid));
   memcpy(&v->data[slot][2], &primid, sizeof(primid));
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] draw: rewrite primitive assembler

2013-08-08 Thread Zack Rusin

We can't be injecting the primitive id's in the pipeline because
by that time the primitives have already been decomposed. To
properly number the primitives we need to handle the adjacency
primitives by hand. This patch moves the prim id injection into
the original primitive assembler and completely removes the
useless pipeline stage.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/Makefile.sources   |1 -
 src/gallium/auxiliary/draw/draw_context.c|8 +-
 src/gallium/auxiliary/draw/draw_pipe.c   |4 -
 src/gallium/auxiliary/draw/draw_pipe.h   |7 -
 src/gallium/auxiliary/draw/draw_pipe_ia.c|  259 --
 src/gallium/auxiliary/draw/draw_pipe_validate.c  |   14 --
 src/gallium/auxiliary/draw/draw_prim_assembler.c |  168 +-
 src/gallium/auxiliary/draw/draw_prim_assembler.h |   12 +
 src/gallium/auxiliary/draw/draw_private.h|4 +-
 9 files changed, 180 insertions(+), 297 deletions(-)
 delete mode 100644 src/gallium/auxiliary/draw/draw_pipe_ia.c

diff --git a/src/gallium/auxiliary/Makefile.sources 
b/src/gallium/auxiliary/Makefile.sources
index b0172de..acbcef7 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -13,7 +13,6 @@ C_SOURCES := \
draw/draw_pipe_clip.c \
draw/draw_pipe_cull.c \
draw/draw_pipe_flatshade.c \
-draw/draw_pipe_ia.c \
draw/draw_pipe_offset.c \
draw/draw_pipe_pstipple.c \
draw/draw_pipe_stipple.c \
diff --git a/src/gallium/auxiliary/draw/draw_context.c 
b/src/gallium/auxiliary/draw/draw_context.c
index 2dc6772..2d4843e 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -40,6 +40,7 @@
 #include "util/u_prim.h"
 #include "draw_context.h"
 #include "draw_pipe.h"
+#include "draw_prim_assembler.h"
 #include "draw_vs.h"
 #include "draw_gs.h"
 
@@ -95,6 +96,10 @@ draw_create_context(struct pipe_context *pipe, boolean 
try_llvm)
if (!draw_init(draw))
   goto err_destroy;
 
+   draw->ia = draw_prim_assembler_create(draw);
+   if (!draw->ia)
+  goto err_destroy;
+
return draw;
 
 err_destroy:
@@ -206,6 +211,7 @@ void draw_destroy( struct draw_context *draw )
   draw->render->destroy( draw->render );
*/
 
+   draw_prim_assembler_destroy(draw->ia);
draw_pipeline_destroy( draw );
draw_pt_destroy( draw );
draw_vs_destroy( draw );
@@ -556,7 +562,7 @@ void
 draw_prepare_shader_outputs(struct draw_context *draw)
 {
draw_remove_extra_vertex_attribs(draw);
-   draw_ia_prepare_outputs(draw, draw->pipeline.ia);
+   draw_prim_assembler_prepare_outputs(draw->ia);
draw_unfilled_prepare_outputs(draw, draw->pipeline.unfilled);
 }
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c 
b/src/gallium/auxiliary/draw/draw_pipe.c
index 8140299..f1ee6cb 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -49,7 +49,6 @@ boolean draw_pipeline_init( struct draw_context *draw )
draw->pipeline.clip  = draw_clip_stage( draw );
draw->pipeline.flatshade = draw_flatshade_stage( draw );
draw->pipeline.cull  = draw_cull_stage( draw );
-   draw->pipeline.ia= draw_ia_stage( draw );
draw->pipeline.validate  = draw_validate_stage( draw );
draw->pipeline.first = draw->pipeline.validate;
 
@@ -62,7 +61,6 @@ boolean draw_pipeline_init( struct draw_context *draw )
!draw->pipeline.clip ||
!draw->pipeline.flatshade ||
!draw->pipeline.cull ||
-   !draw->pipeline.ia ||
!draw->pipeline.validate)
   return FALSE;
 
@@ -97,8 +95,6 @@ void draw_pipeline_destroy( struct draw_context *draw )
   draw->pipeline.flatshade->destroy( draw->pipeline.flatshade );
if (draw->pipeline.cull)
   draw->pipeline.cull->destroy( draw->pipeline.cull );
-   if (draw->pipeline.ia)
-  draw->pipeline.ia->destroy( draw->pipeline.ia );
if (draw->pipeline.validate)
   draw->pipeline.validate->destroy( draw->pipeline.validate );
if (draw->pipeline.aaline)
diff --git a/src/gallium/auxiliary/draw/draw_pipe.h 
b/src/gallium/auxiliary/draw/draw_pipe.h
index 70822a4..7c9ed6c 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.h
+++ b/src/gallium/auxiliary/draw/draw_pipe.h
@@ -91,10 +91,6 @@ extern struct draw_stage *draw_stipple_stage( struct 
draw_context *context );
 extern struct draw_stage *draw_wide_line_stage( struct draw_context *context );
 extern struct draw_stage *draw_wide_point_stage( struct draw_context *context 
);
 extern struct draw_stage *draw_validate_stage( struct draw_context *context );
-extern struct draw_stage *draw_ia_stage(struct draw_context *context);
-
-boolean draw_ia_stage_required(const struct draw_context *context,
-

Re: [Mesa-dev] [PATCH 2/3] draw: reset the vertex id when injecting new primitive id

2013-08-08 Thread Zack Rusin

Don't worry about this one too much. The next patch removes draw_pipe_ia.c 
anyway...

- Original Message -
> Without resetting the vertex id, with primitives where the same
> vertex is used with different primitives (e.g. tri/lines strips)
> our vbuf module won't re-emit those vertices with the changed
> primitive id. So let's reset the vertex id whenever injecting
> new primitive id to make sure that the vertex data is correctly
> emitted.
> 
> Signed-off-by: Zack Rusin 
> ---
>  src/gallium/auxiliary/draw/draw_pipe_ia.c |9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/src/gallium/auxiliary/draw/draw_pipe_ia.c
> b/src/gallium/auxiliary/draw/draw_pipe_ia.c
> index ecbb233..d64f19b 100644
> --- a/src/gallium/auxiliary/draw/draw_pipe_ia.c
> +++ b/src/gallium/auxiliary/draw/draw_pipe_ia.c
> @@ -68,6 +68,15 @@ inject_primid(struct draw_stage *stage,
>  
> for (i = 0; i < num_verts; ++i) {
>struct vertex_header *v = header->v[i];
> +  /* We have to reset the vertex_id because it's used by
> +   * vbuf to figure out if the vertex had already been
> +   * emitted. For line/tri strips the first vertex of
> +   * subsequent primitives would already be emitted,
> +   * but since we're changing the primitive id on the vertex
> +   * we want to make sure it's reemitted with the correct
> +   * data.
> +   */
> +  v->vertex_id = UNDEFINED_VERTEX_ID;
>memcpy(&v->data[slot][0], &primid, sizeof(primid));
>memcpy(&v->data[slot][1], &primid, sizeof(primid));
>memcpy(&v->data[slot][2], &primid, sizeof(primid));
> --
> 1.7.10.4
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] gallivm: use texture target from shader instead of static state for size query

2013-08-08 Thread Zack Rusin

Series looks good to me.

Reviewed-by: Zack Rusin 

- Original Message -
> From: Roland Scheidegger 
> 
> d3d10 has no notion of distinct array resources neither at the resource nor
> sampler view level. However, shader dcl of resources certainly has, and
> d3d10 expects resinfo to return the values according to that - in particular
> a resource might have been a 1d texture with some array layers, then the
> sampler view might have only used 1 layer so it can be accessed both as 1d
> or 1d array texture (I think - the former definitely works). resinfo of a
> resource declared as array needs to return number of array layers but
> non-array resource needs to return 0 (and not 1). Hence fix this by passing
> the target from the shader decl to emit_size_query and use that (in case of
> OpenGL the target will come from the instruction itself).
> Could probably do the same for actual sampling, though it may not matter
> there
> (as the bogus components will essentially get clamped away), possibly could
> wreak havoc though if it REALLY doesn't match (which is of course an error
> but still).
> ---
>  src/gallium/auxiliary/draw/draw_llvm_sample.c |2 +
>  src/gallium/auxiliary/gallivm/lp_bld_sample.h |1 +
>  src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |   32 ++-
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi.h   |1 +
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c   |   43
>  -
>  src/gallium/drivers/llvmpipe/lp_tex_sample.c  |2 +
>  6 files changed, 77 insertions(+), 4 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c
> b/src/gallium/auxiliary/draw/draw_llvm_sample.c
> index 3016d7c..f10cba3 100644
> --- a/src/gallium/auxiliary/draw/draw_llvm_sample.c
> +++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c
> @@ -270,6 +270,7 @@ draw_llvm_sampler_soa_emit_size_query(const struct
> lp_build_sampler_soa *base,
>struct gallivm_state *gallivm,
>struct lp_type type,
>unsigned texture_unit,
> +  unsigned target,
>boolean need_nr_mips,
>boolean scalar_lod,
>LLVMValueRef explicit_lod, /* optional
>*/
> @@ -284,6 +285,7 @@ draw_llvm_sampler_soa_emit_size_query(const struct
> lp_build_sampler_soa *base,
> &sampler->dynamic_state.base,
> type,
> texture_unit,
> +   target,
> need_nr_mips,
> scalar_lod,
> explicit_lod,
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> index dff8be2..db3ea1d 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> @@ -497,6 +497,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
>  struct lp_sampler_dynamic_state *dynamic_state,
>  struct lp_type int_type,
>  unsigned texture_unit,
> +unsigned target,
>  boolean need_nr_mips,
>  boolean scalar_lod,
>  LLVMValueRef explicit_lod,
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> index b0bb58b..e403ac8 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> @@ -1943,6 +1943,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
>  struct lp_sampler_dynamic_state *dynamic_state,
>  struct lp_type int_type,
>  unsigned texture_unit,
> +unsigned target,
>  boolean need_nr_mips,
>  boolean scalar_lod,
>  LLVMValueRef explicit_lod,
> @@ -1955,9 +1956,36 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
> unsigned num_lods = 1;
> struct lp_build_context bld_int_vec;
>  
> -   dims = texture_dims(static_state->target);
> +   /*
> +* Do some sanity verification about bound texture and shader dcl target.
> +* Not entirely sure what's possible but assume array/non-array
> +* always compatible (probably not ok for OpenGL but d3d10 has no
> +

Re: [Mesa-dev] [PATCH] gallivm: set non-existing values really to zero in size queries for d3d10

2013-08-08 Thread Zack Rusin

Looks good.

Reviewed-by: Zack Rusin 

- Original Message -
> From: Roland Scheidegger 
> 
> My previous attempt at doing so double-failed miserably (minification of
> zero still gives one, and even if it would not the value was never written
> anyway).
> While here also rename the confusingly named int_vec bld as we have int vecs
> of different sizes, and rename need_nr_mips (as this also changes
> out-of-bounds
> behavior) to is_sviewinfo too.
> ---
>  src/gallium/auxiliary/draw/draw_llvm_sample.c |4 +--
>  src/gallium/auxiliary/gallivm/lp_bld_sample.h |2 +-
>  src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |   34
>  ++---
>  src/gallium/drivers/llvmpipe/lp_tex_sample.c  |4 +--
>  4 files changed, 22 insertions(+), 22 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c
> b/src/gallium/auxiliary/draw/draw_llvm_sample.c
> index f10cba3..97b0255 100644
> --- a/src/gallium/auxiliary/draw/draw_llvm_sample.c
> +++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c
> @@ -271,7 +271,7 @@ draw_llvm_sampler_soa_emit_size_query(const struct
> lp_build_sampler_soa *base,
>struct lp_type type,
>unsigned texture_unit,
>unsigned target,
> -  boolean need_nr_mips,
> +  boolean is_sviewinfo,
>boolean scalar_lod,
>LLVMValueRef explicit_lod, /* optional
>*/
>LLVMValueRef *sizes_out)
> @@ -286,7 +286,7 @@ draw_llvm_sampler_soa_emit_size_query(const struct
> lp_build_sampler_soa *base,
> type,
> texture_unit,
> target,
> -   need_nr_mips,
> +   is_sviewinfo,
> scalar_lod,
> explicit_lod,
> sizes_out);
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> index db3ea1d..75e8c59 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> @@ -498,7 +498,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
>  struct lp_type int_type,
>  unsigned texture_unit,
>  unsigned target,
> -boolean need_nr_mips,
> +boolean is_viewinfo,
>  boolean scalar_lod,
>  LLVMValueRef explicit_lod,
>  LLVMValueRef *sizes_out);
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> index e403ac8..65d6e7b 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> @@ -1944,7 +1944,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
>  struct lp_type int_type,
>  unsigned texture_unit,
>  unsigned target,
> -boolean need_nr_mips,
> +boolean is_sviewinfo,
>  boolean scalar_lod,
>  LLVMValueRef explicit_lod,
>  LLVMValueRef *sizes_out)
> @@ -1954,7 +1954,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
> int dims, i;
> boolean has_array;
> unsigned num_lods = 1;
> -   struct lp_build_context bld_int_vec;
> +   struct lp_build_context bld_int_vec4;
>  
> /*
>  * Do some sanity verification about bound texture and shader dcl target.
> @@ -1997,24 +1997,19 @@ lp_build_size_query_soa(struct gallivm_state
> *gallivm,
>  
> assert(!int_type.floating);
>  
> -   lp_build_context_init(&bld_int_vec, gallivm, lp_type_int_vec(32, 128));
> +   lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));
>  
> if (explicit_lod) {
>/* FIXME: this needs to honor per-element lod */
>lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod,
>lp_build_const_int32(gallivm, 0), "");
>first_level = dynamic_state->first_level(dynamic_state, gallivm,
>texture_unit);
>level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
> -  lod = lp_build_broadcast_scalar(&bld_i

Re: [Mesa-dev] [PATCH 3/3] draw: rewrite primitive assembler

2013-08-08 Thread Zack Rusin

 > Series looks good though I'm unsure why the pipeline stage doesn't work.
> Where does that decomposition happen? Is that something like GS
> outputting multiple prims in the same topology which all need the same id?

No, it's because the pipeline stage is run on the decomposed primitives. The 
issue is that the pipeline stage is run after stream output and stream output 
requires decomposed primitives, meaning that by the time we get to the pipeline 
we lost the original primitive info. The d3d10 wants the primitive id's to be 
injected into vertices but in the order in which they are traversed on the 
original (striped) primitives, so we need to do it when doing the original 
decomposition where we have access to the original topology and can number the 
vertices correctly.

z
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] draw: rewrite primitive assembler

2013-08-08 Thread Zack Rusin

> Am 09.08.2013 00:40, schrieb Zack Rusin:
> >  > Series looks good though I'm unsure why the pipeline stage doesn't work.
> >> Where does that decomposition happen? Is that something like GS
> >> outputting multiple prims in the same topology which all need the same id?
> > 
> > No, it's because the pipeline stage is run on the decomposed primitives.
> > The issue is that the pipeline stage is run after stream output and stream
> > output requires decomposed primitives, meaning that by the time we get to
> > the pipeline we lost the original primitive info. The d3d10 wants the
> > primitive id's to be injected into vertices but in the order in which they
> > are traversed on the original (striped) primitives, so we need to do it
> > when doing the original decomposition where we have access to the original
> > topology and can number the vertices correctly.
> > 
> > z
> > 
> 
> I see I totally forgot stream out needs decomposed primitives, and I
> guess stream out (and prim assembler) can't run as an ordinary pipeline
> stage?

I was thinking about that when I was doing it and I thought it should be 
possible to rewrite SO as a pipeline stage, but we'd need to change the 
interface to include some sort of a prepare stage and then redo the code in so. 
Once so would be in a pipeline then we could think about primitive assembler, 
but that would require also more changes to the pipeline because we want to 
know if the primitives are adjacency primitives and pipeline stages get only 
tris/lines/points... and this was the point at which I went "screw it, i'm 
injecting prim ids in the primitive assembler".

z
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC]: gallium: add new float comparison opcodes returning integer booleans

2013-08-09 Thread Zack Rusin

- Original Message -
> This is a proposal for new comparison instructions, as the old ones
> don't really fit modern (graphic or opencl I guess for that matter)
> languages well.
> If you've got objections, think the naming is crazy or whatnot I'm open
> for suggestions :-). I would think this is not just a much better fit
> for d3d10/glsl but for hw as well.

Yea, that makes sense to me. Comparison instructions should return consistent 
results across types. I'd just add a line or so to the docs to make it explicit 
how they're different from the old opcodes, I expect that for people new to 
gallium it's going to be easy to miss.

z
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallivm: simplify geometry shader mask handling a bit

2013-08-12 Thread Zack Rusin

> From: Roland Scheidegger 
> 
> Instead of reducing masks to 0/1 simply use the mask directly as -1.
> Also use some signed comparison instead of unsigned (as far as I understand
> these values have to be (very) small and signed means llvm doesn't have to
> apply additional logic to do the unsigned comparisons the cpu can't do).
> Saves a couple of instructions in some test geometry shader here.
> 
> v2: that was a bit too much optimization, don't skip combining the masks...

k, I think that one looks good. 

Reviewed-by: Zack Rusin 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] draw: simplify prim mask construction

2013-08-12 Thread Zack Rusin

Looks good.

Reviewed-by: Zack Rusin 

- Original Message -
> From: Roland Scheidegger 
> 
> The code was quite weird, the second comparison was in fact a complete no-op
> and we can also do the comparison with the vector directly instead of scalar,
> which should not only be faster but also makes it way more obvious what that
> mask is actually going to look like.
> (Not sure how many instructions that saves as it turned out the mask wasn't
> used in the test geometry shader I used at all after all...)
> ---
>  src/gallium/auxiliary/draw/draw_llvm.c |   32
>  ++--
>  1 file changed, 10 insertions(+), 22 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/draw/draw_llvm.c
> b/src/gallium/auxiliary/draw/draw_llvm.c
> index 68f6369..84e3392 100644
> --- a/src/gallium/auxiliary/draw/draw_llvm.c
> +++ b/src/gallium/auxiliary/draw/draw_llvm.c
> @@ -2040,31 +2040,19 @@ generate_mask_value(struct draw_gs_llvm_variant
> *variant,
>  {
> struct gallivm_state *gallivm = variant->gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> -   LLVMValueRef bits[16];
> -   struct lp_type  mask_type = lp_int_type(gs_type);
> -   struct lp_type mask_elem_type = lp_elem_type(mask_type);
> -   LLVMValueRef mask_val = lp_build_const_vec(gallivm,
> -  mask_type,
> -  0);
> +   struct lp_type mask_type = lp_int_type(gs_type);
> +   LLVMValueRef num_prims;
> +   LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
> unsigned i;
>  
> -   assert(gs_type.length <= Elements(bits));
> -
> -   for (i = gs_type.length; i >= 1; --i) {
> -  int idx = i - 1;
> -  LLVMValueRef ind = lp_build_const_int32(gallivm, i);
> -  bits[idx] = lp_build_compare(gallivm,
> -   mask_elem_type, PIPE_FUNC_GEQUAL,
> -   variant->num_prims, ind);
> -   }
> -   for (i = 0; i < gs_type.length; ++i) {
> -  LLVMValueRef ind = lp_build_const_int32(gallivm, i);
> -  mask_val = LLVMBuildInsertElement(builder, mask_val, bits[i], ind,
> "");
> +   num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm,
> mask_type),
> +  variant->num_prims);
> +   for (i = 0; i <= gs_type.length; i++) {
> +  LLVMValueRef idx = lp_build_const_int32(gallivm, i);
> +  mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");
> }
> -   mask_val = lp_build_compare(gallivm,
> -   mask_type, PIPE_FUNC_NOTEQUAL,
> -   mask_val,
> -   lp_build_const_int_vec(gallivm, mask_type,
> 0));
> +   mask_val = lp_build_compare(gallivm, mask_type,
> +   PIPE_FUNC_GREATER, num_prims, mask_val);
>  
> return mask_val;
>  }
> --
> 1.7.9.5
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallivm: fix exec_mask interaction with geometry shader after end of main

2013-08-12 Thread Zack Rusin

Ah, that looks like a great catch.

Reviewed-by: Zack Rusin 

- Original Message -
> From: Roland Scheidegger 
> 
> Because we must maintain an exec_mask even if there's currently nothing
> on the mask stack, we can still have an exec_mask at the end of the program.
> Effectively, this mask should be set back to default when returning from
> main.
> Without relying on END/RET opcode (I think it's valid to have neither) it is
> actually difficult to do this, as there doesn't seem any reasonable place to
> do it, so instead let's just say the exec_mask is invalid outside main (which
> it really is effectively).
> The problem is that geometry shader called end_primitive outside the shader
> (in the epilogue), and as a result used a bogus mask, leading to bugs if we
> had to set the (somewhat misnamed) ret_in_main bit anywhere. So just avoid
> the mask combining function when called from outside the shader.
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi.c |2 +-
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c |   28
>  +++
>  2 files changed, 14 insertions(+), 16 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> index 495940c..5a9e8d0 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> @@ -466,7 +466,7 @@ lp_build_tgsi_llvm(
>  
> while (bld_base->pc != -1) {
>struct tgsi_full_instruction *instr = bld_base->instructions +
> - bld_base->pc;
> +   bld_base->pc;
>const struct tgsi_opcode_info *opcode_info =
>   tgsi_get_opcode_info(instr->Instruction.Opcode);
>if (!lp_build_tgsi_inst_llvm(bld_base, instr)) {
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 589ea4f..db8e997 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -2691,11 +2691,21 @@ end_primitive_masked(struct lp_build_tgsi_context *
> bld_base,
> LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
>  
> if (bld->gs_iface->end_primitive) {
> +  struct lp_build_context *uint_bld = &bld_base->uint_bld;
>LLVMValueRef emitted_vertices_vec =
>   LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
>LLVMValueRef emitted_prims_vec =
>   LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
>  
> +  LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
> +   emitted_vertices_vec,
> +   uint_bld->zero);
> +  /* We need to combine the current execution mask with the mask
> + telling us which, if any, execution slots actually have
> + unemitted primitives, this way we make sure that end_primitives
> + executes only on the paths that have unflushed vertices */
> +  mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
> +
>bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
> emitted_vertices_vec,
> emitted_prims_vec);
> @@ -2735,20 +2745,7 @@ end_primitive(
> struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
>  
> if (bld->gs_iface->end_primitive) {
> -  LLVMBuilderRef builder = bld_base->base.gallivm->builder;
>LLVMValueRef mask = mask_vec(bld_base);
> -  struct lp_build_context *uint_bld = &bld_base->uint_bld;
> -  LLVMValueRef emitted_verts = LLVMBuildLoad(
> - builder, bld->emitted_vertices_vec_ptr, "");
> -  LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
> -   emitted_verts,
> -   uint_bld->zero);
> -  /* We need to combine the current execution mask with the mask
> - telling us which, if any, execution slots actually have
> - unemitted primitives, this way we make sure that end_primitives
> - executes only on the paths that have unflushed vertices */
> -  mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
> -
>end_primitive_masked(bld_base, mask);
> }
>  }
> @@ -3148,8 +3145,9 @@ static void emit_epilogue(struct lp_build_tgsi_context
> * bld_base)
>LLVMValueRef total_emitted_vertice

Re: [Mesa-dev] [PATCH 3/3] gallivm: implement new float comparison instructions returning integer masks

2013-08-12 Thread Zack Rusin

Nice. The entire series looks good.

Reviewed-by: Zack Rusin 

- Original Message -
> From: Roland Scheidegger 
> 
> FSEQ/FSGE/FSLT/FSNE work just the same as SEQ/SGE/SLT/SNE except skip the
> select.
> And just for consistency use the same appropriate ordered/unordered
> comparisons
> for the old opcodes as well.
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |   81
>  +++-
>  1 file changed, 79 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> index f461661..86c3249 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> @@ -1094,6 +1094,70 @@ f2u_emit_cpu(
>  emit_data->args[0]);
>  }
>  
> +/* TGSI_OPCODE_FSET Helper (CPU Only) */
> +static void
> +fset_emit_cpu(
> +   const struct lp_build_tgsi_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data,
> +   unsigned pipe_func)
> +{
> +   LLVMValueRef cond;
> +
> +   if (pipe_func != PIPE_FUNC_NOTEQUAL) {
> +  cond = lp_build_cmp_ordered(&bld_base->base, pipe_func,
> +  emit_data->args[0], emit_data->args[1]);
> +   }
> +   else {
> +  cond = lp_build_cmp(&bld_base->base, pipe_func,
> +  emit_data->args[0], emit_data->args[1]);
> +
> +   }
> +   emit_data->output[emit_data->chan] = cond;
> +}
> +
> +
> +/* TGSI_OPCODE_FSEQ (CPU Only) */
> +static void
> +fseq_emit_cpu(
> +   const struct lp_build_tgsi_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
> +}
> +
> +/* TGSI_OPCODE_FSGE (CPU Only) */
> +static void
> +fsge_emit_cpu(
> +   const struct lp_build_tgsi_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
> +}
> +
> +/* TGSI_OPCODE_FSLT (CPU Only) */
> +static void
> +fslt_emit_cpu(
> +   const struct lp_build_tgsi_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
> +}
> +
> +/* TGSI_OPCODE_FSNE (CPU Only) */
> +
> +static void
> +fsne_emit_cpu(
> +   const struct lp_build_tgsi_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
> +}
> +
>  /* TGSI_OPCODE_FLR (CPU Only) */
>  
>  static void
> @@ -1396,8 +1460,17 @@ set_emit_cpu(
> struct lp_build_emit_data * emit_data,
> unsigned pipe_func)
>  {
> -   LLVMValueRef cond = lp_build_cmp(&bld_base->base, pipe_func,
> -emit_data->args[0], emit_data->args[1]);
> +   LLVMValueRef cond;
> +
> +   if (pipe_func != PIPE_FUNC_NOTEQUAL) {
> +  cond = lp_build_cmp_ordered(&bld_base->base, pipe_func,
> +  emit_data->args[0], emit_data->args[1]);
> +   }
> +   else {
> +  cond = lp_build_cmp(&bld_base->base, pipe_func,
> +  emit_data->args[0], emit_data->args[1]);
> +
> +   }
> emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base,
>cond,
>bld_base->base.one,
> @@ -1716,6 +1789,10 @@ lp_set_default_actions_cpu(
> bld_base->op_actions[TGSI_OPCODE_F2I].emit = f2i_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = fseq_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_FSGE].emit = fsge_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_FSLT].emit = fslt_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_FSNE].emit = fsne_emit_cpu;
>  
> bld_base->op_actions[TGSI_OPCODE_I2F].emit = i2f_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_IABS].emit = iabs_emit_cpu;
> --
> 1.7.9.5
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] draw: make sure that the stages setup outputs

2013-08-12 Thread Zack Rusin

Calling the prepare outputs cleans up the slot assignments
for outputs. Unfortunately, aapoint and aaline didn't have
code to reset their slots after the initial setup, which
was messing up our slot assignments. The unfilled stage
was just missing the initial assignment of the face slot.
This fixes all of the reported piglit failures.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_context.c   |2 +
 src/gallium/auxiliary/draw/draw_pipe.h  |5 +-
 src/gallium/auxiliary/draw/draw_pipe_aaline.c   |   27 ---
 src/gallium/auxiliary/draw/draw_pipe_aapoint.c  |   56 ++-
 src/gallium/auxiliary/draw/draw_pipe_unfilled.c |2 +
 5 files changed, 62 insertions(+), 30 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_context.c 
b/src/gallium/auxiliary/draw/draw_context.c
index 2d4843e..d1fac0c 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -564,6 +564,8 @@ draw_prepare_shader_outputs(struct draw_context *draw)
draw_remove_extra_vertex_attribs(draw);
draw_prim_assembler_prepare_outputs(draw->ia);
draw_unfilled_prepare_outputs(draw, draw->pipeline.unfilled);
+   draw_aapoint_prepare_outputs(draw, draw->pipeline.aapoint);
+   draw_aaline_prepare_outputs(draw, draw->pipeline.aaline);
 }
 
 /**
diff --git a/src/gallium/auxiliary/draw/draw_pipe.h 
b/src/gallium/auxiliary/draw/draw_pipe.h
index 7c9ed6c..ad3165f 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.h
+++ b/src/gallium/auxiliary/draw/draw_pipe.h
@@ -101,7 +101,10 @@ void draw_pipe_passthrough_tri(struct draw_stage *stage, 
struct prim_header *hea
 void draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header 
*header);
 void draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header 
*header);
 
-
+void draw_aapoint_prepare_outputs(struct draw_context *context,
+  struct draw_stage *stage);
+void draw_aaline_prepare_outputs(struct draw_context *context,
+ struct draw_stage *stage);
 void draw_unfilled_prepare_outputs(struct draw_context *context,
struct draw_stage *stage);
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c 
b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index aa88459..c44c236 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -692,13 +692,7 @@ aaline_first_line(struct draw_stage *stage, struct 
prim_header *header)
   return;
}
 
-   /* update vertex attrib info */
-   aaline->pos_slot = draw_current_shader_position_output(draw);;
-
-   /* allocate the extra post-transformed vertex attribute */
-   aaline->tex_slot = draw_alloc_extra_vertex_attrib(draw,
- TGSI_SEMANTIC_GENERIC,
- 
aaline->fs->generic_attrib);
+   draw_aaline_prepare_outputs(draw, draw->pipeline.aaline);
 
/* how many samplers? */
/* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
@@ -953,6 +947,25 @@ aaline_set_sampler_views(struct pipe_context *pipe,
 }
 
 
+void
+draw_aaline_prepare_outputs(struct draw_context *draw,
+struct draw_stage *stage)
+{
+   struct aaline_stage *aaline = aaline_stage(stage);
+   const struct pipe_rasterizer_state *rast = draw->rasterizer;
+
+   /* update vertex attrib info */
+   aaline->pos_slot = draw_current_shader_position_output(draw);;
+
+   if (!rast->line_smooth)
+  return;
+
+   /* allocate the extra post-transformed vertex attribute */
+   aaline->tex_slot = draw_alloc_extra_vertex_attrib(draw,
+ TGSI_SEMANTIC_GENERIC,
+ 
aaline->fs->generic_attrib);
+}
+
 /**
  * Called by drivers that want to install this AA line prim stage
  * into the draw module's pipeline.  This will not be used if the
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c 
b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 0d7b88e..7ae1ddd 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -696,28 +696,7 @@ aapoint_first_point(struct draw_stage *stage, struct 
prim_header *header)
 */
bind_aapoint_fragment_shader(aapoint);
 
-   /* update vertex attrib info */
-   aapoint->pos_slot = draw_current_shader_position_output(draw);
-
-   /* allocate the extra post-transformed vertex attribute */
-   aapoint->tex_slot = draw_alloc_extra_vertex_attrib(draw,
-  TGSI_SEMANTIC_GENERIC,
-  
aapoint->fs->generic_attrib);
-   assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
-
-   /* find psize slot i

[Mesa-dev] [PATCH] llvmpipe: fix pipeline statistics with a null ps

2013-08-12 Thread Zack Rusin

If the fragment shader is null then pixel shader invocations have
to be equal to zero. And if we're running a null ps then clipper
invocations and primitives should be equal to zero but only
if both stencil and depth testing are disabled.

Signed-off-by: Zack Rusin 
---
 src/gallium/drivers/llvmpipe/lp_query.c |   30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_query.c 
b/src/gallium/drivers/llvmpipe/lp_query.c
index cea2d07..fb24c36 100644
--- a/src/gallium/drivers/llvmpipe/lp_query.c
+++ b/src/gallium/drivers/llvmpipe/lp_query.c
@@ -32,6 +32,7 @@
 
 #include "draw/draw_context.h"
 #include "pipe/p_defines.h"
+#include "tgsi/tgsi_scan.h"
 #include "util/u_memory.h"
 #include "os/os_time.h"
 #include "lp_context.h"
@@ -95,6 +96,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe,
   union pipe_query_result *vresult)
 {
struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
unsigned num_threads = MAX2(1, screen->num_threads);
struct llvmpipe_query *pq = llvmpipe_query(q);
uint64_t *result = (uint64_t *)vresult;
@@ -166,11 +168,31 @@ llvmpipe_get_query_result(struct pipe_context *pipe,
case PIPE_QUERY_PIPELINE_STATISTICS: {
   struct pipe_query_data_pipeline_statistics *stats =
  (struct pipe_query_data_pipeline_statistics *)vresult;
-  /* only ps_invocations come from binned query */
-  for (i = 0; i < num_threads; i++) {
- pq->stats.ps_invocations += pq->end[i];
+  /* If we're running on what's considrered a null fragment
+   * shader, i.e. fragment shader consisting of a single
+   * END opcode or if the fragment shader is null then
+   * the number of ps_invocations should be zero */
+  if (llvmpipe->fs && llvmpipe->fs->info.base.num_tokens > 1) {
+ /* only ps_invocations come from binned query */
+ for (i = 0; i < num_threads; i++) {
+pq->stats.ps_invocations += pq->end[i];
+ }
+ pq->stats.ps_invocations *=
+LP_RASTER_BLOCK_SIZE * LP_RASTER_BLOCK_SIZE;
+  } else {
+ /* 
+  * Clipper primitives and invocations are equal to zero
+  * if we're running a null fragment shader but only
+  * if both stencil and depth testing are disabled.
+  */
+ if (!llvmpipe->depth_stencil->depth.enabled &&
+ !llvmpipe->depth_stencil->stencil[0].enabled &&
+ !llvmpipe->depth_stencil->stencil[1].enabled) {
+pq->stats.c_primitives = 0;
+pq->stats.c_invocations = 0;
+ }
+ pq->stats.ps_invocations = 0;
   }
-  pq->stats.ps_invocations *= LP_RASTER_BLOCK_SIZE * LP_RASTER_BLOCK_SIZE;
   *stats = pq->stats;
}
   break;
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] llvmpipe: fix pipeline statistics with a null ps

2013-08-14 Thread Zack Rusin

If the fragment shader is null then pixel shader invocations have
to be equal to zero. And if we're running a null ps then clipper
invocations and primitives should be equal to zero but only
if both stencil and depth testing are disabled.

Signed-off-by: Zack Rusin 
---
 src/gallium/drivers/llvmpipe/lp_rast.c|3 ++-
 src/gallium/drivers/llvmpipe/lp_rast_priv.h   |3 ++-
 src/gallium/drivers/llvmpipe/lp_setup_line.c  |3 ++-
 src/gallium/drivers/llvmpipe/lp_setup_point.c |3 ++-
 src/gallium/drivers/llvmpipe/lp_setup_tri.c   |3 ++-
 src/gallium/drivers/llvmpipe/lp_setup_vbuf.c  |9 +++--
 src/gallium/drivers/llvmpipe/lp_state_fs.c|   24 +++-
 src/gallium/drivers/llvmpipe/lp_state_fs.h|4 
 8 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c 
b/src/gallium/drivers/llvmpipe/lp_rast.c
index 49cdbfe..af661e9 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -35,6 +35,7 @@
 #include "os/os_time.h"
 
 #include "lp_scene_queue.h"
+#include "lp_context.h"
 #include "lp_debug.h"
 #include "lp_fence.h"
 #include "lp_perf.h"
@@ -459,7 +460,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
   /* not very accurate would need a popcount on the mask */
   /* always count this not worth bothering? */
-  task->ps_invocations++;
+  task->ps_invocations += 1 * variant->ps_inv_multiplier;
 
   /* run shader on 4x4 block */
   BEGIN_JIT_CALL(state, task);
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h 
b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index b8bc99c..41fe097 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -100,6 +100,7 @@ struct lp_rasterizer_task
/* occlude counter for visible pixels */
struct lp_jit_thread_data thread_data;
uint64_t ps_invocations;
+   uint8_t ps_inv_multiplier;
 
pipe_semaphore work_ready;
pipe_semaphore work_done;
@@ -308,7 +309,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
   /* not very accurate would need a popcount on the mask */
   /* always count this not worth bothering? */
-  task->ps_invocations++;
+  task->ps_invocations += 1 * variant->ps_inv_multiplier;
 
   /* run shader on 4x4 block */
   BEGIN_JIT_CALL(state, task);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c 
b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index a25a6b0..e1686ea 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -600,7 +600,8 @@ try_setup_line( struct lp_setup_context *setup,
 
LP_COUNT(nr_tris);
 
-   if (lp_context->active_statistics_queries) {
+   if (lp_context->active_statistics_queries &&
+   !llvmpipe_rasterization_disabled(lp_context)) {
   lp_context->pipeline_statistics.c_primitives++;
}
 
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c 
b/src/gallium/drivers/llvmpipe/lp_setup_point.c
index cbcc8d4..45068ec 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_point.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -384,7 +384,8 @@ try_setup_point( struct lp_setup_context *setup,
 
LP_COUNT(nr_tris);
 
-   if (lp_context->active_statistics_queries) {
+   if (lp_context->active_statistics_queries &&
+   !llvmpipe_rasterization_disabled(lp_context)) {
   lp_context->pipeline_statistics.c_primitives++;
}
 
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c 
b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 579f351..23bc6e2 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -340,7 +340,8 @@ do_triangle_ccw(struct lp_setup_context *setup,
 
LP_COUNT(nr_tris);
 
-   if (lp_context->active_statistics_queries) {
+   if (lp_context->active_statistics_queries &&
+   !llvmpipe_rasterization_disabled(lp_context)) {
   lp_context->pipeline_statistics.c_primitives++;
}
 
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c 
b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
index 8173994..bf9f7e7 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
@@ -565,8 +565,13 @@ lp_setup_pipeline_statistics(
   stats->gs_invocations;
llvmpipe->pipeline_statistics.gs_primitives +=
   stats->gs_primitives;
-   llvmpipe->pipeline_statistics.c_invocations +=
-  stats->c_invocations;
+   if (!llvmpipe_rasterization_disabled(llvmpipe)) {
+  llvmpipe->pipeline_statistics.c_invocations +=

Re: [Mesa-dev] [PATCH] gallivm: already pass coords in the right place in the sampler interface

2013-08-14 Thread Zack Rusin

I have to admit that I don't know the sampling code, but the patches look good 
to me.

z

- Original Message -
> From: Roland Scheidegger 
> 
> This makes things a bit nicer, and more importantly it fixes an issue
> where a "downgraded" array texture (due to view reduced to 1 layer and
> addressed with (non-array) samplec instruction) would use the wrong
> coord as shadow reference value. (This could also be fixed by passing
> target through the sampler interface much the same way as is done for
> size queries, might do this eventually anyway.)
> And if we'd ever want to support (shadow) cube map arrays, we'd need
> 5 coords in any case.
> 
> v2: fix bugs (texel fetch using wrong layer coord for 1d, shadow tex
> using wrong shadow coord for 2d...). Plus need to project the shadow
> coord, and just for fun keep projecting the layer coord too.
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_sample.h |2 +
>  src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |   28 +---
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c   |  159
>  +++--
>  3 files changed, 90 insertions(+), 99 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> index c25d171..6d8fe88 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> @@ -335,7 +335,9 @@ texture_dims(enum pipe_texture_target tex)
> case PIPE_TEXTURE_2D_ARRAY:
> case PIPE_TEXTURE_RECT:
> case PIPE_TEXTURE_CUBE:
> +  return 2;
> case PIPE_TEXTURE_CUBE_ARRAY:
> +  assert(0);
>return 2;
> case PIPE_TEXTURE_3D:
>return 3;
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> index 07ed48e..c312922 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> @@ -1574,7 +1574,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> unsigned target = static_texture_state->target;
> unsigned dims = texture_dims(target);
> unsigned num_quads = type.length / 4;
> -   unsigned mip_filter;
> +   unsigned mip_filter, i;
> struct lp_build_sample_context bld;
> struct lp_static_sampler_state derived_sampler_state =
> *static_sampler_state;
> LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
> @@ -1726,30 +1726,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>}
> }
>  
> -   /*
> -* always use the same coords for layer, shadow cmp, should probably
> -* put that into gallivm sampler interface I get real tired shuffling
> -* coordinates.
> -*/
> -   newcoords[0] = coords[0]; /* 1st coord */
> -   newcoords[1] = coords[1]; /* 2nd coord */
> -   newcoords[2] = coords[2]; /* 3rd coord (for cube, 3d and layer) */
> -   newcoords[3] = coords[3]; /* 4th coord (intended for cube array layer) */
> -   newcoords[4] = coords[2]; /* shadow cmp coord */
> -   if (target == PIPE_TEXTURE_1D_ARRAY) {
> -  newcoords[2] = coords[1]; /* layer coord */
> -  /* FIXME: shadow cmp coord can be wrong if we don't take target from
> shader decl. */
> -   }
> -   else if (target == PIPE_TEXTURE_2D_ARRAY) {
> -  newcoords[2] = coords[2];
> -  newcoords[4] = coords[3];
> -   }
> -   else if (target == PIPE_TEXTURE_CUBE) {
> -  newcoords[4] = coords[3];
> -   }
> -   else if (target == PIPE_TEXTURE_CUBE_ARRAY) {
> -  assert(0); /* not handled */
> -  // layer coord is ok but shadow coord is impossible */
> +   for (i = 0; i < 5; i++) {
> +  newcoords[i] = coords[i];
> }
>  
> if (0) {
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index db8e997..cab53df 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -1614,13 +1614,14 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
> unsigned unit;
> LLVMValueRef lod_bias, explicit_lod;
> LLVMValueRef oow = NULL;
> -   LLVMValueRef coords[4];
> +   LLVMValueRef coords[5];
> LLVMValueRef offsets[3] = { NULL };
> struct lp_derivatives derivs;
> struct lp_derivatives *deriv_ptr = NULL;
> boolean scalar_lod;
> -   unsigned num_coords, num_derivs, num_offsets;
> -   unsigned i;
> +   unsigned num_derivs, num_offsets, i;
> +   unsigned shadow_coord = 0;
> +   unsigned layer_coord = 0;
>  
> if (!bld->sampler) {
>_debug_printf("warning: found texture instruction but no sampler
>generator supplied\n");
> @@ -1631,55 +1632,58 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
> }
>  
> switch (inst->Texture.Texture) {
> -   case TGSI_TEXTURE_1D:
> -  num_coords = 1;
> -  num_offsets = 1;
> -  num_derivs = 1;
> -  break;
> case TGSI_TEXTURE_1D_ARRAY:
> -  num_coords = 2;
> +  layer_coord = 1;
> +  /* fallthrou

Re: [Mesa-dev] [PATCH] gallivm: do per-sample depth comparison instead of doing it post-filter

2013-08-14 Thread Zack Rusin

 > -  lp_build_sample_compare(&bld, newcoords[4], texel_out);
> +  if (0)
> + lp_build_sample_compare(&bld, newcoords[4], texel_out);
> }

What does this do? 
The rest looks good to me!

Reviewed-by: Zack Rusin  
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] llvmpipe: fix stencil bug if we have both stencil and depth tests

2013-08-15 Thread Zack Rusin

- Original Message -
> From: Roland Scheidegger 
> 
> This is a very well hidden bug found by accident (only the fixed glean
> tstencil2 test so far seems to hit it).
> We must use new mask with combined s_pass values and orig_mask values
> for zpass/zfail stencil ops, otherwise both the sfail op and one of
> zpass/zfail op are applied (probably not hit in most tests because
> some of the ops tend to be KEEP usually).
> 
> Note: this is a candidate for the 9.2 branch.

Looks good
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] draw: handle nan clipdistance

2013-08-15 Thread Zack Rusin

If clipdistance for one of the vertices is nan (or inf) then the
entire primitive should be discarded.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_cliptest_tmp.h |2 +-
 src/gallium/auxiliary/draw/draw_llvm.c |3 ++
 src/gallium/auxiliary/draw/draw_pipe_clip.c|   13 +-
 src/gallium/auxiliary/gallivm/lp_bld_arit.c|   53 
 src/gallium/auxiliary/gallivm/lp_bld_arit.h|   11 +
 5 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_cliptest_tmp.h 
b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
index e4500db..fc54810 100644
--- a/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
@@ -140,7 +140,7 @@ static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
  clipdist = out->data[cd[0]][i];
   else
  clipdist = out->data[cd[1]][i-4];
-  if (clipdist < 0)
+  if (clipdist < 0 || util_is_inf_or_nan(clipdist))
  mask |= 1 << plane_idx;
} else {
   if (dot4(clipvertex, plane[plane_idx]) < 0)
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 84e3392..1e9eadb 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1261,6 +1261,7 @@ generate_clipmask(struct draw_llvm *llvm,
if (clip_user) {
   LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr);
   LLVMValueRef indices[3];
+  LLVMValueRef is_nan;
 
   /* userclip planes */
   while (ucp_enable) {
@@ -1280,6 +1281,8 @@ generate_clipmask(struct draw_llvm *llvm,
clipdist = LLVMBuildLoad(builder, outputs[cd[1]][i-4], "");
 }
 test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, 
zero, clipdist);
+is_nan = lp_build_is_inf_or_nan(gallivm, vs_type, clipdist);
+test = LLVMBuildOr(builder, test, is_nan, "");
 temp = lp_build_const_int_vec(gallivm, i32_type, 1 << plane_idx);
 test = LLVMBuildAnd(builder, test, temp, "");
 mask = LLVMBuildOr(builder, mask, test, "");
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c 
b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index b76e9a5..2f2aadb 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -104,7 +104,7 @@ static void interp_attr( float dst[4],
 float t,
 const float in[4],
 const float out[4] )
-{  
+{
dst[0] = LINTERP( t, out[0], in[0] );
dst[1] = LINTERP( t, out[1], in[1] );
dst[2] = LINTERP( t, out[2], in[2] );
@@ -380,6 +380,9 @@ do_clip_tri( struct draw_stage *stage,
   dp_prev = getclipdist(clipper, vert_prev, plane_idx);
   clipmask &= ~(1<= MAX_CLIPPED_VERTICES)
  return;
@@ -392,6 +395,9 @@ do_clip_tri( struct draw_stage *stage,
 
  float dp = getclipdist(clipper, vert, plane_idx);
 
+ if (util_is_inf_or_nan(dp))
+return; //discard nan
+
 if (!IS_NEGATIVE(dp_prev)) {
 assert(outcount < MAX_CLIPPED_VERTICES);
 if (outcount >= MAX_CLIPPED_VERTICES)
@@ -522,6 +528,9 @@ do_clip_line( struct draw_stage *stage,
   const float dp0 = getclipdist(clipper, v0, plane_idx);
   const float dp1 = getclipdist(clipper, v1, plane_idx);
 
+  if (util_is_inf_or_nan(dp0) || util_is_inf_or_nan(dp1))
+ return; //discard nan
+
   if (dp1 < 0.0F) {
 float t = dp1 / (dp1 - dp0);
  t1 = MAX2(t1, t);
@@ -594,7 +603,7 @@ clip_tri( struct draw_stage *stage,
unsigned clipmask = (header->v[0]->clipmask | 
 header->v[1]->clipmask | 
 header->v[2]->clipmask);
-   
+
if (clipmask == 0) {
   /* no clipping needed */
   stage->next->tri( stage->next, header );
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c 
b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 98409c3..72b563e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -3671,3 +3671,56 @@ lp_build_isfinite(struct lp_build_context *bld,
return lp_build_compare(bld->gallivm, int_type, PIPE_FUNC_NOTEQUAL,
intx, infornan32);
 }
+
+/*
+ * Returns true if the number is nan or inf or false otherwise.
+ * The input has to be a floating point vector.
+ */
+LLVMValueRef
+lp_build_is_inf_or_nan(struct gallivm_state *gallivm,
+   const struct lp_type type,
+   LLVMValueRef x)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_type int_type = lp_int_type(type);
+   LLVMVal

Re: [Mesa-dev] [PATCH] draw: handle nan clipdistance

2013-08-15 Thread Zack Rusin

> I realize this function isn't used but it looks unnecessarily
> complicated - two constants one AND plus one comparison when you could
> simply do a single comparison (compare x with x with unordered not
> equal). This is actually doubly bad with AVX because the int comparison
> is going to use 4 instructions instead of 1 (extract/2 cmp/1 insert),
> well if this runs 8-wide at least.

I'm going to kill that function, we already have lp_build_isnan that does the 
correct thing.

> Otherwise looks good. Though I'm not sure you really need to kill the
> prims if the clip distances are infinite?

The d3d10 spec says "Coordinates coming in to clipping with infinites at x, y, 
z may or may not result in a discarded primitive.". I liked handling them the 
same way as nan, otherwise we're just generating pointless primitives. I don't 
have a strong opinion though, wlk doesn't seem to test infinities.

z
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] draw: fix PIPE_MAX_SAMPLER/PIPE_MAX_SHADER_SAMPLER_VIEWS issues

2013-08-30 Thread Zack Rusin

Looks good.

Reviewed-by: Zack Rusin 

- Original Message -
> From: Roland Scheidegger 
> 
> pstipple/aaline stages used PIPE_MAX_SAMPLER instead of
> PIPE_MAX_SHADER_SAMPLER_VIEWS when dealing with sampler views.
> Now these stages can't actually handle sampler_unit != texture_unit anyway
> (they cannot work with d3d10 shaders at all due to using tex not sample
> opcodes as "mixed mode" shaders are impossible) but this leads to crashes if
> a driver just installs these stages and then more than PIPE_MAX_SAMPLER views
> are set even if the stages aren't even used.
> ---
>  src/gallium/auxiliary/draw/draw_pipe_aaline.c   |6 +++---
>  src/gallium/auxiliary/draw/draw_pipe_pstipple.c |6 +++---
>  2 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
> b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
> index c44c236..8483bd7 100644
> --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
> +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
> @@ -107,7 +107,7 @@ struct aaline_stage
> struct aaline_fragment_shader *fs;
> struct {
>void *sampler[PIPE_MAX_SAMPLERS];
> -  struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
> +  struct pipe_sampler_view
> *sampler_views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
> } state;
>  
> /*
> @@ -763,7 +763,7 @@ aaline_destroy(struct draw_stage *stage)
> struct pipe_context *pipe = stage->draw->pipe;
> uint i;
>  
> -   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
> +   for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
>pipe_sampler_view_reference(&aaline->state.sampler_views[i], NULL);
> }
>  
> @@ -937,7 +937,7 @@ aaline_set_sampler_views(struct pipe_context *pipe,
> for (i = 0; i < num; i++) {
>pipe_sampler_view_reference(&aaline->state.sampler_views[i],
>views[i]);
> }
> -   for ( ; i < PIPE_MAX_SAMPLERS; i++) {
> +   for ( ; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
>pipe_sampler_view_reference(&aaline->state.sampler_views[i], NULL);
> }
> aaline->num_sampler_views = num;
> diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
> b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
> index 51f5a86..f38addd 100644
> --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
> +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
> @@ -87,7 +87,7 @@ struct pstip_stage
> struct pstip_fragment_shader *fs;
> struct {
>void *samplers[PIPE_MAX_SAMPLERS];
> -  struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
> +  struct pipe_sampler_view
> *sampler_views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
>const struct pipe_poly_stipple *stipple;
> } state;
>  
> @@ -592,7 +592,7 @@ pstip_destroy(struct draw_stage *stage)
> struct pstip_stage *pstip = pstip_stage(stage);
> uint i;
>  
> -   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
> +   for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
>pipe_sampler_view_reference(&pstip->state.sampler_views[i], NULL);
> }
>  
> @@ -731,7 +731,7 @@ pstip_set_sampler_views(struct pipe_context *pipe,
> for (i = 0; i < num; i++) {
>pipe_sampler_view_reference(&pstip->state.sampler_views[i], views[i]);
> }
> -   for (; i < PIPE_MAX_SAMPLERS; i++) {
> +   for (; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
>pipe_sampler_view_reference(&pstip->state.sampler_views[i], NULL);
> }
>  
> --
> 1.7.9.5
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] gallivm: handle unbound textures in texture sampling / texture queries

2013-08-30 Thread Zack Rusin

Same here.

- Original Message -
> Series LGTM.
> 
> Jose
> 
> - Original Message -
> > From: Roland Scheidegger 
> > 
> > Turns out we don't need to do much extra work for detecting this case,
> > since we are guaranteed to get a empty static texture state in this case,
> > hence just rely on format being 0 and return all zero then.
> > Previously needed dummy textures (would just have crashed on format being 0
> > otherwise) which cannot return the correct result for size queries and when
> > sampling textures with wrap modes using border.
> > As a bonus should hugely increase performance when sampling unbound
> > textures
> > -
> > too bad it isn't a useful feature :-).
> > ---
> >  src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |   26
> >  +
> >  1 file changed, 26 insertions(+)
> > 
> > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> > b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> > index db5e366..e0d3dd2 100644
> > --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> > +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> > @@ -2088,6 +2088,19 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> >debug_printf("Sample from %s\n", util_format_name(fmt));
> > }
> >  
> > +   if (static_texture_state->format == PIPE_FORMAT_NONE) {
> > +  /*
> > +   * If there's nothing bound, format is NONE, and we must return
> > +   * all zero as mandated by d3d10 in this case.
> > +   */
> > +  unsigned chan;
> > +  LLVMValueRef zero = lp_build_const_vec(gallivm, type, 0.0F);
> > +  for (chan = 0; chan < 4; chan++) {
> > + texel_out[chan] = zero;
> > +  }
> > +  return;
> > +   }
> > +
> > assert(type.floating);
> >  
> > /* Setup our build context */
> > @@ -2517,6 +2530,19 @@ lp_build_size_query_soa(struct gallivm_state
> > *gallivm,
> > unsigned num_lods = 1;
> > struct lp_build_context bld_int_vec4;
> >  
> > +   if (static_state->format == PIPE_FORMAT_NONE) {
> > +  /*
> > +   * If there's nothing bound, format is NONE, and we must return
> > +   * all zero as mandated by d3d10 in this case.
> > +   */
> > +  unsigned chan;
> > +  LLVMValueRef zero = lp_build_const_vec(gallivm, int_type, 0.0F);
> > +  for (chan = 0; chan < 4; chan++) {
> > + sizes_out[chan] = zero;
> > +  }
> > +  return;
> > +   }
> > +
> > /*
> >  * Do some sanity verification about bound texture and shader dcl
> >  target.
> >  * Not entirely sure what's possible but assume array/non-array
> > --
> > 1.7.9.5
> > 
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] gallivm: support indirect registers on both dimensions

2013-09-03 Thread Zack Rusin

We support indirect addressing only on the vertex index, but some
shaders also use indirect addressing on attributes. This patch
adds support for indirect addressing on both dimensions inside
gs arrays.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_llvm.c  | 23 +--
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.h |  3 ++-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c |  4 +++-
 3 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 820d6b0..03668d9 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1360,8 +1360,9 @@ clipmask_booli32(struct gallivm_state *gallivm,
 static LLVMValueRef
 draw_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface,
  struct lp_build_tgsi_context * bld_base,
- boolean is_indirect,
+ boolean is_vindex_indirect,
  LLVMValueRef vertex_index,
+ boolean is_aindex_indirect,
  LLVMValueRef attrib_index,
  LLVMValueRef swizzle_index)
 {
@@ -1372,18 +1373,28 @@ draw_gs_llvm_fetch_input(const struct 
lp_build_tgsi_gs_iface *gs_iface,
LLVMValueRef res;
struct lp_type type = bld_base->base.type;
 
-   if (is_indirect) {
+   if (is_vindex_indirect || is_aindex_indirect) {
   int i;
   res = bld_base->base.zero;
   for (i = 0; i < type.length; ++i) {
  LLVMValueRef idx = lp_build_const_int32(gallivm, i);
- LLVMValueRef vert_chan_index = LLVMBuildExtractElement(builder,
-vertex_index, 
idx, "");
+ LLVMValueRef vert_chan_index = vertex_index;
+ LLVMValueRef attr_chan_index = attrib_index;
  LLVMValueRef channel_vec, value;
+
+ if (is_vindex_indirect) {
+vert_chan_index = LLVMBuildExtractElement(builder,
+  vertex_index, idx, "");
+ }
+ if (is_aindex_indirect) {
+attr_chan_index = LLVMBuildExtractElement(builder,
+  attrib_index, idx, "");
+ }
+
  indices[0] = vert_chan_index;
- indices[1] = attrib_index;
+ indices[1] = attr_chan_index;
  indices[2] = swizzle_index;
- 
+
  channel_vec = LLVMBuildGEP(builder, gs->input, indices, 3, "");
  channel_vec = LLVMBuildLoad(builder, channel_vec, "");
  value = LLVMBuildExtractElement(builder, channel_vec, idx, "");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 522302e..8bcdbc8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -395,8 +395,9 @@ struct lp_build_tgsi_gs_iface
 {
LLVMValueRef (*fetch_input)(const struct lp_build_tgsi_gs_iface *gs_iface,
struct lp_build_tgsi_context * bld_base,
-   boolean is_indirect,
+   boolean is_vindex_indirect,
LLVMValueRef vertex_index,
+   boolean is_aindex_indirect,
LLVMValueRef attrib_index,
LLVMValueRef swizzle_index);
void (*emit_vertex)(const struct lp_build_tgsi_gs_iface *gs_iface,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 4c6b6ec..e50f1d1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -1135,7 +1135,9 @@ emit_fetch_gs_input(
 
res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
 reg->Dimension.Indirect,
-vertex_index, attrib_index,
+vertex_index,
+reg->Register.Indirect,
+attrib_index,
 swizzle_index);
 
assert(res);
-- 
1.8.3.2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] Revert "draw: cleanup the extra attribs"

2013-09-04 Thread Zack Rusin

> This reverts commit 57cd3267782fcf92d1e7d772760956516d4367df.
> 
> This fixes piglit regressions with additional draw stages on
> llvmpipe, softpipe and i915g. The attributes can't be cleared at
> this point because they might be in use by the additional draw
> stages.

The attributes have to be cleared but the interface for looking them up has to be 
exactly the same in llvmpipe (i.e. only llvmpipe does it correctly).

> https://bugs.freedesktop.org/show_bug.cgi?id=67963
> https://bugs.freedesktop.org/show_bug.cgi?id=67965
> https://bugs.freedesktop.org/show_bug.cgi?id=67966

All of which have been fixed for a long time, just no one had the time to 
verify and close. In other words please don't revert, if you don't feel like 
changing the shader output lookup just remove the prepare_shader_outputs call, 
like I mentioned, and that should get you the old behavior back.

z
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] draw: cleanup the extra attribs

2013-09-04 Thread Zack Rusin

Hi, Stéphane. 

No we should not revert to the old behavior. The old behavior was incorrect. 
Consider this: 

-- setup state that draws a wireframe -> draw should inject frontface 
-- the driver needs to be able to find the injected wireframe output 
-- draw 
-- setup state that draws solid fill with fragment shader using primid input -> 
draw should inject primid but not frontface 
-- driver needs to be able to find the injected primid but not frontface info 
-- draw 

Without cleaning the attributes before the second draw, the draw will keep the 
frontface id in the extra attribs, incorrectly pointing the driver to a 
non-existing crash. That's why the attribs need to be cleaned before rendering. 

i915g simply shouldn't call draw_prepare_shader_outputs because it doesn't know 
what to do with the injected front-face or primid anyway. That part I'd suggest 
you remove. It will get you back to the old behavior. 

z 

- Original Message -

> Hi Zack,

> This change regresses a bunch of point sprite piglit tests on i915g. Should
> we revert back to the old behaviour? As far as I can see, it was correct (it
> was keeping the attributes in case another stage is using them).

> Stéphane

> On Thu, Aug 8, 2013 at 12:46 PM, Zack Rusin < za...@vmware.com > wrote:

> > Before inserting new front face and prim id outputs cleanup
> 
> > the old extra outputs, otherwise our cache will use previous
> 
> > output slots which will break as soon as outputs of the current
> 
> > shader don't match the last.
> 

> > Signed-off-by: Zack Rusin < za...@vmware.com >
> 
> > ---
> 
> > src/gallium/auxiliary/draw/draw_context.c | 1 +
> 
> > 1 file changed, 1 insertion(+)
> 

> > diff --git a/src/gallium/auxiliary/draw/draw_context.c
> > b/src/gallium/auxiliary/draw/draw_context.c
> 
> > index af9caee..2dc6772 100644
> 
> > --- a/src/gallium/auxiliary/draw/draw_context.c
> 
> > +++ b/src/gallium/auxiliary/draw/draw_context.c
> 
> > @@ -555,6 +555,7 @@ draw_get_shader_info(const struct draw_context *draw)
> 
> > void
> 
> > draw_prepare_shader_outputs(struct draw_context *draw)
> 
> > {
> 
> > + draw_remove_extra_vertex_attribs(draw);
> 
> > draw_ia_prepare_outputs(draw, draw->pipeline.ia);
> 
> > draw_unfilled_prepare_outputs(draw, draw->pipeline.unfilled);
> 
> > }
> 
> > --
> 
> > 1.7.10.4
> 
> > ___
> 
> > mesa-dev mailing list
> 
> > mesa-dev@lists.freedesktop.org
> 
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] util/u_blit: Implement util_blit_pixels via pipe_context::blit.

2013-09-17 Thread Zack Rusin

The entire series looks good to me.

Reviewed-by: Zack Rusin 

- Original Message -
> From: José Fonseca 
> 
> This removes a lot of code, but not everything, as util_blit_pixels_tex
> is still useful when one needs to override pipe_sampler_view::swizzle_?.
> ---
>  src/gallium/auxiliary/util/u_blit.c | 447
>  +++-
>  1 file changed, 37 insertions(+), 410 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/util/u_blit.c
> b/src/gallium/auxiliary/util/u_blit.c
> index e9bec4a..4ba71b9 100644
> --- a/src/gallium/auxiliary/util/u_blit.c
> +++ b/src/gallium/auxiliary/util/u_blit.c
> @@ -57,29 +57,20 @@ struct blit_state
> struct pipe_context *pipe;
> struct cso_context *cso;
>  
> -   struct pipe_blend_state blend_write_color, blend_keep_color;
> +   struct pipe_blend_state blend_write_color;
> struct pipe_depth_stencil_alpha_state dsa_keep_depthstencil;
> -   struct pipe_depth_stencil_alpha_state dsa_write_depthstencil;
> -   struct pipe_depth_stencil_alpha_state dsa_write_depth;
> -   struct pipe_depth_stencil_alpha_state dsa_write_stencil;
> struct pipe_rasterizer_state rasterizer;
> struct pipe_sampler_state sampler;
> struct pipe_viewport_state viewport;
> struct pipe_vertex_element velem[2];
> -   enum pipe_texture_target internal_target;
>  
> void *vs;
> void *fs[PIPE_MAX_TEXTURE_TYPES][TGSI_WRITEMASK_XYZW + 1];
> -   void *fs_depthstencil[PIPE_MAX_TEXTURE_TYPES];
> -   void *fs_depth[PIPE_MAX_TEXTURE_TYPES];
> -   void *fs_stencil[PIPE_MAX_TEXTURE_TYPES];
>  
> struct pipe_resource *vbuf;  /**< quad vertices */
> unsigned vbuf_slot;
>  
> float vertices[4][2][4];   /**< vertex/texcoords for quad */
> -
> -   boolean has_stencil_export;
>  };
>  
>  
> @@ -103,20 +94,6 @@ util_create_blit(struct pipe_context *pipe, struct
> cso_context *cso)
> /* disabled blending/masking */
> ctx->blend_write_color.rt[0].colormask = PIPE_MASK_RGBA;
>  
> -   /* depth stencil states */
> -   ctx->dsa_write_depth.depth.enabled = 1;
> -   ctx->dsa_write_depth.depth.writemask = 1;
> -   ctx->dsa_write_depth.depth.func = PIPE_FUNC_ALWAYS;
> -   ctx->dsa_write_stencil.stencil[0].enabled = 1;
> -   ctx->dsa_write_stencil.stencil[0].func = PIPE_FUNC_ALWAYS;
> -   ctx->dsa_write_stencil.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE;
> -   ctx->dsa_write_stencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
> -   ctx->dsa_write_stencil.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE;
> -   ctx->dsa_write_stencil.stencil[0].valuemask = 0xff;
> -   ctx->dsa_write_stencil.stencil[0].writemask = 0xff;
> -   ctx->dsa_write_depthstencil.depth = ctx->dsa_write_depth.depth;
> -   ctx->dsa_write_depthstencil.stencil[0] =
> ctx->dsa_write_stencil.stencil[0];
> -
> /* rasterizer */
> ctx->rasterizer.cull_face = PIPE_FACE_NONE;
> ctx->rasterizer.half_pixel_center = 1;
> @@ -147,14 +124,6 @@ util_create_blit(struct pipe_context *pipe, struct
> cso_context *cso)
>ctx->vertices[i][1][3] = 1.0f; /* q */
> }
>  
> -   if(pipe->screen->get_param(pipe->screen, PIPE_CAP_NPOT_TEXTURES))
> -  ctx->internal_target = PIPE_TEXTURE_2D;
> -   else
> -  ctx->internal_target = PIPE_TEXTURE_RECT;
> -
> -   ctx->has_stencil_export =
> -  pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT);
> -
> return ctx;
>  }
>  
> @@ -178,18 +147,6 @@ util_destroy_blit(struct blit_state *ctx)
>}
> }
>  
> -   for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) {
> -  if (ctx->fs_depthstencil[i]) {
> - pipe->delete_fs_state(pipe, ctx->fs_depthstencil[i]);
> -  }
> -  if (ctx->fs_depth[i]) {
> - pipe->delete_fs_state(pipe, ctx->fs_depth[i]);
> -  }
> -  if (ctx->fs_stencil[i]) {
> - pipe->delete_fs_state(pipe, ctx->fs_stencil[i]);
> -  }
> -   }
> -
> pipe_resource_reference(&ctx->vbuf, NULL);
>  
> FREE(ctx);
> @@ -217,63 +174,6 @@ set_fragment_shader(struct blit_state *ctx, uint
> writemask,
>  
>  
>  /**
> - * Helper function to set the shader which writes depth and stencil.
> - */
> -static INLINE void
> -set_depthstencil_fragment_shader(struct blit_state *ctx,
> - enum pipe_texture_target pipe_tex)
> -{
> -   if (!ctx->fs_depthstencil[pipe_tex]) {
> -  unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(pipe_tex, 0);
> -
> -  ctx->fs_depthstencil[pipe_tex] =
> - util_make_fragment_tex_shader_writedepthstencil(ct

[Mesa-dev] [PATCH 1/3] llvmpipe: count c_primitives before discarding null prims

2013-09-19 Thread Zack Rusin

We need to count the clipper primitives before the rasterizer
discards one it considers to be null.

Signed-off-by: Zack Rusin 
---
 src/gallium/drivers/llvmpipe/lp_setup_tri.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c 
b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 23bc6e2..e61efd4 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -252,7 +252,6 @@ do_triangle_ccw(struct lp_setup_context *setup,
 const float (*v2)[4],
 boolean frontfacing )
 {
-   struct llvmpipe_context *lp_context = (struct llvmpipe_context 
*)setup->pipe;
struct lp_scene *scene = setup->scene;
const struct lp_setup_variant_key *key = &setup->setup.variant->key;
struct lp_rast_triangle *tri;
@@ -340,11 +339,6 @@ do_triangle_ccw(struct lp_setup_context *setup,
 
LP_COUNT(nr_tris);
 
-   if (lp_context->active_statistics_queries &&
-   !llvmpipe_rasterization_disabled(lp_context)) {
-  lp_context->pipeline_statistics.c_primitives++;
-   }
-
/* Setup parameter interpolants:
 */
setup->setup.variant->jit_function( v0,
@@ -803,7 +797,6 @@ static void retry_triangle_ccw( struct lp_setup_context 
*setup,
}
 }
 
-
 /**
  * Calculate fixed position data for a triangle
  */
@@ -1102,11 +1095,17 @@ static void triangle_both( struct lp_setup_context 
*setup,
   const float (*v2)[4] )
 {
struct fixed_position position;
+   struct llvmpipe_context *lp_context = (struct llvmpipe_context 
*)setup->pipe;
 
if (setup->subdivide_large_triangles &&
check_subdivide_triangle(setup, v0, v1, v2, triangle_both))
   return;
 
+   if (lp_context->active_statistics_queries &&
+   !llvmpipe_rasterization_disabled(lp_context)) {
+  lp_context->pipeline_statistics.c_primitives++;
+   }
+
calc_fixed_position(setup, &position, v0, v1, v2);
 
if (0) {
-- 
1.8.3.2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] llvmpipe: increase number of subpixel bits to eight

2013-09-19 Thread Zack Rusin

Unfortunately d3d10 requires a lot higher precision (e.g.
wgf11clipping tests for it). The smallest number of precision
bits with which it passes is 8. That means that we need to
decrease the maximum length of an edge that we can handle without
subdivision by 4 bits. Abstracted the code a bit to make it easier
to change once we switch to 64bit rasterization.

Signed-off-by: Zack Rusin 
---
 src/gallium/drivers/llvmpipe/lp_rast.h  | 12 +++-
 src/gallium/drivers/llvmpipe/lp_setup.c | 14 +-
 src/gallium/drivers/llvmpipe/lp_setup_tri.c |  2 +-
 3 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h 
b/src/gallium/drivers/llvmpipe/lp_rast.h
index c57f2ea..b72be55 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -46,10 +46,20 @@ struct lp_scene;
 struct lp_fence;
 struct cmd_bin;
 
+#define FIXED_TYPE_WIDTH 32
 /** For sub-pixel positioning */
-#define FIXED_ORDER 4
+#define FIXED_ORDER 8
 #define FIXED_ONE (1<draw_regions[i]);
  }
   }
-  /* If the framebuffer is large we have to think about fixed-point
-   * integer overflow.  For 2K by 2K images, coordinates need 15 bits
-   * (2^11 + 4 subpixel bits).  The product of two such numbers would
-   * use 30 bits.  Any larger and we could overflow a 32-bit int.
-   *
-   * To cope with this problem we check if triangles are large and
-   * subdivide them if needed.
+  /* 
+   * Subdivide triangles if the framebuffer is larger than our 
+   * MAX_FIXED_LENGTH cab accomodate.
*/
-  setup->subdivide_large_triangles = (setup->fb.width > 2048 &&
-  setup->fb.height > 2048);
+  setup->subdivide_large_triangles = (setup->fb.width > MAX_FIXED_LENGTH &&
+  setup->fb.height > MAX_FIXED_LENGTH);
}
   
setup->dirty = 0;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c 
b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index e61efd4..ee30a64 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -988,7 +988,7 @@ check_subdivide_triangle(struct lp_setup_context *setup,
  const float (*v2)[4],
  triangle_func_t tri)
 {
-   const float maxLen = 2048.0f;  /* longest permissible edge, in pixels */
+   const float maxLen = MAX_FIXED_LENGTH;  /* longest permissible edge, in 
pixels */
float dx10, dy10, len10;
float dx21, dy21, len21;
float dx02, dy02, len02;
-- 
1.8.3.2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] draw/clip: don't emit so many empty triangles

2013-09-19 Thread Zack Rusin

Compress empty triangles (don't emit more than one in a row) and
never emit empty triangles if we already generated a triangle
covering a non-null area. We can't skip all null-triangles
because c_primitives expects ones that were generated from vertices
exactly at the clipping-plane, to be emitted.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_pipe_clip.c | 39 +
 1 file changed, 39 insertions(+)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c 
b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index 0f90bfd..2d6df81 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -209,6 +209,29 @@ static void interp( const struct clip_stage *clip,
}
 }
 
+/**
+ * Checks whether the specifed triangle is empty and if it is returns
+ * true, otherwise returns false.
+ * Triangle is considered null/empty if it's area is qual to zero.
+ */
+static INLINE boolean
+is_tri_null(struct draw_context *draw, const struct prim_header *header)
+{
+   const unsigned pos_attr = draw_current_shader_position_output(draw);
+   float x1 = header->v[1]->data[pos_attr][0] - 
header->v[0]->data[pos_attr][0];
+   float y1 = header->v[1]->data[pos_attr][1] - 
header->v[0]->data[pos_attr][1];
+   float z1 = header->v[1]->data[pos_attr][2] - 
header->v[0]->data[pos_attr][2];
+
+   float x2 = header->v[2]->data[pos_attr][0] - 
header->v[0]->data[pos_attr][0];
+   float y2 = header->v[2]->data[pos_attr][1] - 
header->v[0]->data[pos_attr][1];
+   float z2 = header->v[2]->data[pos_attr][2] - 
header->v[0]->data[pos_attr][2];
+
+   float vx = y1 * z2 - z1 * y2;
+   float vy = x1 * z2 - z1 * x2;
+   float vz = x1 * y2 - y1 * x2;
+
+   return (vx*vx  + vy*vy + vz*vz) == 0.f;
+}
 
 /**
  * Emit a post-clip polygon to the next pipeline stage.  The polygon
@@ -223,6 +246,8 @@ static void emit_poly( struct draw_stage *stage,
struct prim_header header;
unsigned i;
ushort edge_first, edge_middle, edge_last;
+   boolean last_tri_was_null = FALSE;
+   boolean tri_was_not_null = FALSE;
 
if (stage->draw->rasterizer->flatshade_first) {
   edge_first  = DRAW_PIPE_EDGE_FLAG_0;
@@ -244,6 +269,7 @@ static void emit_poly( struct draw_stage *stage,
header.pad = 0;
 
for (i = 2; i < n; i++, header.flags = edge_middle) {
+  boolean tri_null;
   /* order the triangle verts to respect the provoking vertex mode */
   if (stage->draw->rasterizer->flatshade_first) {
  header.v[0] = inlist[0];  /* the provoking vertex */
@@ -256,6 +282,19 @@ static void emit_poly( struct draw_stage *stage,
  header.v[2] = inlist[0];  /* the provoking vertex */
   }
 
+  tri_null = is_tri_null(stage->draw, &header);
+  /* If we generated a triangle with an area, aka. non-null triangle, 
+   * or if the previous triangle was also null then skip all subsequent
+   * null triangles */
+  if ((tri_was_not_null && tri_null) || (last_tri_was_null && tri_null)) {
+ last_tri_was_null = tri_null;
+ continue;
+  }
+  last_tri_was_null = tri_null;
+  if (!tri_null) {
+ tri_was_not_null = TRUE;
+  }
+
   if (!edgeflags[i-1]) {
  header.flags &= ~edge_middle;
   }
-- 
1.8.3.2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] llvmpipe: align the array used for subdivived vertices

2013-09-23 Thread Zack Rusin

When subdividing a triangle we're using a temporary array to store
the new coordinates for the subdivided triangles. Unfortunately
the array used for that was not aligned properly causing
random crashes in the llvm jit code which was trying to load
vectors from it.

Signed-off-by: Zack Rusin 
---
 src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c 
b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 8b0fcd0..cf67f29 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -909,7 +909,7 @@ subdiv_tri(struct lp_setup_context *setup,
unsigned n = setup->fs.current.variant->shader->info.base.num_inputs + 1;
const struct lp_shader_input *inputs =
   setup->fs.current.variant->shader->inputs;
-   float vmid[PIPE_MAX_ATTRIBS][4];
+   PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) float vmid[PIPE_MAX_ATTRIBS][4];
const float (*vm)[4] = (const float (*)[4]) vmid;
unsigned i;
float w0, w1, wm;
-- 
1.8.3.2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] llvmpipe: we need to subdivide if fb is bigger in either direction

2013-09-24 Thread Zack Rusin

We need to subdivide triangles if either of the dimensions is
larger than the max edge length, not when both of them are larger.

Signed-off-by: Zack Rusin 
---
 src/gallium/drivers/llvmpipe/lp_setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c 
b/src/gallium/drivers/llvmpipe/lp_setup.c
index 5fde01f..c8199b4 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -1015,7 +1015,7 @@ try_update_scene_state( struct lp_setup_context *setup )
* To cope with this problem we check if triangles are large and
* subdivide them if needed.
*/
-  setup->subdivide_large_triangles = (setup->fb.width > 2048 &&
+  setup->subdivide_large_triangles = (setup->fb.width > 2048 ||
   setup->fb.height > 2048);
}
   
-- 
1.8.3.2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] DRI2: Don't disable GLX_INTEL_swap_event unconditionally

2013-02-14 Thread Zack Rusin

GLX_INTEL_swap_event is broken on the server side, where it's
currently unconditionally enabled. This completely breaks
systems running on drivers which don't support that extension.
There's no way to test for its presence on this side, so instead
of disabling it unconditionally, just disable it for drivers
which are known to not support it. It makes sense because
most drivers do support it right now.
We'll be able to remove this once Xserver properly advertises
GLX_INTEL_swap_event.

Note: This is a candidate for the 9.0 branch.

Signed-off-by: Zack Rusin 
---
 src/glx/dri2_glx.c |   15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c
index 46a92fd..05808d4 100644
--- a/src/glx/dri2_glx.c
+++ b/src/glx/dri2_glx.c
@@ -1051,7 +1051,8 @@ static const struct glx_context_vtable 
dri2_context_vtable = {
 };
 
 static void
-dri2BindExtensions(struct dri2_screen *psc, const __DRIextension **extensions)
+dri2BindExtensions(struct dri2_screen *psc, const __DRIextension **extensions,
+   const char *driverName)
 {
int i;
 
@@ -1060,7 +1061,15 @@ dri2BindExtensions(struct dri2_screen *psc, const 
__DRIextension **extensions)
__glXEnableDirectExtension(&psc->base, "GLX_MESA_swap_control");
__glXEnableDirectExtension(&psc->base, "GLX_SGI_make_current_read");
 
-   if (psc->dri2->base.version >= 4) {
+   /*
+* GLX_INTEL_swap_event is broken on the server side, where it's
+* currently unconditionally enabled. This completely breaks
+* systems running on drivers which don't support that extension.
+* There's no way to test for its presence on this side, so instead
+* of disabling it uncondtionally, just disable it for drivers
+* which are known to not support it.
+*/
+   if (strcmp(driverName, "vmwgfx") != 0) {
   __glXEnableDirectExtension(&psc->base, "GLX_INTEL_swap_event");
}
 
@@ -1204,7 +1213,7 @@ dri2CreateScreen(int screen, struct glx_display * priv)
}
 
extensions = psc->core->getExtensions(psc->driScreen);
-   dri2BindExtensions(psc, extensions);
+   dri2BindExtensions(psc, extensions, driverName);
 
configs = driConvertConfigs(psc->core, psc->base.configs, driver_configs);
visuals = driConvertConfigs(psc->core, psc->base.visuals, driver_configs);
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] DRI2: Don't disable GLX_INTEL_swap_event unconditionally

2013-02-15 Thread Zack Rusin

 > Seems like we should also fix the server to not advertise the extension
> if the driver doesn't have the appropriate hooks implemented.  But I
> have no problem with doing this on the client side too.

I sent a patch last night to xorg-devel to handle it. But since it's likely 
to only be included in 1.14 it's very unlikely that it will help the currently 
broken distros, so that's why I wanted to have a stable Mesa release that works 
with either server. 
 
> Reviewed-by: Jesse Barnes 

Thanks!

z
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] DRI2: Don't disable GLX_INTEL_swap_event unconditionally

2013-02-15 Thread Zack Rusin

> Should we also test for the swrast driver?

That is actually handled by a hack in the Xserver. 
http://cgit.freedesktop.org/xorg/xserver/commit/glx?id=988d7ace19a009991a4528e783d1a94c2444c66a
The extension was manually removed from the list of extensions that are at all 
available to the software rasterizer. It just hid the real problem. 

> Otherwise, looks good.
> 
> Reviewed-by: Brian Paul 

Thanks!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/4] Fix geometry shaders in the draw module

2013-02-28 Thread Zack Rusin

This is a merge of Bryan's gs patches plus some work on top of them
that fixes the known issues with geometry shaders in the draw module.
I had to fix the llvm paths in the "account for separate shader objects"
commit because it broke them.
With this both softpipe and llvmpipe have working geometry shaders.

Bryan Cain (3):
  draw/gs: fix allocation of buffer for GS output vertices
  draw: account for separate shader objects in geometry shader code
  draw: use geometry shader info in clip_init_state if appropriate

Zack Rusin (1):
  draw/llvm: fix inputs to the geometry shader

 src/gallium/auxiliary/draw/draw_gs.c   |   32 +---
 src/gallium/auxiliary/draw/draw_gs.h   |2 ++
 src/gallium/auxiliary/draw/draw_llvm.c |   15 ++---
 src/gallium/auxiliary/draw/draw_pipe_clip.c|   15 +
 .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  |1 +
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c   |2 ++
 6 files changed, 52 insertions(+), 15 deletions(-)

-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/4] draw/gs: fix allocation of buffer for GS output vertices

2013-02-28 Thread Zack Rusin

From: Bryan Cain 

Before, it accounted for the size of the vertices but not the other fields
in the vertex_header struct, which caused memory corruption.

Reviewed-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_gs.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/draw/draw_gs.c 
b/src/gallium/auxiliary/draw/draw_gs.c
index 5c55523..2ce1a2a 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -401,7 +401,8 @@ int draw_geometry_shader_run(struct draw_geometry_shader 
*shader,
output_verts->vertex_size = input_verts->vertex_size;
output_verts->stride = input_verts->vertex_size;
output_verts->verts =
-  (struct vertex_header *)MALLOC(input_verts->vertex_size *
+  (struct vertex_header *)MALLOC(sizeof(struct vertex_header) +
+ input_verts->vertex_size *
  num_in_primitives *
  shader->max_output_vertices);
 
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/4] draw: account for separate shader objects in geometry shader code

2013-02-28 Thread Zack Rusin

From: Bryan Cain 

The geometry shader code seems to have been originally written with the
assumptions that there are the same number of VS outputs as GS outputs and
that VS outputs are in the same order as their corresponding GS inputs. Since
TGSI uses separate shader objects, these are both wrong assumptions. This
was causing several valid vertex/geometry shader combinations to either render
incorrectly or trigger an assertion.

Conflicts:
src/gallium/auxiliary/draw/draw_gs.c

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_gs.c   |   31 +---
 src/gallium/auxiliary/draw/draw_gs.h   |2 ++
 .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  |1 +
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c   |2 ++
 4 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_gs.c 
b/src/gallium/auxiliary/draw/draw_gs.c
index 2ce1a2a..5247917 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -148,6 +148,22 @@ void draw_delete_geometry_shader(struct draw_context *draw,
FREE(dgs);
 }
 
+static INLINE int
+draw_gs_get_input_index(int semantic, int index,
+const struct tgsi_shader_info *input_info)
+{
+   int i;
+   const ubyte *input_semantic_names = input_info->output_semantic_name;
+   const ubyte *input_semantic_indices = input_info->output_semantic_index;
+   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
+  if (input_semantic_names[i] == semantic &&
+  input_semantic_indices[i] == index)
+ return i;
+   }
+   debug_assert(0);
+   return -1;
+}
+
 /*#define DEBUG_OUTPUTS 1*/
 static INLINE void
 draw_geometry_fetch_outputs(struct draw_geometry_shader *shader,
@@ -228,6 +244,10 @@ static void draw_fetch_gs_input(struct 
draw_geometry_shader *shader,
 machine->Inputs[idx].xyzw[3].f[prim_idx] =
(float)shader->in_prim_idx;
  } else {
+vs_slot = draw_gs_get_input_index(
+shader->info.input_semantic_name[slot],
+shader->info.input_semantic_index[slot],
+shader->input_info);
 #if DEBUG_INPUTS
 debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n",
  slot, vs_slot, idx);
@@ -381,12 +401,14 @@ int draw_geometry_shader_run(struct draw_geometry_shader 
*shader,
  const unsigned 
constants_size[PIPE_MAX_CONSTANT_BUFFERS], 
  const struct draw_vertex_info *input_verts,
  const struct draw_prim_info *input_prim,
+ const struct tgsi_shader_info *input_info,
  struct draw_vertex_info *output_verts,
  struct draw_prim_info *output_prims )
 {
const float (*input)[4] = (const float (*)[4])input_verts->verts->data;
unsigned input_stride = input_verts->vertex_size;
-   unsigned vertex_size = input_verts->vertex_size;
+   unsigned num_outputs = shader->info.num_outputs;
+   unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * 
sizeof(float);
struct tgsi_exec_machine *machine = shader->machine;
unsigned num_input_verts = input_prim->linear ?
   input_verts->count :
@@ -398,11 +420,11 @@ int draw_geometry_shader_run(struct draw_geometry_shader 
*shader,
 
shader->max_output_vertices)
 * num_in_primitives;
 
-   output_verts->vertex_size = input_verts->vertex_size;
-   output_verts->stride = input_verts->vertex_size;
+   output_verts->vertex_size = vertex_size;
+   output_verts->stride = output_verts->vertex_size;
output_verts->verts =
   (struct vertex_header *)MALLOC(sizeof(struct vertex_header) +
- input_verts->vertex_size *
+ output_verts->vertex_size *
  num_in_primitives *
  shader->max_output_vertices);
 
@@ -426,6 +448,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader 
*shader,
shader->in_prim_idx = 0;
shader->input_vertex_stride = input_stride;
shader->input = input;
+   shader->input_info = input_info;
FREE(shader->primitive_lengths);
shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned));
 
diff --git a/src/gallium/auxiliary/draw/draw_gs.h 
b/src/gallium/auxiliary/draw/draw_gs.h
index bfac02c..5d10d0d 100644
--- a/src/gallium/auxiliary/draw/draw_gs.h
+++ b/src/gallium/auxiliary/draw/draw_gs.h
@@ -64,6 +64,7 @@ struct draw_geometry_shader {
unsigned in_prim_idx;
unsigned input_vertex_stride;
const float (*input)[4];
+   const struct tgs

[Mesa-dev] [PATCH 3/4] draw: use geometry shader info in clip_init_state if appropriate

2013-02-28 Thread Zack Rusin

From: Bryan Cain 


Reviewed-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_pipe_clip.c |   15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c 
b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index 3110809..eeaaf41 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -40,6 +40,7 @@
 #include "draw_vs.h"
 #include "draw_pipe.h"
 #include "draw_fs.h"
+#include "draw_gs.h"
 
 
 /** Set to 1 to enable printing of coords before/after clipping */
@@ -596,8 +597,10 @@ clip_init_state( struct draw_stage *stage )
 {
struct clip_stage *clipper = clip_stage( stage );
const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
+   const struct draw_geometry_shader *gs = stage->draw->gs.geometry_shader;
const struct draw_fragment_shader *fs = stage->draw->fs.fragment_shader;
uint i;
+   struct tgsi_shader_info *vs_info = gs ? &gs->info : &vs->info;
 
/* We need to know for each attribute what kind of interpolation is
 * done on it (flat, smooth or noperspective).  But the information
@@ -640,16 +643,16 @@ clip_init_state( struct draw_stage *stage )
 
clipper->num_flat_attribs = 0;
memset(clipper->noperspective_attribs, 0, 
sizeof(clipper->noperspective_attribs));
-   for (i = 0; i < vs->info.num_outputs; i++) {
+   for (i = 0; i < vs_info->num_outputs; i++) {
   /* Find the interpolation mode for a specific attribute
*/
   int interp;
 
   /* If it's gl_{Front,Back}{,Secondary}Color, pick up the mode
* from the array we've filled before. */
-  if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR ||
-  vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) {
- interp = indexed_interp[vs->info.output_semantic_index[i]];
+  if (vs_info->output_semantic_name[i] == TGSI_SEMANTIC_COLOR ||
+  vs_info->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) {
+ interp = indexed_interp[vs_info->output_semantic_index[i]];
   } else {
  /* Otherwise, search in the FS inputs, with a decent default
   * if we don't find it.
@@ -658,8 +661,8 @@ clip_init_state( struct draw_stage *stage )
  interp = TGSI_INTERPOLATE_PERSPECTIVE;
  if (fs) {
 for (j = 0; j < fs->info.num_inputs; j++) {
-   if (vs->info.output_semantic_name[i] == 
fs->info.input_semantic_name[j] &&
-   vs->info.output_semantic_index[i] == 
fs->info.input_semantic_index[j]) {
+   if (vs_info->output_semantic_name[i] == 
fs->info.input_semantic_name[j] &&
+   vs_info->output_semantic_index[i] == 
fs->info.input_semantic_index[j]) {
   interp = fs->info.input_interpolate[j];
   break;
}
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/4] draw/llvm: fix inputs to the geometry shader

2013-02-28 Thread Zack Rusin

We can't clip and viewport transform the vertices before we let
the geometry shader process them. Let's make sure the generated
vertex shader has both disabled if a geometry shader is present.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_llvm.c |   15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 8e46687..ff38a11 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1176,11 +1176,16 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant,
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
LLVMValueRef fetch_max;
struct lp_build_sampler_soa *sampler = 0;
-   LLVMValueRef ret, clipmask_bool_ptr;
-   const boolean bypass_viewport = variant->key.bypass_viewport;
-   const boolean enable_cliptest = variant->key.clip_xy || 
-   variant->key.clip_z  ||
-   variant->key.clip_user;
+   LLVMValueRef ret, clipmask_bool_ptr;   
+   const struct draw_geometry_shader *gs = draw->gs.geometry_shader;
+   /* If geometry shader is present we need to skip both the viewport
+* transformation and clipping otherwise the inputs to the geometry
+* shader will be incorrect.
+*/
+   const boolean bypass_viewport = gs || variant->key.bypass_viewport;
+   const boolean enable_cliptest = !gs && (variant->key.clip_xy || 
+   variant->key.clip_z  ||
+   variant->key.clip_user);
LLVMValueRef variant_func;
const unsigned pos = draw_current_shader_position_output(llvm->draw);
const unsigned cv = draw_current_shader_clipvertex_output(llvm->draw);
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] tgsi/exec: Correctly reset NumOutputs before parsing the shader

2013-03-07 Thread Zack Rusin

Whenever we're binding the shaders we're incrementing NumOutputs,
assuming the parser spots an output declaration, but we were never
resetting the variable. That means that each subsequent bind of
a geometry shader would add its number of outputs to the number
of outputs bound by all previously run shaders and our indexes
would get completely messed up.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c |   10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 6a74ef3..17ee079 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -681,6 +681,7 @@ tgsi_exec_machine_bind_shader(
 
mach->Processor = parse.FullHeader.Processor.Processor;
mach->ImmLimit = 0;
+   mach->NumOutputs = 0;
 
if (mach->Processor == TGSI_PROCESSOR_GEOMETRY &&
!mach->UsedGeometryShader) {
@@ -1484,12 +1485,15 @@ store_dest(struct tgsi_exec_machine *mach,
  + reg->Register.Index;
   dst = &mach->Outputs[offset + index].xyzw[chan_index];
 #if 0
+  debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
+   mach->NumOutputs, 
mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],
+   reg->Register.Index);
   if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
- fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, 
execmask);
+ debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, 
execmask);
  for (i = 0; i < TGSI_QUAD_SIZE; i++)
 if (execmask & (1 << i))
-   fprintf(stderr, "%f, ", chan->f[i]);
- fprintf(stderr, ")\n");
+   debug_printf("%f, ", chan->f[i]);
+ debug_printf(")\n");
   }
 #endif
   break;
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] draw/gs: Correctly iterate the emitted primitives

2013-03-07 Thread Zack Rusin

We were assuming that each emitted primitive had the same
number of vertices. That is incorrect. Emitted primitives
can have an arbitrary number of vertices. Simply increment
index on iteration to fix it.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_gs.c |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_gs.c 
b/src/gallium/auxiliary/draw/draw_gs.c
index 99335af..e605965 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -172,6 +172,7 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader 
*shader,
 {
struct tgsi_exec_machine *machine = shader->machine;
unsigned prim_idx, j, slot;
+   unsigned current_idx = 0;
float (*output)[4];
 
output = *p_output;
@@ -184,9 +185,8 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader 
*shader,
   shader->primitive_lengths[prim_idx +   shader->emitted_primitives] =
  machine->Primitives[prim_idx];
   shader->emitted_vertices += num_verts_per_prim;
-  for (j = 0; j < num_verts_per_prim; j++) {
- int idx = (prim_idx * num_verts_per_prim + j) *
-   shader->info.num_outputs;
+  for (j = 0; j < num_verts_per_prim; j++, current_idx++) {
+ int idx = current_idx * shader->info.num_outputs;
 #ifdef DEBUG_OUTPUTS
  debug_printf("%d) Output vert:\n", idx / shader->info.num_outputs);
 #endif
@@ -208,7 +208,7 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader 
*shader,
   }
}
*p_output = output;
- shader->emitted_primitives += num_primitives;
+   shader->emitted_primitives += num_primitives;
 }
 
 /*#define DEBUG_INPUTS 1*/
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] draw/so: Fix stream output with geometry shaders

2013-03-12 Thread Zack Rusin

If geometry shader is present its stream output info should
be used instead of the vs and we shouldn't use the pre-clipped
coordinates.

Signed-off-by: Zack Rusin 
---
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c   |2 +-
 src/gallium/auxiliary/draw/draw_pt_so_emit.c   |   37 
 2 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c 
b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 5fc9436..288b524 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -102,7 +102,7 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
(boolean)draw->rasterizer->gl_rasterization_rules,
(draw->vs.edgeflag_output ? TRUE : FALSE) );
 
-   draw_pt_so_emit_prepare( fpme->so_emit, TRUE );
+   draw_pt_so_emit_prepare( fpme->so_emit, gs == NULL );
 
if (!(opt & PT_PIPELINE)) {
   draw_pt_emit_prepare( fpme->emit,
diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c 
b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
index 13d8470..64ed544 100644
--- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
@@ -27,6 +27,7 @@
 
 #include "draw/draw_private.h"
 #include "draw/draw_vs.h"
+#include "draw/draw_gs.h"
 #include "draw/draw_context.h"
 #include "draw/draw_vbuf.h"
 #include "draw/draw_vertex.h"
@@ -51,13 +52,37 @@ struct pt_so_emit {
unsigned generated_primitives;
 };
 
+static const struct pipe_stream_output_info *
+draw_so_info(const struct draw_context *draw)
+{
+   const struct pipe_stream_output_info *state = NULL;
+
+   if (draw->gs.geometry_shader) {
+  state = &draw->gs.geometry_shader->state.stream_output;
+   } else {
+  state = &draw->vs.vertex_shader->state.stream_output;
+   }
+
+   return state;
+}
+
+static INLINE boolean
+draw_has_so(const struct draw_context *draw)
+{
+   const struct pipe_stream_output_info *state = draw_so_info(draw);
+
+   if (state && state->num_outputs > 0)
+  return TRUE;
+
+   return FALSE;
+}
 
 void draw_pt_so_emit_prepare(struct pt_so_emit *emit, boolean use_pre_clip_pos)
 {
struct draw_context *draw = emit->draw;
 
emit->use_pre_clip_pos = use_pre_clip_pos;
-   emit->has_so = (draw->vs.vertex_shader->state.stream_output.num_outputs > 
0);
+   emit->has_so = draw_has_so(draw);
if (use_pre_clip_pos)
   emit->pos_idx = draw_current_shader_position_output(draw);
 
@@ -92,8 +117,7 @@ static void so_emit_prim(struct pt_so_emit *so,
struct draw_context *draw = so->draw;
const float (*input_ptr)[4];
const float (*pcp_ptr)[4] = NULL;
-   const struct pipe_stream_output_info *state =
-  &draw->vs.vertex_shader->state.stream_output;
+   const struct pipe_stream_output_info *state = draw_so_info(draw);
float *buffer;
int buffer_total_bytes[PIPE_MAX_SO_BUFFERS];
 
@@ -125,9 +149,8 @@ static void so_emit_prim(struct pt_so_emit *so,
for (i = 0; i < num_vertices; ++i) {
   const float (*input)[4];
   const float (*pre_clip_pos)[4];
-  unsigned total_written_compos = 0;
   int ob;
-  /*debug_printf("%d) vertex index = %d (prim idx = %d)\n", i, indices[i], 
prim_idx);*/
+
   input = (const float (*)[4])(
  (const char *)input_ptr + (indices[i] * input_vertex_stride));
 
@@ -145,11 +168,11 @@ static void so_emit_prim(struct pt_so_emit *so,
  buffer = (float *)((char *)draw->so.targets[ob]->mapping +
 draw->so.targets[ob]->target.buffer_offset +
 draw->so.targets[ob]->internal_offset) + 
state->output[slot].dst_offset;
+ 
  if (idx == so->pos_idx && pcp_ptr)
-memcpy(buffer, &pre_clip_pos[start_comp], num_comps * 
sizeof(float));
+memcpy(buffer, &pre_clip_pos[idx][start_comp], num_comps * 
sizeof(float));
  else
 memcpy(buffer, &input[idx][start_comp], num_comps * sizeof(float));
- total_written_compos += num_comps;
   }
   for (ob = 0; ob < draw->so.num_targets; ++ob)
  draw->so.targets[ob]->internal_offset += state->stride[ob] * 
sizeof(float);
-- 
1.7.10.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] DRI2: don't advertise GLX_INTEL_swap_event if it can't

2013-03-12 Thread Zack Rusin

> If ddx does not support swap, don't advertise it.  We might also be
> able to get rid of the vmwgfx check (I'm not quite sure the purpose of
> that check vs. just checking dri2Minor.


No, not really. GLX_INTEL_swap_event doesn't have any hooks. You're checking 
for presence of generic swap extension.

Also unfortunately dri2minor has no correlation to glx_intel_swap_event. 
There's no way to check for presence of GLX_INTEL_swap_event on this side.


z
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] DRI2: don't advertise GLX_INTEL_swap_event if it can't

2013-03-12 Thread Zack Rusin

 > well, from what I can tell, if you advertise this extension
> applications will expect a swap event.  Which will never come if
> dri/glx on client side remaps scheduleswap to copyregion.
> 
> So maybe there are other conditions where we should not advertise this
> extension.  But if we know we will never get events, we should not
> advertise this extension.

The issue isn't on this side, it's on the Xserver side. We don't advertise 
extensions that aren't advertised by the server, unfortunately Xserver 
unconditionally enables this extension. I've sent a patch to xorg-devel at 
least limiting exposure ( 
http://lists.x.org/archives/xorg-devel/2013-February/035449.html ) but it 
hasn't been applied. The only reason for the vmwgfx hack is that we have a 
shipping driver that badly broke with the new Xserver so instead of leaving our 
users with broken systems we disable the extension on the client side. That 
isn't the correct approach though, in fact it's wrong, but it keeps those 
systems working until fixed xserver is out. I'd prefer to keep more hacks to 
fix this situation out of mesa.

z
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] DRI2: don't advertise GLX_INTEL_swap_event if it can't

2013-03-12 Thread Zack Rusin

> hmm, well, I think my fix is not incorrect.. we can tell from dri2
> proto version that the xserver does not support ScheduleSwap.  Maybe
> there should be other conditions where we also don't advertise this
> extension, but this patch still improves things.  If we absolutely
> know from the dri2 proto version that ScheduleSwap is not supported,
> then we should not advertise this extension.

And GLX_MESA_swap_control and GLX_SGI_make_current_read and the same for every 
extension which should be checked and advertised correctly by the Xserver. The 
issue is that you shouldn't worry about those because Xserver should check and 
advertise correctly what it supports. The issue with using swap_control symbols 
with checking for swap_event is that it creates arbitrary distinction between 
those two extensions on the client side only because Xserver does the correct 
thing for one of them and not the other.

> Without this, gnome-shell (and mutter/clutter) on freedreno is broken.
>  I'd rather not filter out based on the driver name, because when I
> eventually have a display driver where I can support swap, and bump
> the dri2 version #, I'd like this extension to be advertised.

TBH, I don't think you need this check at all, you just need a fixed xserver 
which doesn't advertise intel_swap_event if it doesn't support it. Until 
freedreno is shipped you don't have to worry about Xserver breaking the 
extension strings because you control the environment.
And just to be clear, I'm not nacking this patch, I just think it's silly to 
keep fixing Xserver bugs in mesa, but if you really hate the check for names, 
then please remove the strcpy vmwgfx and fix the comment above the check so 
that we have one master hack for this extension instead of accumulating a 
number of them.

z
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] DRI2: don't advertise GLX_INTEL_swap_event if it can't

2013-03-13 Thread Zack Rusin

> well, I'm more familiar w/ EGL where we don't have the xserver
> advertising anything, and it is all on the client side.. but when it
> is an inexpensive check, it seems reasonable to want mesa to do the
> right thing where possible. 

It's simply silly. In the same sense that adding yet another if (ptr) to "if 
(ptr) if (ptr) FREE(ptr);" while not technically wrong is simply silly. Like I 
said we already check whether those extensions are advertised by the server and 
don't advertise the ones that aren't.

> Probably there are other cases where we
> should do the same thing.  I can update my patch to also exclude other
> extensions

No, the point is that we don't want to do that. It's fundamentally broken and 
you know that it's broken because you'll notice that this extension is still 
advertised by the server (for our sake that's all required to fix Clutter, but 
it's still broken). It's a weird thing for an extension which is implemented by 
the server to be advertised by the server and yet having a client which is 
essentially not involved at all, not be advertising it. The only reason we have 
to worry about this is that the server is broken. So while we might want to 
make things easier on us by not forcing users to keep repatching the Xserver we 
shouldn't have any illusions about what this is: it's a nasty hack required by 
a bug in the Xserver. As such that code has only two requirements:
1) That all drivers requiring that hack go through the same codepath and that 
it's as minimal as possible so it's trivial to remove it once a fixed Xserver 
gets into most distros.
2) That it's clearly documented as a hack, so that anyone reading this 
code will immediately understand what's the purpose of the weird code and what 
are the prerequisites for removing it.
Everything else is of no consequence in this case. So whether you decide to 
use names or any number of other extensions that came after dri2inforec 
version 4 to check for makes no difference as long as it fulfills the two above 
goals. 

> true, it is not shipping in any distro yet, so anyone who wants to try
> it gets to try git master of mesa, which runs into problems because of
> advertising the INTEL_swap extension.  Asking everyone to rebuild
> xserver with some extra patch which is not merged yet is a big pita.

Sure, but at the same time adding hacks to shared mesa code to make it easier 
to try a dev driver doesn't make a terribly convincing argument. In the end 
though, at least in this case, the bug is severe enough that a hack in mesa 
makes sense and we've spent too much time discussing a very simple issue, so 
whatever you do just please make sure to fulfill the two requirements above and 
everything will be ok.

z
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/6] graw/gs: add missing max output vertices to all tests

2013-03-26 Thread Zack Rusin

A few tests were missing this crucial property.

Signed-off-by: Zack Rusin 
---
 src/gallium/tests/graw/geometry-shader/add-mix.txt   |1 +
 src/gallium/tests/graw/geometry-shader/add.txt   |1 +
 src/gallium/tests/graw/geometry-shader/mov-cb-2d.txt |1 +
 src/gallium/tests/graw/geometry-shader/mov.txt   |1 +
 4 files changed, 4 insertions(+)

diff --git a/src/gallium/tests/graw/geometry-shader/add-mix.txt 
b/src/gallium/tests/graw/geometry-shader/add-mix.txt
index 63e689a..bbe4164 100644
--- a/src/gallium/tests/graw/geometry-shader/add-mix.txt
+++ b/src/gallium/tests/graw/geometry-shader/add-mix.txt
@@ -1,6 +1,7 @@
 GEOM
 PROPERTY GS_INPUT_PRIMITIVE TRIANGLES
 PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP
+PROPERTY GS_MAX_OUTPUT_VERTICES 3
 DCL IN[][0], POSITION, CONSTANT
 DCL IN[][1], COLOR, CONSTANT
 DCL OUT[0], POSITION, CONSTANT
diff --git a/src/gallium/tests/graw/geometry-shader/add.txt 
b/src/gallium/tests/graw/geometry-shader/add.txt
index d8c7c41..8373dac 100644
--- a/src/gallium/tests/graw/geometry-shader/add.txt
+++ b/src/gallium/tests/graw/geometry-shader/add.txt
@@ -1,6 +1,7 @@
 GEOM
 PROPERTY GS_INPUT_PRIMITIVE TRIANGLES
 PROPERTY GS_OUTPUT_PRIMITIVE LINE_STRIP
+PROPERTY GS_MAX_OUTPUT_VERTICES 3
 DCL IN[][0], POSITION, CONSTANT
 DCL IN[][1], COLOR, CONSTANT
 DCL OUT[0], POSITION, CONSTANT
diff --git a/src/gallium/tests/graw/geometry-shader/mov-cb-2d.txt 
b/src/gallium/tests/graw/geometry-shader/mov-cb-2d.txt
index 058acfb..339fd1e 100644
--- a/src/gallium/tests/graw/geometry-shader/mov-cb-2d.txt
+++ b/src/gallium/tests/graw/geometry-shader/mov-cb-2d.txt
@@ -1,6 +1,7 @@
 GEOM
 PROPERTY GS_INPUT_PRIMITIVE TRIANGLES
 PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP
+PROPERTY GS_MAX_OUTPUT_VERTICES 3
 DCL IN[][0], POSITION, CONSTANT
 DCL IN[][1], COLOR, CONSTANT
 DCL OUT[0], POSITION, CONSTANT
diff --git a/src/gallium/tests/graw/geometry-shader/mov.txt 
b/src/gallium/tests/graw/geometry-shader/mov.txt
index 97150a5..ad141f1 100644
--- a/src/gallium/tests/graw/geometry-shader/mov.txt
+++ b/src/gallium/tests/graw/geometry-shader/mov.txt
@@ -1,6 +1,7 @@
 GEOM
 PROPERTY GS_INPUT_PRIMITIVE TRIANGLES
 PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP
+PROPERTY GS_MAX_OUTPUT_VERTICES 3
 DCL IN[][0], POSITION, CONSTANT
 DCL IN[][1], COLOR, CONSTANT
 DCL OUT[0], POSITION, CONSTANT
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 6/6] gallivm: implement breakc and implicit primitive flushing

2013-03-26 Thread Zack Rusin

we were missing implementation of the breakc instruction and our
TGSI semantics currently require an implicit endprim at the end
of GS if none is present - this implements both.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.h|6 
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |1 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c|   38 
 3 files changed, 45 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 4c6456e..4acc592 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -392,6 +392,12 @@ struct lp_build_tgsi_soa_context
LLVMValueRef emitted_prims_vec;
LLVMValueRef total_emitted_vertices_vec;
LLVMValueRef emitted_vertices_vec;
+   /* if a shader doesn't have ENDPRIM instruction but it has
+* a number of EMIT instructions it means the END instruction
+* implicitly invokes ENDPRIM. handle this via a flag here
+* in the future maybe we can enforce TGSI to always have
+* an explicit ENDPRIM */
+   boolean pending_end_primitive;
 
LLVMValueRef consts_ptr;
const LLVMValueRef *pos;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 41ddd99..55bb8e3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -867,6 +867,7 @@ lp_set_default_actions(struct lp_build_tgsi_context * 
bld_base)
bld_base->op_actions[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args;
bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args;
bld_base->op_actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args;
+   bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = 
scalar_unary_fetch_args;
bld_base->op_actions[TGSI_OPCODE_KIL].fetch_args = kil_fetch_args;
bld_base->op_actions[TGSI_OPCODE_KILP].fetch_args = kilp_fetch_args;
bld_base->op_actions[TGSI_OPCODE_RCP].fetch_args = scalar_unary_fetch_args;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 95633ab..36e49ac 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -213,6 +213,23 @@ static void lp_exec_break(struct lp_exec_mask *mask)
lp_exec_mask_update(mask);
 }
 
+
+static void lp_exec_break_condition(struct lp_exec_mask *mask, LLVMValueRef 
cond)
+{
+   LLVMBuilderRef builder = mask->bld->gallivm->builder;
+   LLVMValueRef exec_mask = LLVMBuildNot(builder,
+ mask->exec_mask,
+ "break");
+
+   exec_mask = LLVMBuildAnd(builder, exec_mask, cond, "");
+
+   mask->break_mask = LLVMBuildAnd(builder,
+   mask->break_mask,
+   exec_mask, "break_full");
+
+   lp_exec_mask_update(mask);
+}
+
 static void lp_exec_continue(struct lp_exec_mask *mask)
 {
LLVMBuilderRef builder = mask->bld->gallivm->builder;
@@ -2190,6 +2207,7 @@ emit_vertex(
  LLVMBuildAdd(builder, bld->emitted_vertices_vec, masked_ones, "");
   bld->total_emitted_vertices_vec =
  LLVMBuildAdd(builder, bld->total_emitted_vertices_vec, masked_ones, 
"");
+  bld->pending_end_primitive = TRUE;
}
 }
 
@@ -2212,6 +2230,7 @@ end_primitive(
   bld->emitted_prims_vec =
  LLVMBuildAdd(builder, bld->emitted_prims_vec, masked_ones, "");
   bld->emitted_vertices_vec = bld_base->uint_bld.zero;
+  bld->pending_end_primitive = FALSE;
}
 }
 
@@ -2250,6 +2269,17 @@ brk_emit(
 }
 
 static void
+breakc_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+   lp_exec_break_condition(&bld->exec_mask, emit_data->args[0]);
+}
+
+static void
 if_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -2504,6 +2534,12 @@ static void emit_epilogue(struct lp_build_tgsi_context * 
bld_base)
/* If we have indirect addressing in outputs we need to copy our alloca 
array
 * to the outputs slots specified by the caller */
if (bld->gs_args) {
+  /* flush the accumulated vertices as a primitive */
+  if (bld->pending_end_primitive) {
+ end_primitive(NULL, bld_base, NULL);
+ bld->pending_end_primitive = FALSE;
+  }
+  
   bld->gs_args->gs_epilogue(&bld->bld_base,
 bld->total_emitted_vertices_vec,
 bld->emitted_p

[Mesa-dev] [PATCH 0/6] Gallivm GS and related cleanups

2013-03-26 Thread Zack Rusin

This set implements code generation of geometry shaders in
the LLVM paths. There are some cleanups that will follow
(e.g. changing the input array to handle lp_native vectors
and not TGSI_NUM_CHANNELS) but all the simple examples
are working and as far as I can tell there are no regressions
in the common code so we can push it now.

Zack Rusin (6):
  graw/gs: add missing max output vertices to all tests
  draw/llvm: Remove unused gs_constants from jit_context
  draw/gs: Abstract the portions of GS that are tgsi specific
  draw/gs: Fetch more than one primitive per invocation
  gallium/llvm: implement geometry shaders in the llvm paths
  gallivm: implement breakc and implicit primitive flushing

 src/gallium/auxiliary/draw/draw_context.c  |   17 +-
 src/gallium/auxiliary/draw/draw_context.h  |5 +
 src/gallium/auxiliary/draw/draw_gs.c   |  585 +++-
 src/gallium/auxiliary/draw/draw_gs.h   |   41 +-
 src/gallium/auxiliary/draw/draw_llvm.c |  584 +--
 src/gallium/auxiliary/draw/draw_llvm.h |  176 +-
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c   |   77 ++-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.h|   33 +-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |1 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c|  209 ++-
 src/gallium/drivers/llvmpipe/lp_state_fs.c |4 +-
 src/gallium/tests/graw/geometry-shader/add-mix.txt |1 +
 src/gallium/tests/graw/geometry-shader/add.txt |1 +
 .../tests/graw/geometry-shader/mov-cb-2d.txt   |1 +
 src/gallium/tests/graw/geometry-shader/mov.txt |1 +
 15 files changed, 1511 insertions(+), 225 deletions(-)

-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/6] draw/llvm: Remove unused gs_constants from jit_context

2013-03-26 Thread Zack Rusin

The member was never used and we'll need to handle it differently
because gs will also need samplers/textures setup.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_llvm.c|   17 +++--
 src/gallium/auxiliary/draw/draw_llvm.h|   15 ---
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c  |4 
 3 files changed, 11 insertions(+), 25 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 20c9b79..602839d 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -189,18 +189,17 @@ create_jit_context_type(struct gallivm_state *gallivm,
 {
LLVMTargetDataRef target = gallivm->target;
LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
-   LLVMTypeRef elem_types[6];
+   LLVMTypeRef elem_types[5];
LLVMTypeRef context_type;
 
elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* 
vs_constants */
  LP_MAX_TGSI_CONST_BUFFERS);
-   elem_types[1] = elem_types[0]; /* gs_constants */
-   elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
+   elem_types[1] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
  DRAW_TOTAL_CLIP_PLANES), 0);
-   elem_types[3] = LLVMPointerType(float_type, 0); /* viewport */
-   elem_types[4] = LLVMArrayType(texture_type,
+   elem_types[2] = LLVMPointerType(float_type, 0); /* viewport */
+   elem_types[3] = LLVMArrayType(texture_type,
  PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
-   elem_types[5] = LLVMArrayType(sampler_type,
+   elem_types[4] = LLVMArrayType(sampler_type,
  PIPE_MAX_SAMPLERS); /* samplers */
context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
   Elements(elem_types), 0);
@@ -212,12 +211,10 @@ create_jit_context_type(struct gallivm_state *gallivm,
 
LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
   target, context_type, 0);
-   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants,
-  target, context_type, 1);
LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes,
-  target, context_type, 2);
+  target, context_type, 1);
LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, viewport,
-  target, context_type, 3);
+  target, context_type, 2);
LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
   target, context_type,
   DRAW_JIT_CTX_TEXTURES);
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h 
b/src/gallium/auxiliary/draw/draw_llvm.h
index c9f125b..c03c69e 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -106,9 +106,6 @@ enum {
DRAW_JIT_VERTEX_DATA
 };
 
-#define DRAW_JIT_CTX_TEXTURES 4
-#define DRAW_JIT_CTX_SAMPLERS 5
-
 /**
  * This structure is passed directly to the generated vertex shader.
  *
@@ -123,7 +120,6 @@ enum {
 struct draw_jit_context
 {
const float *vs_constants[LP_MAX_TGSI_CONST_BUFFERS];
-   const float *gs_constants[LP_MAX_TGSI_CONST_BUFFERS];
float (*planes) [DRAW_TOTAL_CLIP_PLANES][4];
float *viewport;
 
@@ -135,17 +131,14 @@ struct draw_jit_context
 #define draw_jit_context_vs_constants(_gallivm, _ptr) \
lp_build_struct_get_ptr(_gallivm, _ptr, 0, "vs_constants")
 
-#define draw_jit_context_gs_constants(_gallivm, _ptr) \
-   lp_build_struct_get_ptr(_gallivm, _ptr, 1, "gs_constants")
-
 #define draw_jit_context_planes(_gallivm, _ptr) \
-   lp_build_struct_get(_gallivm, _ptr, 2, "planes")
+   lp_build_struct_get(_gallivm, _ptr, 1, "planes")
 
 #define draw_jit_context_viewport(_gallivm, _ptr) \
-   lp_build_struct_get(_gallivm, _ptr, 3, "viewport")
+   lp_build_struct_get(_gallivm, _ptr, 2, "viewport")
 
-#define DRAW_JIT_CTX_TEXTURES 4
-#define DRAW_JIT_CTX_SAMPLERS 5
+#define DRAW_JIT_CTX_TEXTURES 3
+#define DRAW_JIT_CTX_SAMPLERS 4
 
 #define draw_jit_context_textures(_gallivm, _ptr) \
lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_CTX_TEXTURES, "textures")
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c 
b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 288b524..fab168c 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -200,10 +200,6 @@ llvm_middle_end_bind_parameters(struct draw_pt_middle_end 
*middle)
   fpme->llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i];
}
 
-   for (i = 0; i < Elements(fpme->llvm->jit_conte

[Mesa-dev] [PATCH 3/6] draw/gs: Abstract the portions of GS that are tgsi specific

2013-03-26 Thread Zack Rusin

To be able to add llvm paths later on we need to have some common
interface for them.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_gs.c |  270 ++
 src/gallium/auxiliary/draw/draw_gs.h |   14 ++
 2 files changed, 156 insertions(+), 128 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_gs.c 
b/src/gallium/auxiliary/draw/draw_gs.c
index e605965..81d9140 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -42,112 +42,6 @@
 /* fixme: move it from here */
 #define MAX_PRIMITIVES 64
 
-boolean
-draw_gs_init( struct draw_context *draw )
-{
-   draw->gs.tgsi.machine = tgsi_exec_machine_create();
-   if (!draw->gs.tgsi.machine)
-  return FALSE;
-
-   draw->gs.tgsi.machine->Primitives = align_malloc(
-  MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16);
-   if (!draw->gs.tgsi.machine->Primitives)
-  return FALSE;
-   memset(draw->gs.tgsi.machine->Primitives, 0,
-  MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector));
-
-   return TRUE;
-}
-
-void draw_gs_destroy( struct draw_context *draw )
-{
-   if (!draw->gs.tgsi.machine)
-  return;
-
-   align_free(draw->gs.tgsi.machine->Primitives);
-
-   tgsi_exec_machine_destroy(draw->gs.tgsi.machine);
-}
-
-struct draw_geometry_shader *
-draw_create_geometry_shader(struct draw_context *draw,
-const struct pipe_shader_state *state)
-{
-   struct draw_geometry_shader *gs;
-   unsigned i;
-
-   gs = CALLOC_STRUCT(draw_geometry_shader);
-
-   if (!gs)
-  return NULL;
-
-   gs->draw = draw;
-   gs->state = *state;
-   gs->state.tokens = tgsi_dup_tokens(state->tokens);
-   if (!gs->state.tokens) {
-  FREE(gs);
-  return NULL;
-   }
-
-   tgsi_scan_shader(state->tokens, &gs->info);
-
-   /* setup the defaults */
-   gs->input_primitive = PIPE_PRIM_TRIANGLES;
-   gs->output_primitive = PIPE_PRIM_TRIANGLE_STRIP;
-   gs->max_output_vertices = 32;
-
-   for (i = 0; i < gs->info.num_properties; ++i) {
-  if (gs->info.properties[i].name ==
-  TGSI_PROPERTY_GS_INPUT_PRIM)
- gs->input_primitive = gs->info.properties[i].data[0];
-  else if (gs->info.properties[i].name ==
-   TGSI_PROPERTY_GS_OUTPUT_PRIM)
- gs->output_primitive = gs->info.properties[i].data[0];
-  else if (gs->info.properties[i].name ==
-   TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES)
- gs->max_output_vertices = gs->info.properties[i].data[0];
-   }
-
-   gs->machine = draw->gs.tgsi.machine;
-
-   if (gs)
-   {
-  uint i;
-  for (i = 0; i < gs->info.num_outputs; i++) {
- if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
- gs->info.output_semantic_index[i] == 0)
-gs->position_output = i;
-  }
-   }
-
-   return gs;
-}
-
-void draw_bind_geometry_shader(struct draw_context *draw,
-   struct draw_geometry_shader *dgs)
-{
-   draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
-
-   if (dgs) {
-  draw->gs.geometry_shader = dgs;
-  draw->gs.num_gs_outputs = dgs->info.num_outputs;
-  draw->gs.position_output = dgs->position_output;
-  draw_geometry_shader_prepare(dgs, draw);
-   }
-   else {
-  draw->gs.geometry_shader = NULL;
-  draw->gs.num_gs_outputs = 0;
-   }
-}
-
-void draw_delete_geometry_shader(struct draw_context *draw,
- struct draw_geometry_shader *dgs)
-{
-   FREE(dgs->primitive_lengths);
-   FREE((void*) dgs->state.tokens);
-   FREE(dgs);
-}
-
 static INLINE int
 draw_gs_get_input_index(int semantic, int index,
 const struct tgsi_shader_info *input_info)
@@ -165,10 +59,10 @@ draw_gs_get_input_index(int semantic, int index,
 }
 
 /*#define DEBUG_OUTPUTS 1*/
-static INLINE void
-draw_geometry_fetch_outputs(struct draw_geometry_shader *shader,
-unsigned num_primitives,
-float (**p_output)[4])
+static void
+tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader,
+  unsigned num_primitives,
+  float (**p_output)[4])
 {
struct tgsi_exec_machine *machine = shader->machine;
unsigned prim_idx, j, slot;
@@ -212,7 +106,7 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader 
*shader,
 }
 
 /*#define DEBUG_INPUTS 1*/
-static void draw_fetch_gs_input(struct draw_geometry_shader *shader,
+static void tgsi_fetch_gs_input(struct draw_geometry_shader *shader,
 unsigned *indices,
 unsigned num_vertices,
 unsigned prim_idx)
@@ -275,14 +169,20 @@ static void draw_fetch_gs_input(struct 
draw_geometry_shader *shader,
}
 }
 
-static void gs_flush(struct draw_geometry_shader *shader,
-

[Mesa-dev] [PATCH 4/6] draw/gs: Fetch more than one primitive per invocation

2013-03-26 Thread Zack Rusin

Allows executing gs on up to 4 primitives at a time. Will also be
required by the llvm code because there we definitely don't want
to flush with just a single primitive.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_gs.c |   54 ++
 src/gallium/auxiliary/draw/draw_gs.h |1 +
 2 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_gs.c 
b/src/gallium/auxiliary/draw/draw_gs.c
index 81d9140..2b50c9c 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -58,6 +58,12 @@ draw_gs_get_input_index(int semantic, int index,
return -1;
 }
 
+static INLINE boolean
+draw_gs_should_flush(struct draw_geometry_shader *shader)
+{
+   return (shader->fetched_prim_count == 4);
+}
+
 /*#define DEBUG_OUTPUTS 1*/
 static void
 tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader,
@@ -197,13 +203,14 @@ static unsigned tgsi_gs_run(struct draw_geometry_shader 
*shader,
   
machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0];
 }
 
-static void gs_flush(struct draw_geometry_shader *shader,
- unsigned input_primitives)
+static void gs_flush(struct draw_geometry_shader *shader)
 {
unsigned out_prim_count;
 
+   unsigned input_primitives = shader->fetched_prim_count;
+
debug_assert(input_primitives > 0 &&
-input_primitives < 4);
+input_primitives <= 4);
 
out_prim_count = shader->run(shader, input_primitives);
 #if 0
@@ -213,6 +220,7 @@ static void gs_flush(struct draw_geometry_shader *shader,
 #endif
shader->fetch_outputs(shader, out_prim_count,
  &shader->tmp_output);
+   shader->fetched_prim_count = 0;
 }
 
 static void gs_point(struct draw_geometry_shader *shader,
@@ -222,10 +230,12 @@ static void gs_point(struct draw_geometry_shader *shader,
 
indices[0] = idx;
 
-   shader->fetch_inputs(shader, indices, 1, 0);
+   shader->fetch_inputs(shader, indices, 1,
+shader->fetched_prim_count);
++shader->in_prim_idx;
+   ++shader->fetched_prim_count;
 
-   gs_flush(shader, 1);
+   gs_flush(shader);
 }
 
 static void gs_line(struct draw_geometry_shader *shader,
@@ -236,10 +246,12 @@ static void gs_line(struct draw_geometry_shader *shader,
indices[0] = i0;
indices[1] = i1;
 
-   shader->fetch_inputs(shader, indices, 2, 0);
+   shader->fetch_inputs(shader, indices, 2,
+shader->fetched_prim_count);
++shader->in_prim_idx;
+   ++shader->fetched_prim_count;
 
-   gs_flush(shader, 1);
+   gs_flush(shader);
 }
 
 static void gs_line_adj(struct draw_geometry_shader *shader,
@@ -252,10 +264,12 @@ static void gs_line_adj(struct draw_geometry_shader 
*shader,
indices[2] = i2;
indices[3] = i3;
 
-   shader->fetch_inputs(shader, indices, 4, 0);
+   shader->fetch_inputs(shader, indices, 4,
+shader->fetched_prim_count);
++shader->in_prim_idx;
+   ++shader->fetched_prim_count;
 
-   gs_flush(shader, 1);
+   gs_flush(shader);
 }
 
 static void gs_tri(struct draw_geometry_shader *shader,
@@ -267,10 +281,12 @@ static void gs_tri(struct draw_geometry_shader *shader,
indices[1] = i1;
indices[2] = i2;
 
-   shader->fetch_inputs(shader, indices, 3, 0);
+   shader->fetch_inputs(shader, indices, 3,
+shader->fetched_prim_count);
++shader->in_prim_idx;
+   ++shader->fetched_prim_count;
 
-   gs_flush(shader, 1);
+   gs_flush(shader);
 }
 
 static void gs_tri_adj(struct draw_geometry_shader *shader,
@@ -286,10 +302,12 @@ static void gs_tri_adj(struct draw_geometry_shader 
*shader,
indices[4] = i4;
indices[5] = i5;
 
-   shader->fetch_inputs(shader, indices, 6, 0);
+   shader->fetch_inputs(shader, indices, 6,
+shader->fetched_prim_count);
++shader->in_prim_idx;
+   ++shader->fetched_prim_count;
 
-   gs_flush(shader, 1);
+   gs_flush(shader);
 }
 
 #define FUNC gs_run
@@ -354,6 +372,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader 
*shader,
shader->vertex_size = vertex_size;
shader->tmp_output = (float (*)[4])output_verts->verts->data;
shader->in_prim_idx = 0;
+   shader->fetched_prim_count = 0;
shader->input_vertex_stride = input_stride;
shader->input = input;
shader->input_info = input_info;
@@ -369,6 +388,15 @@ int draw_geometry_shader_run(struct draw_geometry_shader 
*shader,
   gs_run_elts(shader, input_prim, input_verts,
   output_prims, output_verts);
 
+   /* Flush the remaining primitives. Will happen if
+* num_input_primitives % 4 != 0
+*/
+   if (shader->fetched_prim_count > 0) {
+  gs_flush(shader);
+   }
+
+   debug_assert(shader->fetched_prim_count == 0);
+
/* Update prim_info:

[Mesa-dev] [PATCH 0/3] A few fixes for the llvm geometry shaders

2013-03-26 Thread Zack Rusin

A few cleanups, plus fixes for texture sampling for the llvm 
geometry shaders. This is on top of the previous patchset.

Zack Rusin (3):
  draw: Allocate the output buffer for output primitives
  draw/llvm: Cleanup the store debugging code
  llvmpipe/draw: Fix texture sampling in geometry shaders

 src/gallium/auxiliary/draw/draw_context.c   |4 +-
 src/gallium/auxiliary/draw/draw_gs.c|3 +-
 src/gallium/auxiliary/draw/draw_llvm.c  |   96 -
 src/gallium/auxiliary/draw/draw_llvm.h  |   31 +++---
 src/gallium/drivers/llvmpipe/lp_context.c   |4 +
 src/gallium/drivers/llvmpipe/lp_context.h   |1 +
 src/gallium/drivers/llvmpipe/lp_draw_arrays.c   |4 +
 src/gallium/drivers/llvmpipe/lp_state.h |8 ++
 src/gallium/drivers/llvmpipe/lp_state_sampler.c |  127 +++
 9 files changed, 211 insertions(+), 67 deletions(-)

-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] draw: Allocate the output buffer for output primitives

2013-03-26 Thread Zack Rusin

We were allocating the output buffer but sizing it by the number
of input primitives. We need to size that buffer using the
maximum number of output, not input, primitives.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_gs.c |3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_gs.c 
b/src/gallium/auxiliary/draw/draw_gs.c
index c1e1f56..85ea04f 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -500,10 +500,9 @@ int draw_geometry_shader_run(struct draw_geometry_shader 
*shader,
output_verts->stride = output_verts->vertex_size;
output_verts->verts =
   (struct vertex_header *)MALLOC(output_verts->vertex_size *
- num_in_primitives *
+ max_out_prims *
  shader->max_output_vertices);
 
-
 #if 0
debug_printf("%s count = %d (in prims # = %d)\n",
 __FUNCTION__, num_input_verts, num_in_primitives);
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] draw/llvm: Cleanup the store debugging code

2013-03-26 Thread Zack Rusin


Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_llvm.c |   13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 1b0b1b9..f857183 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -716,13 +716,13 @@ store_aos(struct gallivm_state *gallivm,
indices[1] = index;
indices[2] = lp_build_const_int32(gallivm, 0);
 
+   data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
+   data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, "");
+
 #if DEBUG_STORE
lp_build_printf(gallivm, " %p storing attribute %d (io = %p)\n", 
data_ptr, index, io_ptr);
 #endif
 
-   data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
-   data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, "");
-
/* Unaligned store due to the vertex header */
lp_set_store_alignment(LLVMBuildStore(builder, value, data_ptr), 
sizeof(float));
 }
@@ -826,7 +826,7 @@ store_aos_array(struct gallivm_state *gallivm,
  val = adjust_mask(gallivm, val);
  LLVMBuildStore(builder, val, id_ptr);
 #if DEBUG_STORE
- lp_build_printf(gallivm, "io = %p, index %d\n, clipmask = %x\n",
+ lp_build_printf(gallivm, "io = %p, index %d, clipmask = %x\n",
  io_ptrs[i], inds[i], val);
 #endif
   }
@@ -1290,9 +1290,6 @@ draw_gs_llvm_end_primitive(struct lp_build_tgsi_context * 
bld_base,
   LLVMValueRef num_vertices =
  LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, "");
 
-  /*lp_build_printf(gallivm, " emitting vertices, %d\n\n",
-num_vertices);*/
-  
   store_ptr = LLVMBuildGEP(builder, prim_lengts_ptr, &prims_emitted, 1, 
"");
   store_ptr = LLVMBuildLoad(builder, store_ptr, "");
   store_ptr = LLVMBuildGEP(builder, store_ptr, &ind, 1, "");
@@ -1318,7 +1315,7 @@ draw_gs_llvm_epilogue(struct lp_build_tgsi_context * 
bld_base,

emitted_verts_ptr = LLVMBuildGEP(builder, emitted_verts_ptr, &zero, 0, "");
emitted_prims_ptr = LLVMBuildGEP(builder, emitted_prims_ptr, &zero, 0, "");
-   
+
LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr);
LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr);
 }
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] llvmpipe/draw: Fix texture sampling in geometry shaders

2013-03-26 Thread Zack Rusin

We weren't correctly propagating the samplers and sampler views
when they were set for the geometry shader stage.

Signed-off-by: Zack Rusin 
---
 src/gallium/auxiliary/draw/draw_context.c   |4 +-
 src/gallium/auxiliary/draw/draw_llvm.c  |   83 ---
 src/gallium/auxiliary/draw/draw_llvm.h  |   31 +++---
 src/gallium/drivers/llvmpipe/lp_context.c   |4 +
 src/gallium/drivers/llvmpipe/lp_context.h   |1 +
 src/gallium/drivers/llvmpipe/lp_draw_arrays.c   |4 +
 src/gallium/drivers/llvmpipe/lp_state.h |8 ++
 src/gallium/drivers/llvmpipe/lp_state_sampler.c |  127 +++
 8 files changed, 205 insertions(+), 57 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_context.c 
b/src/gallium/auxiliary/draw/draw_context.c
index d64b82b..ceb74df 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -792,8 +792,8 @@ draw_set_samplers(struct draw_context *draw,
draw->num_samplers[shader_stage] = num;
 
 #ifdef HAVE_LLVM
-   if (draw->llvm && shader_stage == PIPE_SHADER_VERTEX)
-  draw_llvm_set_sampler_state(draw);
+   if (draw->llvm)
+  draw_llvm_set_sampler_state(draw, shader_stage);
 #endif
 }
 
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index f857183..3e47452 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -249,17 +249,17 @@ create_gs_jit_context_type(struct gallivm_state *gallivm,
elem_types[1] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
  DRAW_TOTAL_CLIP_PLANES), 0);
elem_types[2] = LLVMPointerType(float_type, 0); /* viewport */
-   
-   elem_types[3] = LLVMPointerType(LLVMPointerType(int_type, 0), 0);
-   elem_types[4] = LLVMPointerType(LLVMVectorType(int_type,
-  vector_length), 0);
-   elem_types[5] = LLVMPointerType(LLVMVectorType(int_type,
-  vector_length), 0);
 
-   elem_types[6] = LLVMArrayType(texture_type,
+   elem_types[3] = LLVMArrayType(texture_type,
  PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
-   elem_types[7] = LLVMArrayType(sampler_type,
+   elem_types[4] = LLVMArrayType(sampler_type,
  PIPE_MAX_SAMPLERS); /* samplers */
+   
+   elem_types[5] = LLVMPointerType(LLVMPointerType(int_type, 0), 0);
+   elem_types[6] = LLVMPointerType(LLVMVectorType(int_type,
+  vector_length), 0);
+   elem_types[7] = LLVMPointerType(LLVMVectorType(int_type,
+  vector_length), 0);
 
context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
   Elements(elem_types), 0);
@@ -275,18 +275,18 @@ create_gs_jit_context_type(struct gallivm_state *gallivm,
   target, context_type, 1);
LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewport,
   target, context_type, 2);
-   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths,
-  target, context_type, 3);
-   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices,
-  target, context_type, 4);
-   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims,
-  target, context_type, 5);
LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, textures,
   target, context_type,
-  DRAW_GS_JIT_CTX_TEXTURES);
+  DRAW_JIT_CTX_TEXTURES);
LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, samplers,
   target, context_type,
-  DRAW_GS_JIT_CTX_SAMPLERS);
+  DRAW_JIT_CTX_SAMPLERS);
+   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths,
+  target, context_type, 5);
+   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices,
+  target, context_type, 6);
+   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims,
+  target, context_type, 7);
LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context,
 target, context_type);
 
@@ -1721,33 +1721,36 @@ draw_llvm_set_mapped_texture(struct draw_context *draw,
 
 
 void
-draw_llvm_set_sampler_state(struct draw_context *draw)
+draw_llvm_set_sampler_state(struct draw_context *draw, 
+unsigned shader_type)
 {
unsigned i;
 
-   for (i = 0; i < draw->num_samplers[PIPE_SHADER_VERTEX]; i++) {
-  struct draw_jit_sampler *jit_sam = &draw->llvm->jit_context.samplers[i];
-
-  if (draw->samplers[i]) {
- const struct pipe_sa

1 2 3 4 >

1 - 100 of 334 matches

Mail list logo