From: Ian Romanick <[email protected]>

This changes the text of the shader quite significantly, but it should
produce the same (or nearly the same) code.  The shader is now much
easier to understand because the code is all in one place instead of
being scattered about the C code.  It also makes it much easier to
implement the optimization in the next patch.

NOTE: The current code is limited to 32 samples.  Almost everywhere,
OpenGL uses an integer mask for samples, so a lot of code will need to
change to support more than 32 samples.

Signed-off-by: Ian Romanick <[email protected]>
---
 src/mesa/drivers/common/meta_blit.c | 67 ++++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 30 deletions(-)

diff --git a/src/mesa/drivers/common/meta_blit.c 
b/src/mesa/drivers/common/meta_blit.c
index 355c937..28aabd3 100644
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -518,10 +518,6 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
                                      tex_coords);
 
       } else {
-         char *sample_resolve;
-         int i;
-         int step;
-
          /* We're assuming power of two samples for this resolution procedure.
           *
           * To avoid losing any floating point precision if the samples all
@@ -530,26 +526,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
           * doing a naive sum and dividing.
           */
          assert(_mesa_is_pow_two(samples));
-         /* Fetch each individual sample. */
-         sample_resolve = rzalloc_size(mem_ctx, 1);
-         for (i = 0; i < samples; i++) {
-            ralloc_asprintf_append(&sample_resolve,
-                                   "   gvec4 sample_1_%d = 
texelFetch(texSampler, i%s(texCoords), %d);\n",
-                                   i, texcoord_type, i);
-         }
-         /* Now, merge each pair of samples, then merge each pair of those,
-          * etc.
-          */
-         for (step = 2; step <= samples; step *= 2) {
-            for (i = 0; i < samples; i += step) {
-               ralloc_asprintf_append(&sample_resolve,
-                                      "   gvec4 sample_%d_%d = 
merge(sample_%d_%d, sample_%d_%d);\n",
-                                      step, i,
-                                      step / 2, i,
-                                      step / 2, i + step / 2);
-            }
-         }
-
+         assert(samples <= 32);
          fs_source = ralloc_asprintf(mem_ctx,
                                      "#version 130\n"
                                      "#extension GL_ARB_texture_multisample: 
require\n"
@@ -557,30 +534,60 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
                                      "uniform %ssampler2DMS%s texSampler;\n"
                                      "in %s texCoords;\n"
                                      "out gvec4 out_color[%u];\n"
+                                     "#define SAMPLES %d\n"
+                                     "#define S s%d\n"
                                      "\n"
                                      "uvec4 merge(uvec4 a, uvec4 b) { return 
(a >> 1) + (b >> 1) + (a & b & 1u); }\n"
                                      "ivec4 merge(ivec4 a, ivec4 b) { return 
(a >> 1) + (b >> 1) + (a & b & 1); }\n"
                                      /* The divide will happen at the end for 
floats. */
                                      "vec4 merge(vec4 a, vec4 b) { return a + 
b; }\n"
+
+                                     /* Reduce from N samples to N/2 samples.
+                                      *
+                                      * NOTE: Missing \n characters from each
+                                      * line of the macro definition is
+                                      * intentional!
+                                      */
+                                     "#define REDUCE(dst, src)"
+                                     "   do {"
+                                     "      if (src.length() <= SAMPLES) {"
+                                     "         for (i = 0; i < dst.length(); 
i++)"
+                                     "            dst[i] = merge(src[i*2], 
src[i*2+1]);"
+                                     "     }"
+                                     "   } while (false)\n"
+                                     "\n"
                                      "void emit2(gvec4 s) { for (int i = 0; i 
< out_color.length(); i++) out_color[i] = s; }\n"
                                      "void emit(ivec4 s) { emit2(gvec4(s)); 
}\n"
                                      "void emit(uvec4 s) { emit2(gvec4(s)); 
}\n"
                                      /* Scale the final result. */
-                                     "void emit(vec4 s) { emit2(gvec4(s / 
%f)); }\n"
+                                     "void emit(vec4 s) { emit2(gvec4(s / 
float(SAMPLES))); }\n"
                                      "\n"
                                      "void main()\n"
                                      "{\n"
-                                     "%s\n" /* sample_resolve */
-                                     "    emit(sample_%d_0);\n"
+                                     "   gvec4 s32[32], s16[16], s8[8];\n"
+                                     "   gvec4 s4[4], s2[2], s1[1];\n"
+                                     "   i%s tc = i%s(texCoords);\n"
+                                     "   int i;\n"
+                                     "\n"
+                                     "   for (i = 0; i < SAMPLES; i++)\n"
+                                     "      S[i] = texelFetch(texSampler, tc, 
i);\n"
+                                     "\n"
+                                     "   REDUCE(s16, s32);\n"
+                                     "   REDUCE(s8, s16);\n"
+                                     "   REDUCE(s4, s8);\n"
+                                     "   REDUCE(s2, s4);\n"
+                                     "   REDUCE(s1, s2);\n"
+                                     "   emit(s1[0]);\n"
                                      "}\n",
                                      vec4_prefix,
                                      vec4_prefix,
                                      sampler_array_suffix,
                                      texcoord_type,
                                      drawFb->_NumColorDrawBuffers,
-                                     (float) samples,
-                                     sample_resolve,
-                                     samples);
+                                     samples,
+                                     samples,
+                                     texcoord_type,
+                                     texcoord_type);
       }
 
       vs_source = ralloc_asprintf(mem_ctx,
-- 
2.5.0

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to