The 2x and 4x cases are completely broken. The ldfptr instruction returns
garbage there.

The 8x case is broken on Cayman, though at least the result looks somewhat
correct.

I think we're missing some info. Anyway, at least one case works.
---
 src/gallium/auxiliary/util/u_blitter.c       |    8 ++
 src/gallium/auxiliary/util/u_blitter.h       |    6 ++
 src/gallium/drivers/r600/evergreen_state.c   |   39 ++++++--
 src/gallium/drivers/r600/evergreend.h        |    2 +-
 src/gallium/drivers/r600/r600_asm.c          |   10 +-
 src/gallium/drivers/r600/r600_asm.h          |    7 +-
 src/gallium/drivers/r600/r600_blit.c         |   42 ++++----
 src/gallium/drivers/r600/r600_pipe.c         |   29 +++++-
 src/gallium/drivers/r600/r600_pipe.h         |   21 ++++
 src/gallium/drivers/r600/r600_shader.c       |  133 +++++++++++++++++++++++++-
 src/gallium/drivers/r600/r600_sq.h           |    3 +
 src/gallium/drivers/r600/r600_state.c        |    3 +-
 src/gallium/drivers/r600/r600_state_common.c |    4 +-
 13 files changed, 270 insertions(+), 37 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_blitter.c 
b/src/gallium/auxiliary/util/u_blitter.c
index 1072a0e..49bde44 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -359,6 +359,14 @@ void util_blitter_destroy(struct blitter_context *blitter)
    FREE(ctx);
 }
 
+void util_blitter_set_texture_multisample(struct blitter_context *blitter,
+                                          boolean supported)
+{
+   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+
+   ctx->has_texture_multisample = supported;
+}
+
 static void blitter_set_running_flag(struct blitter_context_priv *ctx)
 {
    if (ctx->base.running) {
diff --git a/src/gallium/auxiliary/util/u_blitter.h 
b/src/gallium/auxiliary/util/u_blitter.h
index 4f71467..f75f81c 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -135,6 +135,12 @@ struct pipe_context *util_blitter_get_pipe(struct 
blitter_context *blitter)
    return blitter->pipe;
 }
 
+/**
+ * Override PIPE_CAP_TEXTURE_MULTISAMPLE as reported by the driver.
+ */
+void util_blitter_set_texture_multisample(struct blitter_context *blitter,
+                                          boolean supported);
+
 /* The default function to draw a rectangle. This can only be used
  * inside of the draw_rectangle callback if the driver overrides it. */
 void util_blitter_draw_rectangle(struct blitter_context *blitter,
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 330c021..967a4af 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -633,7 +633,7 @@ boolean evergreen_is_format_supported(struct pipe_screen 
*screen,
                return FALSE;
 
        if (sample_count > 1) {
-               if (rscreen->info.drm_minor < 19)
+               if (!rscreen->has_msaa)
                        return FALSE;
 
                switch (sample_count) {
@@ -1081,11 +1081,24 @@ evergreen_create_sampler_view_custom(struct 
pipe_context *ctx,
                                       S_030004_TEX_DEPTH(depth - 1) |
                                       S_030004_ARRAY_MODE(array_mode));
        view->tex_resource_words[2] = (tmp->surface.level[0].offset + 
r600_resource_va(ctx->screen, texture)) >> 8;
-       if (state->u.tex.last_level && texture->nr_samples <= 1) {
+
+       /* TEX_RESOURCE_WORD3.MIP_ADDRESS */
+       if (texture->nr_samples > 1 && rscreen->msaa_texture_support == 
MSAA_TEXTURE_COMPRESSED) {
+               /* XXX the 2x and 4x cases are broken. */
+               if (tmp->is_depth || tmp->resource.b.b.nr_samples != 8) {
+                       /* disable FMASK (0 = disabled) */
+                       view->tex_resource_words[3] = 0;
+                       view->skip_mip_address_reloc = true;
+               } else {
+                       /* FMASK should be in MIP_ADDRESS for multisample 
textures */
+                       view->tex_resource_words[3] = (tmp->fmask_offset + 
r600_resource_va(ctx->screen, texture)) >> 8;
+               }
+       } else if (state->u.tex.last_level && texture->nr_samples <= 1) {
                view->tex_resource_words[3] = (tmp->surface.level[1].offset + 
r600_resource_va(ctx->screen, texture)) >> 8;
        } else {
                view->tex_resource_words[3] = (tmp->surface.level[0].offset + 
r600_resource_va(ctx->screen, texture)) >> 8;
        }
+
        view->tex_resource_words[4] = (word4 |
                                       
S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
                                       S_030010_ENDIAN_SWAP(endian));
@@ -1589,9 +1602,7 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
                        rctx->framebuffer.export_16bpc = false;
                }
 
-               /* Cayman can fetch from a compressed MSAA colorbuffer,
-                * so it's pointless to track them. */
-               if (rctx->chip_class != CAYMAN && rtex->fmask_size && 
rtex->cmask_size) {
+               if (rtex->fmask_size && rtex->cmask_size) {
                        rctx->framebuffer.compressed_cb_mask |= 1 << i;
                }
        }
@@ -2265,13 +2276,15 @@ static void evergreen_emit_sampler_views(struct 
r600_context *rctx,
                r600_write_value(cs, (resource_id_base + resource_index) * 8);
                r600_write_array(cs, 8, rview->tex_resource_words);
 
-               /* XXX The kernel needs two relocations. This is stupid. */
                reloc = r600_context_bo_reloc(rctx, rview->tex_resource,
                                              RADEON_USAGE_READ);
                r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
                r600_write_value(cs, reloc);
-               r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
-               r600_write_value(cs, reloc);
+
+               if (!rview->skip_mip_address_reloc) {
+                       r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+                       r600_write_value(cs, reloc);
+               }
        }
        state->dirty_mask = 0;
 }
@@ -3364,6 +3377,16 @@ void *evergreen_create_decompress_blend(struct 
r600_context *rctx)
        return evergreen_create_blend_state_mode(&rctx->context, &blend, 
V_028808_CB_DECOMPRESS);
 }
 
+void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx)
+{
+       struct pipe_blend_state blend;
+
+       memset(&blend, 0, sizeof(blend));
+       blend.independent_blend_enable = true;
+       blend.rt[0].colormask = 0xf;
+       return evergreen_create_blend_state_mode(&rctx->context, &blend, 
V_028808_CB_FMASK_DECOMPRESS);
+}
+
 void *evergreen_create_db_flush_dsa(struct r600_context *rctx)
 {
        struct pipe_depth_stencil_alpha_state dsa = {{0}};
diff --git a/src/gallium/drivers/r600/evergreend.h 
b/src/gallium/drivers/r600/evergreend.h
index d10ec7f..1c8646d 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -485,7 +485,7 @@
 #define      V_028808_CB_ELIMINATE_FAST_CLEAR          0x00000002
 #define      V_028808_CB_RESOLVE                       0x00000003
 #define      V_028808_CB_DECOMPRESS                    0x00000004
-#define      V_028808_CB_FASK_DECOMPRESS               0x00000005
+#define      V_028808_CB_FMASK_DECOMPRESS              0x00000005
 #define   S_028808_ROP3(x)                             (((x) & 0xFF) << 16)
 #define   G_028808_ROP3(x)                             (((x) >> 16) & 0xFF)
 #define   C_028808_ROP3                                0xFF00FFFF
diff --git a/src/gallium/drivers/r600/r600_asm.c 
b/src/gallium/drivers/r600/r600_asm.c
index 066fb67..1fef3b0 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -255,7 +255,10 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void)
        return tex;
 }
 
-void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, 
enum radeon_family family)
+void r600_bytecode_init(struct r600_bytecode *bc,
+                       enum chip_class chip_class,
+                       enum radeon_family family,
+                       enum r600_msaa_texture_mode msaa_texture_mode)
 {
        if ((chip_class == R600) &&
            (family != CHIP_RV670 && family != CHIP_RS780 && family != 
CHIP_RS880)) {
@@ -268,6 +271,7 @@ void r600_bytecode_init(struct r600_bytecode *bc, enum 
chip_class chip_class, en
 
        LIST_INITHEAD(&bc->cf);
        bc->chip_class = chip_class;
+       bc->msaa_texture_mode = msaa_texture_mode;
 }
 
 static int r600_bytecode_add_cf(struct r600_bytecode *bc)
@@ -1736,6 +1740,7 @@ static int r600_bytecode_vtx_build(struct r600_bytecode 
*bc, struct r600_bytecod
 static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct 
r600_bytecode_tex *tex, unsigned id)
 {
        bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) |
+                            EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) |
                                S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) |
                                S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) |
                                S_SQ_TEX_WORD0_SRC_REL(tex->src_rel);
@@ -2766,7 +2771,8 @@ void *r600_create_vertex_fetch_shader(struct pipe_context 
*ctx,
        assert(count < 32);
 
        memset(&bc, 0, sizeof(bc));
-       r600_bytecode_init(&bc, rctx->chip_class, rctx->family);
+       r600_bytecode_init(&bc, rctx->chip_class, rctx->family,
+                          rctx->screen->msaa_texture_support);
 
        for (i = 0; i < count; i++) {
                if (elements[i].instance_divisor > 1) {
diff --git a/src/gallium/drivers/r600/r600_asm.h 
b/src/gallium/drivers/r600/r600_asm.h
index 8a9f318..2c7db2c 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -62,6 +62,7 @@ struct r600_bytecode_alu {
 struct r600_bytecode_tex {
        struct list_head                list;
        unsigned                        inst;
+       unsigned                        inst_mod;
        unsigned                        resource_id;
        unsigned                        src_gpr;
        unsigned                        src_rel;
@@ -195,6 +196,7 @@ struct r600_cf_callstack {
 
 struct r600_bytecode {
        enum chip_class                 chip_class;
+       enum r600_msaa_texture_mode     msaa_texture_mode;
        int                             type;
        struct list_head                cf;
        struct r600_bytecode_cf         *cf_last;
@@ -219,7 +221,10 @@ struct r600_bytecode {
 int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf 
*cf);
 
 /* r600_asm.c */
-void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, 
enum radeon_family family);
+void r600_bytecode_init(struct r600_bytecode *bc,
+                       enum chip_class chip_class,
+                       enum radeon_family family,
+                       enum r600_msaa_texture_mode msaa_texture_mode);
 void r600_bytecode_clear(struct r600_bytecode *bc);
 int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct 
r600_bytecode_alu *alu);
 int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct 
r600_bytecode_vtx *vtx);
diff --git a/src/gallium/drivers/r600/r600_blit.c 
b/src/gallium/drivers/r600/r600_blit.c
index 8597b8d..a19248d 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -252,12 +252,29 @@ static void r600_blit_decompress_color(struct 
pipe_context *ctx,
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
        unsigned layer, level, checked_last_layer, max_layer;
-
-       assert(rctx->chip_class != CAYMAN);
+       void *blend_decompress;
 
        if (!rtex->dirty_level_mask)
                return;
 
+       switch (rctx->screen->msaa_texture_support) {
+       case MSAA_TEXTURE_DECOMPRESSED:
+               blend_decompress = rctx->custom_blend_decompress;
+               break;
+       case MSAA_TEXTURE_COMPRESSED:
+               /* XXX the 2x and 4x cases are broken. */
+               if (rtex->resource.b.b.nr_samples == 8)
+                       blend_decompress = rctx->custom_blend_fmask_decompress;
+               else
+                       blend_decompress = rctx->custom_blend_decompress;
+               break;
+       case MSAA_TEXTURE_SAMPLE_ZERO:
+       default:
+               /* Nothing to do. */
+               rtex->dirty_level_mask = 0;
+               return;
+       }
+
        for (level = first_level; level <= last_level; level++) {
                if (!(rtex->dirty_level_mask & (1 << level)))
                        continue;
@@ -278,8 +295,7 @@ static void r600_blit_decompress_color(struct pipe_context 
*ctx,
                        cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, 
&surf_tmpl);
 
                        r600_blitter_begin(ctx, R600_DECOMPRESS);
-                       util_blitter_custom_color(rctx->blitter, cbsurf,
-                                                 
rctx->custom_blend_decompress);
+                       util_blitter_custom_color(rctx->blitter, cbsurf, 
blend_decompress);
                        r600_blitter_end(ctx);
 
                        pipe_surface_reference(&cbsurf, NULL);
@@ -299,13 +315,6 @@ void r600_decompress_color_textures(struct r600_context 
*rctx,
        unsigned i;
        unsigned mask = textures->compressed_colortex_mask;
 
-       /* Cayman cannot decompress an MSAA colorbuffer,
-        * but it can read it compressed, so skip this. */
-       assert(rctx->chip_class != CAYMAN);
-       if (rctx->chip_class == CAYMAN) {
-               return;
-       }
-
        while (mask) {
                struct pipe_sampler_view *view;
                struct r600_texture *tex;
@@ -333,7 +342,6 @@ static bool r600_decompress_subresource(struct pipe_context 
*ctx,
                                        unsigned level,
                                        unsigned first_layer, unsigned 
last_layer)
 {
-       struct r600_context *rctx = (struct r600_context *)ctx;
        struct r600_texture *rtex = (struct r600_texture*)tex;
 
        if (rtex->is_depth && !rtex->is_flushing_texture) {
@@ -344,7 +352,7 @@ static bool r600_decompress_subresource(struct pipe_context 
*ctx,
                                           level, level,
                                           first_layer, last_layer,
                                           0, u_max_sample(tex));
-       } else if (rctx->chip_class != CAYMAN && rtex->fmask_size && 
rtex->cmask_size) {
+       } else if (rtex->fmask_size && rtex->cmask_size) {
                r600_blit_decompress_color(ctx, rtex, level, level,
                                           first_layer, last_layer);
        }
@@ -459,6 +467,7 @@ static void r600_resource_copy_region(struct pipe_context 
*ctx,
        struct pipe_sampler_view src_templ, *src_view;
        unsigned dst_width, dst_height, src_width0, src_height0, src_widthFL, 
src_heightFL;
        struct pipe_box sbox;
+       bool copy_all_samples;
 
        /* Handle buffers first. */
        if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
@@ -558,16 +567,15 @@ static void r600_resource_copy_region(struct pipe_context 
*ctx,
                                                           src_widthFL, 
src_heightFL);
        }
 
+       copy_all_samples = rctx->screen->msaa_texture_support != 
MSAA_TEXTURE_SAMPLE_ZERO;
+
        /* Copy. */
-       /* XXX Multisample texturing is unimplemented on Cayman. In the 
meantime,
-        * copy only the first sample (which is the only one that is 
uncompressed
-        * and therefore doesn't return garbage). */
        r600_blitter_begin(ctx, R600_COPY_TEXTURE);
        util_blitter_blit_generic(rctx->blitter, dst_view, dstx, dsty,
                                  abs(src_box->width), abs(src_box->height),
                                  src_view, src_box, src_width0, src_height0,
                                  PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, 
NULL,
-                                 rctx->chip_class != CAYMAN);
+                                 copy_all_samples);
        r600_blitter_end(ctx);
 
        pipe_surface_reference(&dst_view, NULL);
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 5454414..90891c2 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -171,6 +171,9 @@ static void r600_destroy_context(struct pipe_context 
*context)
        if (rctx->custom_blend_decompress) {
                rctx->context.delete_blend_state(&rctx->context, 
rctx->custom_blend_decompress);
        }
+       if (rctx->custom_blend_fmask_decompress) {
+               rctx->context.delete_blend_state(&rctx->context, 
rctx->custom_blend_fmask_decompress);
+       }
        util_unreference_framebuffer_state(&rctx->framebuffer.state);
 
        r600_context_fini(rctx);
@@ -264,6 +267,7 @@ static struct pipe_context *r600_create_context(struct 
pipe_screen *screen, void
                rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx);
                rctx->custom_blend_resolve = 
evergreen_create_resolve_blend(rctx);
                rctx->custom_blend_decompress = 
evergreen_create_decompress_blend(rctx);
+               rctx->custom_blend_fmask_decompress = 
evergreen_create_fmask_decompress_blend(rctx);
                rctx->has_vertex_cache = !(rctx->family == CHIP_CEDAR ||
                                           rctx->family == CHIP_PALM ||
                                           rctx->family == CHIP_SUMO ||
@@ -289,6 +293,7 @@ static struct pipe_context *r600_create_context(struct 
pipe_screen *screen, void
        rctx->blitter = util_blitter_create(&rctx->context);
        if (rctx->blitter == NULL)
                goto fail;
+       util_blitter_set_texture_multisample(rctx->blitter, rscreen->has_msaa);
        rctx->blitter->draw_rectangle = r600_draw_rectangle;
 
        r600_begin_new_cs(rctx);
@@ -393,7 +398,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
        case PIPE_CAP_COMPUTE:
        case PIPE_CAP_START_INSTANCE:
        case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
-        case PIPE_CAP_TEXTURE_MULTISAMPLE:
                return 1;
 
        case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
@@ -402,6 +406,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
        case PIPE_CAP_GLSL_FEATURE_LEVEL:
                return 130;
 
+       case PIPE_CAP_TEXTURE_MULTISAMPLE:
+               return rscreen->msaa_texture_support != 
MSAA_TEXTURE_SAMPLE_ZERO;
+
        /* Supported except the original R600. */
        case PIPE_CAP_INDEP_BLEND_ENABLE:
        case PIPE_CAP_INDEP_BLEND_FUNC:
@@ -950,6 +957,26 @@ struct pipe_screen *r600_screen_create(struct 
radeon_winsys *ws)
                break;
        }
 
+       /* MSAA support. */
+       switch (rscreen->chip_class) {
+       case R600:
+       case R700:
+               rscreen->has_msaa = rscreen->info.drm_minor >= 22;
+               rscreen->msaa_texture_support = MSAA_TEXTURE_DECOMPRESSED;
+               break;
+       case EVERGREEN:
+               rscreen->has_msaa = rscreen->info.drm_minor >= 19;
+               rscreen->msaa_texture_support =
+                       rscreen->info.drm_minor >= 24 ? MSAA_TEXTURE_COMPRESSED 
:
+                                                       
MSAA_TEXTURE_DECOMPRESSED;
+               break;
+       case CAYMAN:
+               rscreen->has_msaa = rscreen->info.drm_minor >= 19;
+               /* We should be able to read compressed MSAA textures, but it 
doesn't work. */
+               rscreen->msaa_texture_support = MSAA_TEXTURE_SAMPLE_ZERO;
+               break;
+       }
+
        if (r600_init_tiling(rscreen)) {
                FREE(rscreen);
                return NULL;
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index 578cbbe..c865b2e 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -183,6 +183,22 @@ struct r600_pipe_fences {
        pipe_mutex                      mutex;
 };
 
+enum r600_msaa_texture_mode {
+       /* If the hw can fetch the first sample only (no decompression 
available).
+        * This means MSAA texturing is not fully implemented. */
+       MSAA_TEXTURE_SAMPLE_ZERO,
+
+       /* If the hw can fetch decompressed MSAA textures.
+        * Supported families: R600, R700, Evergreen.
+        * Cayman cannot use this, because it cannot do the decompression. */
+       MSAA_TEXTURE_DECOMPRESSED,
+
+       /* If the hw can fetch compressed MSAA textures, which means shaders can
+        * read resolved FMASK. This yields the best performance.
+        * Supported families: Evergreen, Cayman. */
+       MSAA_TEXTURE_COMPRESSED
+};
+
 struct r600_screen {
        struct pipe_screen              screen;
        struct radeon_winsys            *ws;
@@ -190,6 +206,8 @@ struct r600_screen {
        enum chip_class                 chip_class;
        struct radeon_info              info;
        bool                            has_streamout;
+       bool                            has_msaa;
+       enum r600_msaa_texture_mode     msaa_texture_support;
        struct r600_tiling_info         tiling_info;
        struct r600_pipe_fences         fences;
 
@@ -204,6 +222,7 @@ struct r600_pipe_sampler_view {
        struct pipe_sampler_view        base;
        struct r600_resource            *tex_resource;
        uint32_t                        tex_resource_words[8];
+       bool                            skip_mip_address_reloc;
 };
 
 struct r600_rasterizer_state {
@@ -371,6 +390,7 @@ struct r600_context {
        void                            *custom_dsa_flush;
        void                            *custom_blend_resolve;
        void                            *custom_blend_decompress;
+       void                            *custom_blend_fmask_decompress;
        /* With rasterizer discard, there doesn't have to be a pixel shader.
         * In that case, we bind this one: */
        void                            *dummy_pixel_shader;
@@ -520,6 +540,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, 
struct r600_pipe_shader
 void *evergreen_create_db_flush_dsa(struct r600_context *rctx);
 void *evergreen_create_resolve_blend(struct r600_context *rctx);
 void *evergreen_create_decompress_blend(struct r600_context *rctx);
+void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx);
 boolean evergreen_is_format_supported(struct pipe_screen *screen,
                                      enum pipe_format format,
                                      enum pipe_texture_target target,
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 053a988..9cd5eee 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1178,7 +1178,8 @@ static int r600_shader_from_tgsi(struct r600_screen 
*rscreen,
        ctx.shader = shader;
        ctx.native_integers = true;
 
-       r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family);
+       r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family,
+                          rscreen->msaa_texture_support);
        ctx.tokens = tokens;
        tgsi_scan_shader(tokens, &ctx.info);
        tgsi_parse_init(&ctx.parse, tokens);
@@ -3794,10 +3795,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
        unsigned src_gpr;
        int r, i, j;
        int opcode;
+       bool read_compressed_msaa = ctx->bc->msaa_texture_mode == 
MSAA_TEXTURE_COMPRESSED &&
+                                   inst->Instruction.Opcode == TGSI_OPCODE_TXF 
&&
+                                   (inst->Texture.Texture == 
TGSI_TEXTURE_2D_MSAA ||
+                                    inst->Texture.Texture == 
TGSI_TEXTURE_2D_ARRAY_MSAA);
        /* Texture fetch instructions can only use gprs as source.
         * Also they cannot negate the source or take the absolute value */
-       const boolean src_requires_loading = inst->Instruction.Opcode != 
TGSI_OPCODE_TXQ_LZ &&
-                                             
tgsi_tex_src_requires_loading(ctx, 0);
+       const boolean src_requires_loading = (inst->Instruction.Opcode != 
TGSI_OPCODE_TXQ_LZ &&
+                                              
tgsi_tex_src_requires_loading(ctx, 0)) ||
+                                            read_compressed_msaa;
        boolean src_loaded = FALSE;
        unsigned sampler_src_reg = inst->Instruction.Opcode == 
TGSI_OPCODE_TXQ_LZ ? 0 : 1;
        uint8_t offset_x = 0, offset_y = 0, offset_z = 0;
@@ -4068,6 +4074,127 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
                src_gpr = ctx->temp_reg;
        }
 
+       /* Obtain the sample index for reading a compressed MSAA color texture.
+        * To read the FMASK, we use the ldfptr instruction, which tells us
+        * where the samples are stored.
+        * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210,
+        * which is the identity mapping. Each nibble says which physical sample
+        * should be fetched to get that sample.
+        *
+        * Assume src.z contains the sample index. It should be modified like 
this:
+        *   src.z = (ldfptr() >> (src.z * 4)) & 0xF;
+        * Then fetch the texel with src.
+        */
+       if (read_compressed_msaa) {
+               unsigned sample_chan = inst->Texture.Texture == 
TGSI_TEXTURE_2D_MSAA ? 3 : 4;
+               unsigned temp = r600_get_temp(ctx);
+               assert(src_loaded);
+
+               /* temp.w = ldfptr() */
+               memset(&tex, 0, sizeof(struct r600_bytecode_tex));
+               tex.inst = SQ_TEX_INST_LD;
+               tex.inst_mod = 1; /* to indicate this is ldfptr */
+               tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
+               tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
+               tex.src_gpr = src_gpr;
+               tex.dst_gpr = temp;
+               tex.dst_sel_x = 7; /* mask out these components */
+               tex.dst_sel_y = 7;
+               tex.dst_sel_z = 7;
+               tex.dst_sel_w = 0; /* store X */
+               tex.src_sel_x = 0;
+               tex.src_sel_y = 1;
+               tex.src_sel_z = 2;
+               tex.src_sel_w = 3;
+               tex.offset_x = offset_x;
+               tex.offset_y = offset_y;
+               tex.offset_z = offset_z;
+               r = r600_bytecode_add_tex(ctx->bc, &tex);
+               if (r)
+                       return r;
+
+               /* temp.x = sample_index*4 */
+               if (ctx->bc->chip_class == CAYMAN) {
+                       for (i = 0 ; i < 4; i++) {
+                               memset(&alu, 0, sizeof(struct 
r600_bytecode_alu));
+                               alu.inst = ctx->inst_info->r600_opcode;
+                               alu.src[0].sel = src_gpr;
+                               alu.src[0].chan = sample_chan;
+                               alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+                               alu.src[1].value = 4;
+                               alu.dst.sel = temp;
+                               alu.dst.chan = i;
+                               alu.dst.write = i == 0;
+                               if (i == 3)
+                                       alu.last = 1;
+                               r = r600_bytecode_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;
+                       }
+               } else {
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT);
+                       alu.src[0].sel = src_gpr;
+                       alu.src[0].chan = sample_chan;
+                       alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+                       alu.src[1].value = 4;
+                       alu.dst.sel = temp;
+                       alu.dst.chan = 0;
+                       alu.dst.write = 1;
+                       alu.last = 1;
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+
+               /* sample_index = temp.w >> temp.x */
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT);
+               alu.src[0].sel = temp;
+               alu.src[0].chan = 3;
+               alu.src[1].sel = temp;
+               alu.src[1].chan = 0;
+               alu.dst.sel = src_gpr;
+               alu.dst.chan = sample_chan;
+               alu.dst.write = 1;
+               alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+
+               /* sample_index & 0xF */
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
+               alu.src[0].sel = src_gpr;
+               alu.src[0].chan = sample_chan;
+               alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+               alu.src[1].value = 0xF;
+               alu.dst.sel = src_gpr;
+               alu.dst.chan = sample_chan;
+               alu.dst.write = 1;
+               alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+#if 0
+               /* visualize the FMASK */
+               for (i = 0; i < 4; i++) {
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
+                       alu.src[0].sel = src_gpr;
+                       alu.src[0].chan = sample_chan;
+                       alu.dst.sel = 
ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+                       alu.dst.chan = i;
+                       alu.dst.write = 1;
+                       alu.last = 1;
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+               return 0;
+#endif
+       }
+
        opcode = ctx->inst_info->r600_opcode;
        if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
            inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
diff --git a/src/gallium/drivers/r600/r600_sq.h 
b/src/gallium/drivers/r600/r600_sq.h
index 4b2a19a..587f88d 100644
--- a/src/gallium/drivers/r600/r600_sq.h
+++ b/src/gallium/drivers/r600/r600_sq.h
@@ -375,6 +375,9 @@
 #define   S_SQ_TEX_WORD0_BC_FRAC_MODE(x)                             (((x) & 
0x1) << 5)
 #define   G_SQ_TEX_WORD0_BC_FRAC_MODE(x)                             (((x) >> 
5) & 0x1)
 #define   C_SQ_TEX_WORD0_BC_FRAC_MODE                                0xFFFFFFDF
+#define   EG_S_SQ_TEX_WORD0_INST_MOD(x)                                 (((x) 
& 0x3) << 5)
+#define   EG_G_SQ_TEX_WORD0_INST_MOD(x)                                 (((x) 
>> 5) & 0x3)
+#define   EG_C_SQ_TEX_WORD0_INST_MOD                                    
0xFFFFFF9F
 #define   S_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x)                         (((x) & 
0x1) << 7)
 #define   G_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x)                         (((x) >> 
7) & 0x1)
 #define   C_SQ_TEX_WORD0_FETCH_WHOLE_QUAD                            0xFFFFFF7F
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index 7ae4558..175287c 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -585,7 +585,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
                return FALSE;
 
        if (sample_count > 1) {
-               if (rscreen->info.drm_minor < 22)
+               if (!rscreen->has_msaa)
                        return FALSE;
 
                /* R11G11B10 is broken on R6xx. */
@@ -1994,7 +1994,6 @@ static void r600_emit_sampler_views(struct r600_context 
*rctx,
                r600_write_value(cs, (resource_id_base + resource_index) * 7);
                r600_write_array(cs, 7, rview->tex_resource_words);
 
-               /* XXX The kernel needs two relocations. This is stupid. */
                reloc = r600_context_bo_reloc(rctx, rview->tex_resource,
                                              RADEON_USAGE_READ);
                r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index ef18f6b..0b423be 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -593,8 +593,8 @@ static void r600_set_sampler_views(struct pipe_context 
*pipe, unsigned shader,
                                dst->views.compressed_depthtex_mask &= ~(1 << 
i);
                        }
 
-                       /* Track compressed colorbuffers for Evergreen (Cayman 
doesn't need this). */
-                       if (rctx->chip_class != CAYMAN && rtex->cmask_size && 
rtex->fmask_size) {
+                       /* Track compressed colorbuffers. */
+                       if (rtex->cmask_size && rtex->fmask_size) {
                                dst->views.compressed_colortex_mask |= 1 << i;
                        } else {
                                dst->views.compressed_colortex_mask &= ~(1 << 
i);
-- 
1.7.9.5

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to