On 11/05/2013 01:35 PM, Matt Turner wrote:
---
Chad has benchmark numbers, I think.
Put this in the commit message:
Improves performance of RoboHornet's 2D Canvas toDataURL benchmark
[http://www.robohornet.org/#e=canvastodataurl] by approximately 5x
on Baytrail on ChromiumOS.
src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 88 +++++++++++++++++++++++++++
1 file changed, 88 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 2f5e04f..17f0075 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1774,6 +1774,86 @@ intel_miptree_unmap_blit(struct brw_context *brw,
intel_miptree_release(&map->mt);
}
+#ifdef __SSE4_1__
+/**
+ * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
+ */
+static void
+intel_miptree_map_movntdqa(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ struct intel_miptree_map *map,
+ unsigned int level, unsigned int slice)
+{
+ assert(map->mode & GL_MAP_READ_BIT);
+ assert(!(map->mode & GL_MAP_WRITE_BIT));
+
+ DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
+ map->x, map->y, map->w, map->h,
+ mt, _mesa_get_format_name(mt->format),
+ level, slice, map->ptr, map->stride);
+
+ /* Map the original image */
+ uint32_t image_x;
+ uint32_t image_y;
+ intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
+ image_x += map->x;
+ image_y += map->y;
+
+ void *src = intel_miptree_map_raw(brw, mt);
+ if (!src) {
+ _mesa_align_free(map->buffer);
I don't think _mesa_align_free() should be here. In fact, I see no
reason to free map->buffer here at all, because it has not yet been
allocated.
+ map->buffer = NULL;
+ map->ptr = NULL;
+ return;
+ }
+ src += image_y * mt->region->pitch;
+ src += image_x * mt->region->cpp;
+
+ /* Due to the pixel offsets for the particular image being mapped, our
+ * src pointer may not be 16-byte aligned. However, if the pitch is
+ * divisible by 16, then the amount by which it's misaligned will remain
+ * consistent from row to row.
+ */
+ assert((mt->region->pitch % 16) == 0);
+ const int misalignment = ((uintptr_t) src) & 15;
^^^^^
Thanks for using const. It makes it easier to reason about the code.
+
+ /* Create an untiled temporary buffer for the mapping. */
+ unsigned stride = _mesa_format_row_stride(mt->format, map->w);
^^^^^^^^
Likewise, let's make that `const unsigned`.
Also, this is a nitpick that you can ignore. The variable `stride`
does not follow the conventions throughout the rest of Mesa. Stride
is the length of a row, including any padding. Yet, in this patch,
`stride` is the row length *without* padding, and `map->stride`
is the row length *with* padding. I'd like to see the variable renamed
to something that complies with the conventions in the rest of Mesa
and the miptree code. Maybe `width_bytes`.
+
+ map->stride = ALIGN(misalignment + stride, 16);
+
+ map->buffer = malloc(misalignment + map->stride * map->h * mt->region->cpp);
^^^^^^^^^^^^
^^^^
The presence of cpp in malloc causes the buffer to be too large. map->stride is
in units
of bytes, so it already includes the cpp.
Also, I don't understand why `misalignment` is added to `map->stride * map->h`.
map->stride
already includes padding for the misalignment.
+ /* Offset the destination so it has the same misalignment as src. */
+ map->ptr = map->buffer + misalignment;
+
+ assert((((uintptr_t) map->ptr) & 15) == misalignment);
+
+ for (uint32_t y = 0; y < map->h; y++) {
+ void *dst_ptr = map->ptr + y * map->stride;
+ void *src_ptr = src + y * mt->region->pitch;
+
+ _mesa_streaming_load_memcpy(dst_ptr, src_ptr, stride);
+
+ dst_ptr += stride;
+ src_ptr += stride;
+ }
+
+ intel_miptree_unmap_raw(brw, mt);
+}
_______________________________________________
mesa-dev mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/mesa-dev