Using _mm_loadl_epi64() to load two horizontally adjacent pixels at
once (one 64-bit load for the top pair, one for the bottom pair) is
faster than loading each of the four pixels separately and combining
them with _mm_set_epi32().
=== cairo-perf-trace ===
before: image firefox-fishtank 66.912 66.931 0.13% 3/3
after: image firefox-fishtank 57.584 58.349 0.74% 3/3
=== lowlevel-blt-bench ===
before: src_8888_8888 = L1: 181.10 L2: 179.14 M:178.08 ( 11.02%) HT:153.22
VT:133.45 R:142.24 RT: 95.32
after: src_8888_8888 = L1: 228.68 L2: 225.75 M:223.98 ( 14.23%) HT:185.32
VT:155.06 R:162.73 RT:102.52
---
pixman/pixman-sse2.c | 15 +++++++--------
1 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 0604254..ef82a18 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5377,17 +5377,16 @@ FAST_NEAREST_MAINLOOP_COMMON
(sse2_8888_n_8888_none_OVER,
#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix)
\
do {
\
__m128i xmm_wh, xmm_lo, xmm_hi, a;
\
- /* fetch 2x2 pixel block into sse2 register */
\
- uint32_t tl = src_top [pixman_fixed_to_int (vx)];
\
- uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
\
- uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
\
- uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
\
- a = _mm_set_epi32 (tr, tl, br, bl);
\
+ /* fetch 2x2 pixel block into sse2 registers */
\
+ __m128i tltr = _mm_loadl_epi64 (
\
+ (__m128i *)&src_top[pixman_fixed_to_int (vx)]);
\
+ __m128i blbr = _mm_loadl_epi64 (
\
+ (__m128i *)&src_bottom[pixman_fixed_to_int (vx)]);
\
vx += unit_x;
\
/* vertical interpolation */
\
- a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpackhi_epi8 (a, xmm_zero),
\
+ a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero),
\
xmm_wt),
\
- _mm_mullo_epi16 (_mm_unpacklo_epi8 (a, xmm_zero),
\
+ _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero),
\
xmm_wb));
\
/* calculate horizontal weights */
\
xmm_wh = _mm_add_epi16 (xmm_addc,
\
--
1.7.3.4
_______________________________________________
Pixman mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/pixman