Patch to add ARMv5 optimised 8888 & 565 blitters/fill to Pixman

Signed-off-by: Andre Renaud <andre@bluewatersys.com>
---

diff --git a/configure.ac b/configure.ac
index 5fda547..477b165 100644
--- a/configure.ac
+++ b/configure.ac
@@ -604,6 +604,29 @@ if test $enable_arm_neon = yes && test $have_arm_neon = no ; then
 fi
 
 dnl ===========================================================================
+dnl Check for ARMv5
+
+AC_ARG_ENABLE(arm-v5,
+   [AC_HELP_STRING([--enable-arm-v5],
+                   [enable ARM V5 fast paths (default: disabled)])],
+   [enable_arm_v5=$enableval], [enable_arm_v5=no])
+
+if test $enable_arm_v5 = no ; then
+   have_arm_v5=disabled
+else
+    have_arm_v5=yes
+fi
+
+if test $have_arm_v5 = yes ; then
+   AC_DEFINE(USE_ARM_V5, 1, [use ARM v5 assembly optimizations])
+fi
+
+AM_CONDITIONAL(USE_ARM_V5, test $have_arm_v5 = yes)
+
+AC_MSG_CHECKING(whether to use ARM V5 assembler)
+AC_MSG_RESULT($have_arm_v5)
+
+dnl ===========================================================================
 dnl Check for IWMMXT
 
 AC_ARG_ENABLE(arm-iwmmxt,
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 270d65e..3233f0f 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -70,6 +70,19 @@ libpixman_1_la_LIBADD += libpixman-arm-simd.la
 ASM_CFLAGS_arm_simd=
 endif
 
+# arm v5 code
+if USE_ARM_V5
+noinst_LTLIBRARIES += libpixman-arm-v5.la
+libpixman_arm_v5_la_SOURCES = \
+        pixman-arm-v5.c	\
+        pixman-arm-common.h	\
+        pixman-arm-v5-asm.S	\
+        pixman-arm-v5-asm.h
+libpixman_1_la_LIBADD += libpixman-arm-v5.la
+
+ASM_CFLAGS_arm_v5=
+endif
+
 # arm neon code
 if USE_ARM_NEON
 noinst_LTLIBRARIES += libpixman-arm-neon.la
diff --git a/pixman/pixman-arm-v5-asm.S b/pixman/pixman-arm-v5-asm.S
new file mode 100644
index 0000000..10b1b1c
--- /dev/null
+++ b/pixman/pixman-arm-v5-asm.S
@@ -0,0 +1,414 @@
+/*
+ * Copyright (c) 2005-2008, The Android Open Source Project
+ * Copyright (c) 2010, Code Aurora Forum. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+
+ */
+
+/* Changes:
+ * 2010-08-11 Steve McIntyre <steve.mcintyre@arm.com>
+ *    Added small changes to the two functions to make them work on the
+ *    specified number of 16- or 32-bit values rather than the original
+ *    code which was specified as a count of bytes. More verbose comments
+ *    to aid future maintenance.
+ */
+
+    .text
+    .align 4
+    .syntax unified
+
+    .global pixman_arm_memset32
+    .type   pixman_arm_memset32, %function
+    .global pixman_arm_memset16
+    .type   pixman_arm_memset16, %function
+    .global pixman_arm_blitrow_32
+    .type   pixman_arm_blitrow_32, %function
+    .global pixman_arm_blitrow_565
+    .type   pixman_arm_blitrow_565, %function
+
+// uses r6, r7, r8, r9, r10, lr
+
+.macro pixel,   DREG, SRC, FB, OFFSET
+
+    // SRC = AARRGGBB
+    subs   r7, r10, \SRC, lsr #24           // sAA = 255 - sAA
+    beq    1f
+// FIXME: Do we want to multiply the alpha, I thought they were premultiplied?
+.if \OFFSET
+
+    // red
+    mov     lr, \DREG, lsr #(\OFFSET + 6 + 5)
+    smlabb  lr, r7, lr, r8
+    and     r6, r10, \SRC, lsr #(8 + 8)
+    add     lr, lr, lr, lsr #5
+    add     lr, r6, lr, lsr #5
+    lsr     lr, #3
+    orr     \FB, \FB, lr, lsl #(\OFFSET + 11)
+
+        // green
+        and     r6, \DREG, #(0x3F<<(\OFFSET + 5))
+        lsr     r6, #5
+        smlabt  r6, r7, r6, r9
+        and     lr, r10, \SRC, lsr #(8)
+        add     r6, r6, r6, lsr #6
+        add     r6, lr, r6, lsr #6
+        lsr     r6, #2
+        orr     \FB, \FB, r6, lsl #(\OFFSET + 5)
+
+            // blue
+            and     lr, \DREG, #(0x1F << \OFFSET)
+            smlabt  lr, r7, lr, r8
+            and     r6, r10, \SRC
+            add     lr, lr, lr, lsr #5
+            add     lr, r6, lr, lsr #5
+            lsr     lr, #3
+            orr     \FB, \FB, lr, lsl #\OFFSET
+
+.else
+
+    // red
+    mov     lr, \DREG, lsr #(6+5)
+    and     lr, lr, #0x1F
+    smlabb  lr, r7, lr, r8
+    and     r6, r10, \SRC, lsr #(8 + 8)
+    add     lr, lr, lr, lsr #5
+    add     lr, r6, lr, lsr #5
+    lsr     lr, #3
+    mov     \FB, lr, lsl #11
+
+        // green
+        and     r6, \DREG, #(0x3F<<5)
+        lsr     r6, #5
+        smlabb  r6, r7, r6, r9
+        and     lr, r10, \SRC, lsr #(8)
+        add     r6, r6, r6, lsr #6
+        add     r6, lr, r6, lsr #6
+        lsr     r6, #2
+        orr     \FB, \FB, r6, lsl #5
+
+            // blue
+            and     lr, \DREG, #0x1F
+            smlabb  lr, r7, lr, r8
+            and     r6, r10, \SRC
+            add     lr, lr, lr, lsr #5
+            add     lr, r6, lr, lsr #5
+            orr     \FB, \FB, lr, lsr #3
+
+.endif
+   b      2f
+
+   /*
+    * Alpha == 255 (source fully opaque): no blend needed, just
+    * downscale the 24-bit source RGB pixel to 16-bit RGB565.
+    */
+1:
+    lsr    lr, \SRC, #3        // 0001 1111 111R RRRR RRRG GGGG GGGB BBBB
+    and    r7, \SRC, #0xf80000 // 0000 0000 RRRR R000 0000 0000 0000 0000
+    and    lr, lr, #0x01f      // 0000 0000 0000 0000 0000 0000 000B BBBB
+    orr    lr, lr, r7, lsr #8  // 0000 0000 0000 0000 RRRR R000 000B BBBB
+    and    r6, \SRC, #0xfc00   // 0000 0000 0000 0000 GGGG GG00 0000 0000
+
+.if \OFFSET
+    orr    lr, lr, r6, lsr #5  // 0000 0000 0000 0000 RRRR RGGG GGGB BBBB
+    orr    \FB, \FB, lr, lsl #(\OFFSET)
+.else
+    orr    \FB, lr, r6, lsr #5
+.endif
+
+2:
+.endm
+
+
+// r0:  dst ptr
+// r1:  src ptr
+// r2:  count
+// r3:  d
+// r4:  s0
+// r5:  s1
+// r6:  pixel
+// r7:  pixel
+// r8:  0x10
+// r9:  0x20
+// r10: 0xFF
+// r11: free
+// r12: scratch
+// r14: free
+
+pixman_arm_blitrow_565:
+    stmfd	sp!, {r4-r10, lr}
+
+blit_less_than_16_left:
+    pld     [r1]
+
+    mov     r8,  #0x10
+    mov     r9,  #0x20
+    mov     r10, #0xFF
+
+    cmp    r2, #1
+    pld     [r0]
+    ble     9f
+    sub    r2, r2, #2
+
+    // if r0 is unaligned, we need to do a special case, like at the end
+    ands    r4, r0, #3
+    beq     8f
+
+    sub     r2, r2, #1
+    ldr     r4, [r1], #4
+    ldrh    r3, [r0]
+    pixel   r3, r4, r12, 0
+    strh    r12, [r0], #2
+
+    // The main loop is unrolled three times and processes 6 pixels per pass
+8:  ldmia   r1!, {r4, r5}
+    // stream the source
+    pld     [r1, #32]
+    add     r0, r0, #4
+    // it's all zero, skip this pixel
+    orrs    r3, r4, r5
+    beq     7f
+
+    // load the destination
+    ldr     r3, [r0, #-4]
+    // stream the destination
+    pld     [r0, #32]
+    pixel   r3, r4, r12, 0
+    pixel   r3, r5, r12, 16
+    // effectively, we're getting write-combining by virtue of the
+    // cpu's write-back cache.
+    str     r12, [r0, #-4]
+
+    // 2nd iteration of the loop, don't stream anything
+    cmp     r2, #1
+    ble     9f
+    sub     r2, r2, #2
+    ldmia   r1!, {r4, r5}
+    add     r0, r0, #4
+    orrs    r3, r4, r5
+    beq     7f
+    ldr     r3, [r0, #-4]
+    pixel   r3, r4, r12, 0
+    pixel   r3, r5, r12, 16
+    str     r12, [r0, #-4]
+
+    // 3rd iteration of the loop, don't stream anything
+    cmp     r2, #1
+    ble     9f
+    sub     r2, r2, #2
+    ldmia   r1!, {r4, r5}
+    add     r0, r0, #4
+    orrs    r3, r4, r5
+    beq     7f
+    ldr     r3, [r0, #-4]
+    pixel   r3, r4, r12, 0
+    pixel   r3, r5, r12, 16
+    str     r12, [r0, #-4]
+
+7:
+    cmp    r2, #1
+    subgt    r2, r2, #2
+    bgt       8b
+
+9:
+    ldmnefd sp!, {r4-r10, lr}        // return
+    bxne    lr
+
+    // last pixel left
+    ldr     r4, [r1]
+    ldrh    r3, [r0]
+    pixel   r3, r4, r12, 0
+    strh    r12, [r0]
+    ldmfd   sp!, {r4-r10, lr}        // return
+    bx      lr
+
+pixman_arm_blitrow_32:
+
+    push     {r4-r11}
+/*
+ * r0 - dst
+ * r1 - src
+ * r2 - count
+ */
+.Lresidual_loop:
+    mov      r10, #0xFF
+    orr      r10, r10, r10, lsl #16    //mask = r10 = 0x00FF00FF
+
+    subs     r2, r2, #2
+    blt      .Lblitrow32_single_loop
+
+.Lblitrow32_double_loop:
+    ldm      r0, {r3, r4}
+    ldm      r1!, {r5, r6}
+
+    orrs     r9, r3, r4
+    beq      .Lblitrow32_loop_cond
+
+    // First iteration
+    lsr      r7, r5, #24               //extract alpha
+    and      r8, r3, r10               //rb = (dst & mask)
+    rsb      r7, r7, #256              //r7 = scale = (255-alpha)+1
+    and      r9, r10, r3, lsr #8       //ag = (dst>>8) & mask
+
+    mul      r11, r8, r7               //RB = rb * scale
+    mul      r3, r9, r7                //AG = ag * scale
+
+    // combine RB and AG
+    and      r11, r10, r11, lsr #8     //r11 = (RB>>8) & mask
+    and      r3, r3, r10, lsl #8       //r3 = AG & ~mask
+
+    lsr      r7, r6, #24               //extract alpha for second iteration
+    orr      r3, r3, r11
+
+    // Second iteration
+    and      r8, r4, r10               //rb = (dst & mask)
+    rsb      r7, r7, #256              //r7 = scale = (255-alpha)+1
+    and      r9, r10, r4, lsr #8       //ag = (dst>>8) & mask
+
+    mul      r11, r8, r7               //RB = rb * scale
+    mul      r4, r9, r7                //AG = ag * scale
+
+    // combine RB and AG
+    and      r11, r10, r11, lsr #8     //r11 = (RB>>8) & mask
+    and      r4, r4, r10, lsl #8       //r4 = AG & ~mask
+    orr      r4, r4, r11
+
+    // add src to combined value
+    add      r5, r5, r3
+    add      r6, r6, r4
+
+.Lblitrow32_loop_cond:
+    subs     r2, r2, #2
+    stm      r0!, {r5, r6}
+
+    bge      .Lblitrow32_double_loop
+
+.Lblitrow32_single_loop:
+    adds     r2, #1
+    blo      .Lexit
+
+    ldr      r3, [r0]
+    ldr      r5, [r1], #4
+
+    cmp      r3, #0
+    beq      .Lblitrow32_single_store
+
+    lsr      r7, r5, #24               //extract alpha
+    and      r8, r3, r10               //rb = (dst & mask)
+    rsb      r7, r7, #256              //r7 = scale = (255-alpha)+1
+    and      r9, r10, r3, lsr #8       //ag = (dst>>8) & mask
+
+    mul      r8, r8, r7                //RB = rb * scale
+    mul      r9, r9, r7                //AG = ag * scale
+
+    // combine RB and AG
+    and      r8, r10, r8, lsr #8       //r8 = (RB>>8) & mask
+    and      r9, r9, r10, lsl #8       //r9 = AG & ~mask
+    orr      r3, r8, r9
+
+    add      r5, r5, r3                //add src to combined value
+
+.Lblitrow32_single_store:
+    str      r5, [r0], #4
+
+.Lexit:
+    pop      {r4-r11}
+    bx       lr
+
+/*
+ * Optimized memset functions for ARM.
+ *
+ * void pixman_arm_memset16(uint16_t* dst, uint16_t value, int count);
+ * void pixman_arm_memset32(uint32_t* dst, uint32_t value, int count);
+ *
+ */
+pixman_arm_memset16:
+        .fnstart
+        push        {lr}
+
+        /* if count is equal to zero then abort */
+        teq         r2, #0
+        ble         .Lfinish
+
+        /* Multiply count by 2 - go from the number of 16-bit shorts
+         * to the number of bytes desired. */
+        mov         r2, r2, lsl #1
+
+        /* expand the data to 32 bits */
+        orr         r1, r1, r1, lsl #16
+
+        /* align to 32 bits */
+        tst         r0, #2
+        strhne      r1, [r0], #2
+        subne       r2, r2, #2
+
+        /* Now jump into the main loop below. */
+        b           .Lwork_32
+        .fnend
+
+pixman_arm_memset32:
+        .fnstart
+        push        {lr}
+
+        /* if count is equal to zero then abort */
+        teq         r2, #0
+        ble         .Lfinish
+
+        /* Multiply count by 4 - go from the number of 32-bit words to
+         * the number of bytes desired. */
+        mov         r2, r2, lsl #2
+
+.Lwork_32:
+        /* Set up registers ready for writing them out. */
+        mov         ip, r1
+        mov         lr, r1
+
+        /* Try to align the destination to a cache line. Assume 32
+         * byte (8 word) cache lines, it's the common case. */
+        rsb         r3, r0, #0
+        ands        r3, r3, #0x1C
+        beq         .Laligned32
+        cmp         r3, r2
+        andhi       r3, r2, #0x1C
+        sub         r2, r2, r3
+
+        /* (Optionally) write any unaligned leading bytes.
+         * (0-28 bytes, length in r3) */
+        movs        r3, r3, lsl #28
+        stmiacs     r0!, {r1, lr}
+        stmiacs     r0!, {r1, lr}
+        stmiami     r0!, {r1, lr}
+        movs        r3, r3, lsl #2
+        strcs       r1, [r0], #4
+
+        /* Now quickly loop through the cache-aligned data. */
+.Laligned32:
+        mov         r3, r1
+1:      subs        r2, r2, #32
+        stmiahs     r0!, {r1,r3,ip,lr}
+        stmiahs     r0!, {r1,r3,ip,lr}
+        bhs         1b
+        add         r2, r2, #32
+
+        /* (Optionally) store any remaining trailing bytes.
+         * (0-30 bytes, length in r2) */
+        movs        r2, r2, lsl #28
+        stmiacs     r0!, {r1,r3,ip,lr}
+        stmiami     r0!, {r1,lr}
+        movs        r2, r2, lsl #2
+        strcs       r1, [r0], #4
+        strhmi      lr, [r0], #2
+
+.Lfinish:
+        pop         {pc}
+        .fnend
diff --git a/pixman/pixman-arm-v5-asm.h b/pixman/pixman-arm-v5-asm.h
new file mode 100644
index 0000000..3dfad26
--- /dev/null
+++ b/pixman/pixman-arm-v5-asm.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2012, Bluewater Systems
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of ARM Ltd not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  ARM Ltd makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Andre Renaud (andre@bluewatersys.com)
+ *
+ */
+
+void pixman_arm_memset16(uint16_t* dst, uint16_t value, int count);
+void pixman_arm_memset32(uint32_t* dst, uint32_t value, int count);
+void pixman_arm_blitrow_32(uint32_t *dst, uint32_t *src, int count);
+void pixman_arm_blitrow_565(uint16_t *dst, uint32_t *src, int count);
diff --git a/pixman/pixman-arm-v5.c b/pixman/pixman-arm-v5.c
new file mode 100644
index 0000000..21bb719
--- /dev/null
+++ b/pixman/pixman-arm-v5.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright 2012, Bluewater Systems
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of ARM Ltd not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  ARM Ltd makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Andre Renaud (andre@bluewatersys.com)
+ *
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+#include "pixman-private.h"
+#include "pixman-arm-common.h"
+#include "pixman-arm-v5-asm.h"
+
+static void
+arm_v5_fill_16 (uint16_t *bits,
+                int       stride,
+                int       x,
+                int       y,
+                int       width,
+                int       height,
+                uint32_t  xor)
+{
+    int short_stride =
+        (stride * sizeof (uint32_t)) / sizeof(uint16_t);
+    bits = bits + y * short_stride + x;
+    while (height--) {
+        pixman_arm_memset16(bits, xor, width);
+        bits += short_stride;
+    }
+}
+
+static void
+arm_v5_fill_32 (uint32_t *bits,
+                int       stride,
+                int       x,
+                int       y,
+                int       width,
+                int       height,
+                uint32_t  xor)
+{
+    bits = bits + y * stride + x;
+    while (height--) {
+        pixman_arm_memset32(bits, xor, width);
+        bits += stride;
+    }
+}
+
+static pixman_bool_t
+arm_v5_fill (pixman_implementation_t *imp,
+             uint32_t *               bits,
+             int                      stride,
+             int                      bpp,
+             int                      x,
+             int                      y,
+             int                      width,
+             int                      height,
+             uint32_t                 _xor)
+{
+    switch (bpp)
+    {
+    case 16:
+        arm_v5_fill_16((uint16_t *)bits, stride, x, y, width, height, _xor);
+        return TRUE;
+    case 32:
+        arm_v5_fill_32(bits, stride, x, y, width, height, _xor);
+        return TRUE;
+    default:
+        return FALSE;
+    }
+}
+
+static void
+arm_v5_composite_over_8888_0565 (pixman_implementation_t *imp,
+                                 pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS(info);
+    uint16_t *dst;
+    uint32_t *src;
+    int dst_stride, src_stride;
+
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src, 1);
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst, 1);
+    while (height--)
+    {
+        pixman_arm_blitrow_565 (dst, src, width);
+        dst += dst_stride;
+        src += src_stride;
+    }
+}
+
+static void
+arm_v5_composite_over_8888_8888 (pixman_implementation_t *imp,
+                                 pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS(info);
+    uint32_t *dst;
+    uint32_t *src;
+    int dst_stride, src_stride;
+
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src, 1);
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst, 1);
+    while (height--)
+    {
+        pixman_arm_blitrow_32 (dst, src, width);
+        dst += dst_stride;
+        src += src_stride;
+    }
+}
+
+static const pixman_fast_path_t arm_v5_fast_paths[] =
+{
+    PIXMAN_STD_FAST_PATH(OVER, a8r8g8b8, null, r5g6b5, arm_v5_composite_over_8888_0565),
+    PIXMAN_STD_FAST_PATH(OVER, a8b8g8r8, null, b5g6r5, arm_v5_composite_over_8888_0565),
+    PIXMAN_STD_FAST_PATH(OVER, a8r8g8b8, null, a8r8g8b8, arm_v5_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH(OVER, a8r8g8b8, null, x8r8g8b8, arm_v5_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH(OVER, a8b8g8r8, null, a8b8g8r8, arm_v5_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH(OVER, a8b8g8r8, null, x8b8g8r8, arm_v5_composite_over_8888_8888),
+    { PIXMAN_OP_NONE },
+};
+
+pixman_implementation_t *
+_pixman_implementation_create_arm_v5 (pixman_implementation_t *fallback)
+{
+    pixman_implementation_t *imp =
+        _pixman_implementation_create (fallback, arm_v5_fast_paths);
+
+    imp->fill = arm_v5_fill;
+
+    return imp;
+}
diff --git a/pixman/pixman-arm.c b/pixman/pixman-arm.c
index 23374e4..2e31406 100644
--- a/pixman/pixman-arm.c
+++ b/pixman/pixman-arm.c
@@ -31,7 +31,8 @@ typedef enum
     ARM_V6		= (1 << 1),
     ARM_VFP		= (1 << 2),
     ARM_NEON		= (1 << 3),
-    ARM_IWMMXT		= (1 << 4)
+    ARM_IWMMXT		= (1 << 4),
+    ARM_V5		= (1 << 5),
 } arm_cpu_features_t;
 
 #if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT)
@@ -168,6 +169,8 @@ detect_cpu_features (void)
 		    features |= (ARM_V7 | ARM_V6);
 		else if (strncmp (plat, "v6l", 3) == 0)
 		    features |= ARM_V6;
+		else if (strncmp (plat, "v5l", 3) == 0)
+		    features |= ARM_V5;
 	    }
 	}
 	close (fd);
@@ -206,6 +209,11 @@ have_feature (arm_cpu_features_t feature)
 pixman_implementation_t *
 _pixman_arm_get_implementations (pixman_implementation_t *imp)
 {
+#ifdef USE_ARM_V5
+    if (!_pixman_disabled ("arm-v5") && have_feature (ARM_V5))
+        imp = _pixman_implementation_create_arm_v5 (imp);
+#endif
+
 #ifdef USE_ARM_SIMD
     if (!_pixman_disabled ("arm-simd") && have_feature (ARM_V6))
 	imp = _pixman_implementation_create_arm_simd (imp);
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index b9c8319..c8b4f97 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -577,6 +577,11 @@ pixman_implementation_t *
 _pixman_implementation_create_sse2 (pixman_implementation_t *fallback);
 #endif
 
+#ifdef USE_ARM_V5
+pixman_implementation_t *
+_pixman_implementation_create_arm_v5 (pixman_implementation_t *fallback);
+#endif
+
 #ifdef USE_ARM_SIMD
 pixman_implementation_t *
 _pixman_implementation_create_arm_simd (pixman_implementation_t *fallback);
