/*
 * ARM memcpy asm replacement.
 *
 * Copyright (C) 2009 Bluush Dev Team.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */


/*
 * _LABEL(f) expands to "f :", i.e. it defines the assembler label f.
 * Because it is a #define, this file has to be run through the C
 * preprocessor before it is assembled.
 */
#define _LABEL(f)   f :


/* Give the routine external linkage so other objects can call it. */
.global direct_armasm_memcpy



	/* Assemble everything that follows as 32-bit ARM instructions. */
	.code 32


/*
 * direct_armasm_memcpy - copy n bytes from source p2 to destination p1.
 *
 *     void *direct_armasm_memcpy(void *p1, const void *p2, int n)
 *
 * In:      r0 = p1 (destination)
 *          r1 = p2 (source)
 *          r2 = n  (signed byte count)
 * Out:     r0 = p1; r0 is never written, so it is returned as passed in.
 * Scratch: r1, r2, r3, r12, lr and the condition flags.
 *          r4-r10 are used by the 32-byte loop and are saved/restored
 *          around it.
 * Stack:   one word (lr) once past the n <= 0 check, plus seven words
 *          (r4-r10) while the 32-byte loop runs.
 *
 * A count of n <= 0 copies nothing and returns immediately.  Without
 * that check a negative n would reach the byte loop, whose counter only
 * stops at zero, and it would run for roughly 2^32 - |n| iterations.
 *
 * Strategy:
 *   - n <= 8, or p1 and p2 at different offsets within a word:
 *     copy every byte individually.
 *   - otherwise: copy up to 3 leading bytes to reach a word boundary,
 *     then 32-byte chunks, then single words, then up to 3 tail bytes.
 *
 * The copy always runs from low to high addresses; there is no special
 * handling for overlapping buffers.
 */

#ifdef __ELF__
        .type    direct_armasm_memcpy, %function
#endif
direct_armasm_memcpy:
        cmp      r2,#0                  /* n <= 0 ? */
        movle    pc,lr                  /* yes: nothing to copy, return */

        stmdb    sp!,{lr}               /* save lr; it is reused as a data register */
        mov      r12,r0                 /* r12 = write cursor, r0 stays untouched */
        cmp      r2,#8
        ble      .Lbyte_loop            /* short copy (n <= 8): bytes only */

        sub      r3,r0,r1               /* r3 = p1 - p2 */
        tst      r3,#3                  /* same offset within a word? */
        bne      .Lbyte_loop            /* no: word transfers are not possible */

/*
 * p1 and p2 are congruent modulo 4, so once the write cursor reaches a
 * word boundary the read cursor is on one as well.
 */
        ands     r3,r0,#3               /* r3 = p1 & 3 */
        beq      .Lword_aligned         /* already on a word boundary */

        rsb      r3,r3,#4               /* m = 4 - (p1 & 3) leading bytes */
        sub      r2,r2,r3               /* n -= m; n > 8 here, so n stays > 0 */

/* Copy the m leading bytes that precede the first whole word. */
.Lhead_loop:
        ldrb     lr,[r1],#1             /* fetch one source byte */
        subs     r3,r3,#1               /* --m, sets the flags for bne */
        strb     lr,[r12],#1            /* store it; strb leaves the flags alone */
        bne      .Lhead_loop

.Lword_aligned:
        movs     r3,r2,asr #5           /* r3 = n / 32 = number of 8-word chunks */
        beq      .Lchunks_done          /* fewer than 32 bytes left */

        and      r2,r2,#0x1f            /* n = bytes left over after the chunks */
        stmdb    sp!,{r4-r10}           /* free seven more data registers */

/* Move 32 bytes per iteration through r4-r10 and lr. */
.Lchunk_loop:
        ldmia    r1!,{r4-r10,lr}        /* load 8 words, advance read cursor */
        subs     r3,r3,#1               /* one chunk fewer */
        stmia    r12!,{r4-r10,lr}       /* store 8 words, advance write cursor */
        bne      .Lchunk_loop

        ldmia    sp!,{r4-r10}           /* restore the saved registers */

.Lchunks_done:
        movs     r3,r2,asr #2           /* r3 = n / 4 = whole words remaining */
        beq      .Lwords_done

/* Copy the remaining whole words one at a time. */
.Lword_loop:
        ldr      lr,[r1],#4             /* fetch one source word */
        subs     r3,r3,#1               /* one word fewer */
        str      lr,[r12],#4            /* store it */
        bne      .Lword_loop

.Lwords_done:
        ands     r2,r2,#3               /* n = tail bytes (0..3) */
        ldmeqia  sp!,{pc}               /* none left: pop return address into pc */

/*
 * Byte-at-a-time copy.  Entered with n >= 1: either as the tail of the
 * word path above, or directly for short or differently aligned buffers.
 */
.Lbyte_loop:
        ldrb     lr,[r1],#1             /* fetch one source byte */
        subs     r2,r2,#1               /* --n */
        strb     lr,[r12],#1            /* store it */
        bne      .Lbyte_loop

        ldmia    sp!,{pc}               /* pop return address into pc */

#ifdef __ELF__
        .size    direct_armasm_memcpy, . - direct_armasm_memcpy
#endif

		.ltorg
		

 
