bcopy_Generic.s diff - arm/string/bcopy_Generic.s - Libc source code Libc-825.25

arm/string/bcopy_Generic.s Libc-825.25 ⇄ /dev/null
--- Libc/Libc-825.25/arm/string/bcopy_Generic.s
+++ /dev/null
@@ -1,407 +0,0 @@
-/*
- * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
- */
-
-/*****************************************************************************
- * ARMv5 and ARMv6 implementation, also used in dyld on later archs          *
- *****************************************************************************/
- 
-#include <arm/arch.h>
-#if !defined _ARM_ARCH_7 || defined VARIANT_DYLD
-
-.text
-.align 2
-	
-	.globl _memcpy
-	.globl _bcopy
-	.globl _memmove
-
-_bcopy:		/* void bcopy(const void *src, void *dest, size_t len); */
-	mov		r3, r0			/* swap r0 and r1 (via r3) so the arguments are in memcpy order ... */
-	mov		r0, r1
-	mov		r1, r3			/* ... then fall through into _memcpy/_memmove */
-
-_memcpy:		/* void *memcpy(void *dest, const void *src, size_t len); */
-_memmove: 	/* void *memmove(void *dest, const void *src, size_t len); */
-	/* return at once (r0 still holds dest) if len is zero or the pointers are the same */
-	cmp		r2, #0
-	cmpne	r0, r1			/* only executed when len != 0; its flags are reused by subhs/sublo below */
-	bxeq	lr
-
-	/* save r0 (return value), r4 and r5 (scratch), r7 and lr; Lexit pops this same list */
-	stmfd	sp!, { r0, r4, r5, r7, lr }
-	add	r7, sp, #12			/* r7 <- address of the saved r7 slot (saved lr is the word above it) */
-	
-	/* check for overlap. r3 <- |dest - src|, selected by the flags still live from cmpne r0, r1 */
-	subhs	r3, r0, r1
-	sublo	r3, r1, r0
-	cmp		r3, r2			/* if distance(src, dest) < len, we have overlap */
-	blo		Loverlap
-
-Lnormalforwardcopy:
-	/* are src and dest dissimilarly word aligned? (compares the low two address bits) */
-	mov		r12, r0, lsl #30
-	cmp		r12, r1, lsl #30
-	bne		Lnonwordaligned_forward
-
-	/* if len < 64, do a quick forward copy (note: blt is a signed compare on len) */
-	cmp		r2, #64
-	blt		Lsmallforwardcopy
-
-	/* check whether dest is 16 byte aligned (only dest is tested; src matches it modulo 4 here) */
-	tst		r0, #0xf
-	bne		Lsimilarlyunaligned
-
-	/* dest is 16 byte aligned; check for 32 byte dest unalignment */
-	tst		r0, #(1<<4)
-	bne		Lunaligned_32
-
-Lmorethan64_aligned:
-	/* save some more registers to use in the copy */
-	stmfd	sp!, { r6, r8, r10, r11 }
-
-	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
-	sub		r2, r2, #64
-
-L64loop:
-	/* copy 64 bytes per iteration, as two 32 byte (8 register) load/store bursts */
-	ldmia	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
-#ifdef _ARM_ARCH_6
-	pld		[r1, #32]
-#endif
-	stmia	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
-	ldmia	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
-	subs	r2, r2, #64		/* the flags set here are consumed by the bge at the bottom of the loop */
-#ifdef _ARM_ARCH_6
-	pld		[r1, #32]
-#endif
-	stmia	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
-	bge		L64loop			/* loop while at least 64 more bytes remain (signed compare) */
-
-	/* restore the scratch registers we just saved */
-	ldmfd	sp!, { r6, r8, r10, r11 }
-
-	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
-	adds	r2, r2, #64
-	beq		Lexit
-
-Llessthan64_aligned:
-	/* copy 16 bytes at a time until we have < 16 bytes */
-	cmp		r2, #16
-	ldmgeia	r1!, { r3, r4, r5, r12 }
-	stmgeia	r0!, { r3, r4, r5, r12 }
-	subges	r2, r2, #16		/* skipped when len < 16, leaving the cmp flags (lt) for the branches below */
-	bgt		Llessthan64_aligned
-	beq		Lexit
-	
-Llessthan16_aligned:
-	/* 1 to 15 bytes left: put the low 4 bits of len into the N, Z, C, V flags and copy 8/4/2/1 bytes conditionally */
-	mov		r2, r2, lsl #28
-	msr		cpsr_f, r2		/* N = len bit 3 (8 bytes), Z = bit 2 (4), C = bit 1 (2), V = bit 0 (1) */
-
-	ldmmiia	r1!, { r2, r3 }		/* N set: 8 bytes */
-	ldreq	r4, [r1], #4		/* Z set: 4 bytes */
-	ldrcsh	r5, [r1], #2		/* C set: 2 bytes */
-	ldrvsb	r12, [r1], #1		/* V set: 1 byte */
-
-	stmmiia	r0!, { r2, r3 }
-	streq	r4, [r0], #4
-	strcsh	r5, [r0], #2
-	strvsb	r12, [r0], #1
-	b		Lexit
-
-Lsimilarlyunaligned:
-	/* src and dest share the same word alignment but dest is not 16 byte aligned: copy up to the next 16 byte dest boundary */
-	mov		r12, r0, lsl #28
-	rsb		r12, r12, #0		/* top nibble of r12 <- 16 - (dest & 15), the number of bytes to the boundary */
-	msr		cpsr_f, r12		/* N = 8 bytes, Z = 4 bytes, C = 2 bytes, V = 1 byte */
-
-	ldrvsb	r3, [r1], #1		/* smallest piece first, so each following access is naturally aligned */
-	ldrcsh	r4, [r1], #2
-	ldreq	r5, [r1], #4
-
-	strvsb	r3, [r0], #1
-	strcsh	r4, [r0], #2
-	streq	r5, [r0], #4
-
-	ldmmiia	r1!, { r3, r4 }
-	stmmiia	r0!, { r3, r4 }
-
-	subs	r2, r2, r12, lsr #28	/* len -= number of bytes just copied */
-	beq		Lexit
-
-Lunaligned_32:
-	/* dest is 16 byte aligned here; copy one more 16 byte chunk if needed to reach 32 byte alignment */
-	tst		r0, #(1 << 4)
-	ldmneia	r1!, { r3, r4, r5, r12 }
-	stmneia	r0!, { r3, r4, r5, r12 }
-	subne	r2, r2, #16
-
-	/* we should now be aligned, see what copy method we should use */
-	cmp		r2, #64
-	bge		Lmorethan64_aligned
-	b		Llessthan64_aligned
-	
-Lbytewise2:
-	/* forward copy, 2 bytes per iteration (only 1 byte on the last pass when len is odd); expects len > 0 */
-	subs	r2, r2, #2		/* flags from here drive the pl-conditional accesses and the bhi below */
-
-	ldrb	r3, [r1], #1
-	ldrplb	r4, [r1], #1		/* pl: the result was >= 0, i.e. at least 2 bytes were left */
-
-	strb	r3, [r0], #1
-	strplb	r4, [r0], #1
-
-	bhi		Lbytewise2		/* hi: no borrow and result != 0, i.e. bytes still remain */
-	b		Lexit
-
-Lbytewise:
-	/* simple bytewise forward copy; expects len > 0 on entry */
-	ldrb	r3, [r1], #1
-	subs	r2, r2, #1
-	strb	r3, [r0], #1
-	bne		Lbytewise
-	b		Lexit
-
-Lsmallforwardcopy:
-	/* src and dest are word aligned similarly, less than 64 bytes to copy */
-	cmp		r2, #4
-	blt		Lbytewise2
-
-	/* bytewise copy until src (and therefore dest) is word aligned; at most 3 bytes, so len stays > 0 */
-	tst		r1, #3
-Lwordalignloop:
-	ldrneb	r3, [r1], #1
-	strneb	r3, [r0], #1
-	subne	r2, r2, #1
-	tstne	r1, #3			/* re-test only while still unaligned; once eq, every ne-conditional op is skipped */
-	bne		Lwordalignloop
-
-	/* pick the 16 byte chunk loop or the final sub-16 byte tail */
-	cmp		r2, #16
-	bge		Llessthan64_aligned
-	blt		Llessthan16_aligned
-
-Loverlap:
-	/* src and dest overlap in some way, len > 0; r3 = distance between src and dest (nonzero, < len) */
-	cmp		r0, r1				/* if dest > src */
-	bhi		Loverlap_srclower
-
-Loverlap_destlower:
-	/* dest < src, see if we can still do a fast forward copy or fallback to slow forward copy */
-	cmp		r3, #64
-	bge		Lnormalforwardcopy 	/* distance is at least one 64 byte stride of the copy, use normal copy */
-
-	cmp		r3, #2
-	bge		Lbytewise2			/* distance >= 2: the two-bytes-per-iteration copy is used */
-	b		Lbytewise			/* distance == 1: strictly one byte at a time */
-
-	/* the following routines deal with having to copy in the reverse direction */
-Loverlap_srclower:
-	/* src < dest, with overlap: copy backwards, starting from the end of both buffers */
-
-	/* src += len; dest += len; (both now point one past the last byte) */
-	add		r0, r0, r2
-	add		r1, r1, r2
-
-	/* we have to copy in reverse no matter what, test if we can use a large block reverse copy */
-	cmp		r2, #64				/* less than 64 bytes to copy? */
-	cmpgt	r3, #64				/* only when len > 64: less than 64 bytes of nonoverlap (r3 = distance)? */
-	blt		Lbytewise_reverse
-
-	/* test if src and dest are nonword aligned differently (r3 is reused as scratch from here on) */
-	mov		r3, r0, lsl #30
-	cmp		r3, r1, lsl #30
-	bne		Lbytewise_reverse
-
-	/* test if the end of dest is non 16 byte aligned (src shares its alignment only modulo 4) */
-	tst		r0, #0xf
-	bne		Lunaligned_reverse_similarly
-
-	/* test for dest 32 byte alignment */
-	tst		r0, #(1<<4)
-	bne		Lunaligned_32_reverse_similarly
-
-	/* 64 byte reverse block copy, src and dest aligned */
-Lmorethan64_aligned_reverse:
-	/* save some more registers to use in the copy */
-	stmfd	sp!, { r6, r8, r10, r11 }
-
-	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
-	sub		r2, r2, #64
-
-L64loop_reverse:
-	/* copy 64 bytes per iteration going downwards, as two 32 byte decrement-before bursts */
-	ldmdb	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
-#ifdef _ARM_ARCH_6
-	pld		[r1, #-32]
-#endif
-	stmdb	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }	
-	ldmdb	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }	
-	subs	r2, r2, #64		/* the flags set here are consumed by the bge at the bottom of the loop */
-#ifdef _ARM_ARCH_6
-	pld		[r1, #-32]
-#endif
-	stmdb	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }	
-	bge		L64loop_reverse
-
-	/* restore the scratch registers we just saved */
-	ldmfd	sp!, { r6, r8, r10, r11 }
-
-	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
-	adds	r2, r2, #64
-	beq		Lexit
-
-Lbytewise_reverse:
-	/* simple bytewise reverse copy using pre-decrement addressing; expects len > 0 on entry */
-	ldrb	r3, [r1, #-1]!
-	strb	r3, [r0, #-1]!
-	subs	r2, r2, #1
-	bne		Lbytewise_reverse
-	b		Lexit
-
-Lunaligned_reverse_similarly:
-	/* src and dest share the same word alignment: copy downwards until the dest end pointer is 16 byte aligned */
-	mov		r12, r0, lsl #28	/* top nibble of r12 <- dest & 15, the number of bytes above the boundary */
-	msr		cpsr_f, r12		/* N = 8 bytes, Z = 4 bytes, C = 2 bytes, V = 1 byte */
-
-	ldrvsb	r3, [r1, #-1]!		/* smallest piece first, so each following access is naturally aligned */
-	ldrcsh	r4, [r1, #-2]!
-	ldreq	r5, [r1, #-4]!
-
-	strvsb	r3, [r0, #-1]!
-	strcsh	r4, [r0, #-2]!
-	streq	r5, [r0, #-4]!
-
-	ldmmidb	r1!, { r3, r4 }
-	stmmidb	r0!, { r3, r4 }
-
-	subs	r2, r2, r12, lsr #28	/* len -= number of bytes just copied */
-	beq		Lexit
-
-Lunaligned_32_reverse_similarly:
-	/* dest is 16 byte aligned here; copy one more 16 byte chunk downwards if needed to reach 32 byte alignment */
-	tst		r0, #(1 << 4)
-	ldmnedb	r1!, { r3, r4, r5, r12 }
-	stmnedb	r0!, { r3, r4, r5, r12 }
-	subne	r2, r2, #16
-
-	/* we should now be aligned, see what copy method we should use */
-	cmp		r2, #64
-	bge		Lmorethan64_aligned_reverse
-	b		Lbytewise_reverse
-
-	/* the following routines deal with non word aligned copies */
-Lnonwordaligned_forward:
-	cmp		r2, #8
-	blt		Lbytewise2			/* not worth the effort with less than 8 bytes to copy */
-
-	/* bytewise copy until src word aligned (at most 3 bytes, so at least 5 bytes remain afterwards) */
-	tst		r1, #3
-Lwordalignloop2:
-	ldrneb	r3, [r1], #1
-	strneb	r3, [r0], #1
-	subne	r2, r2, #1
-	tstne	r1, #3
-	bne		Lwordalignloop2
-
-	/* figure out how the src and dest are unaligned: src is word aligned now, so dest & 3 is 1, 2 or 3 */
-	and		r3, r0, #3
-	cmp		r3, #2
-	blt		Lalign1_forward
-	beq		Lalign2_forward
-	bgt		Lalign3_forward
-
-Lalign1_forward:
-	/* the dest pointer is 1 byte off from src (dest & 3 == 1, src word aligned) */
-	mov		r12, r2, lsr #2		/* number of words we should copy */
-	sub		r0, r0, #1			/* back dest up to the word boundary just below it */
-
-	/* prime the copy with the byte already in memory below dest; it is written back unchanged by the first str */
-	ldrb	r4, [r0]			/* load D[7:0] */
-
-Lalign1_forward_loop:
-	/* aligned word loads and stores with a 1 byte carry held in r4; the shifts rely on little-endian byte order */
-	ldr		r3, [r1], #4		/* load S */
-	orr		r4, r4, r3, lsl #8	/* D[31:8] = S[23:0] */
-	str		r4, [r0], #4		/* save D */
-	mov		r4, r3, lsr #24		/* D[7:0] = S[31:24] */
-	subs	r12, r12, #1
-	bne		Lalign1_forward_loop
-
-	/* finish the copy off: flush the 1 carried byte; r0 is then back at the true dest position */
-	strb	r4, [r0], #1		/* save D[7:0] */
-
-	ands	r2, r2, #3			/* 0 to 3 tail bytes left after the word loop */
-	beq		Lexit
-	b		Lbytewise2
-
-Lalign2_forward:
-	/* the dest pointer is 2 bytes off from src (dest & 3 == 2, src word aligned) */
-	mov		r12, r2, lsr #2		/* number of words we should copy */
-	sub		r0, r0, #2			/* back dest up to the word boundary just below it */
-
-	/* prime the copy with the 2 bytes already in memory below dest; they are written back unchanged by the first str */
-	ldrh	r4, [r0]			/* load D[15:0] */
-
-Lalign2_forward_loop:
-	/* aligned word loads and stores with a 2 byte carry held in r4; the shifts rely on little-endian byte order */
-	ldr		r3, [r1], #4		/* load S */
-	orr		r4, r4, r3, lsl #16	/* D[31:16] = S[15:0] */
-	str		r4, [r0], #4		/* save D */
-	mov		r4, r3, lsr #16		/* D[15:0] = S[31:16] */
-	subs	r12, r12, #1
-	bne		Lalign2_forward_loop
-
-	/* finish the copy off: flush the 2 carried bytes; r0 is then back at the true dest position */
-	strh	r4, [r0], #2		/* save D[15:0] */
-
-	ands	r2, r2, #3			/* 0 to 3 tail bytes left after the word loop */
-	beq		Lexit
-	b		Lbytewise2
-
-Lalign3_forward:
-	/* the dest pointer is 3 bytes off from src (dest & 3 == 3, src word aligned) */
-	mov		r12, r2, lsr #2		/* number of words we should copy */
-	sub		r0, r0, #3			/* back dest up to the word boundary just below it */
-
-	/* prime the copy with the 3 bytes already in memory below dest; they are written back unchanged by the first str */
-	ldr		r4, [r0]
-	and		r4, r4, #0x00ffffff	/* keep D[23:0] */
-
-Lalign3_forward_loop:
-	/* aligned word loads and stores with a 3 byte carry held in r4; the shifts rely on little-endian byte order */
-	ldr		r3, [r1], #4		/* load S */
-	orr		r4, r4, r3, lsl #24	/* D[31:24] = S[7:0] */
-	str		r4, [r0], #4		/* save D */
-	mov		r4, r3, lsr #8		/* D[23:0] = S[31:8] */
-	subs	r12, r12, #1
-	bne		Lalign3_forward_loop
-
-	/* finish the copy off: flush the 3 carried bytes; r0 is then back at the true dest position */
-	strh	r4, [r0], #2		/* save D[15:0] */
-	mov		r4, r4, lsr #16
-	strb	r4, [r0], #1		/* save D[23:16] */
-
-	ands	r2, r2, #3			/* 0 to 3 tail bytes left after the word loop */
-	beq		Lexit
-	b		Lbytewise2
-
-Lexit:
-	ldmfd	sp!, {r0, r4, r5, r7, pc}	/* common exit: restore r0 (original dest), r4, r5, r7 and return by popping the saved lr into pc */
-
-#endif // !defined _ARM_ARCH_7 || defined VARIANT_DYLD
-