/*
 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#include <mach/machine/asm.h>

/*
 * A reasonably well-optimized bzero/memset. Should work equally well on arm11 and arm9 based
 * cores.
 *
 * The algorithm is to align the destination pointer on a 32 byte boundary and then
 * blast data 64 bytes at a time, in two stores of 32 bytes per loop.
*/ .text .align 2 .globl _memset /* void *memset(void *ptr, int c, size_t len); */ _memset: /* move len into r1, unpack c into r2 */ mov r3, r2 and r1, r1, #0xff orr r1, r1, r1, lsl #8 orr r2, r1, r1, lsl #16 mov r1, r3 b Lbzeroengine .globl _bzero /* void bzero(void *ptr, size_t len); */ _bzero: /* zero out r2 so we can be just like memset(0) */ mov r2, #0 Lbzeroengine: /* move the base pointer into r12 and leave r0 alone so that we return the original pointer */ mov r12, r0 /* copy r2 into r3 for 64-bit stores */ mov r3, r2 /* check for zero len */ cmp r1, #0 bxeq lr /* fall back to a bytewise store for less than 32 bytes */ cmp r1, #32 blt L_bytewise /* check for 32 byte unaligned ptr */ tst r12, #0x1f bne L_unaligned /* make sure we have more than 64 bytes to zero */ cmp r1, #64 blt L_lessthan64aligned /* >= 64 bytes of len, 32 byte aligned */ L_64ormorealigned: /* we need some registers, avoid r7 (frame pointer) and r9 (thread register) */ stmfd sp!, { r4-r6, r8, r10-r11 } mov r4, r2 mov r5, r2 mov r6, r2 mov r8, r2 mov r10, r2 mov r11, r2 /* pre-subtract 64 from the len to avoid an extra compare in the loop */ sub r1, r1, #64 L_64loop: stmia r12!, { r2-r6, r8, r10-r11 } subs r1, r1, #64 stmia r12!, { r2-r6, r8, r10-r11 } bge L_64loop /* restore the saved regs */ ldmfd sp!, { r4-r6, r8, r10-r11 } /* check for completion (had previously subtracted an extra 64 from len) */ adds r1, r1, #64 bxeq lr L_lessthan64aligned: /* do we have 16 or more bytes left */ cmp r1, #16 stmgeia r12!, { r2-r3 } stmgeia r12!, { r2-r3 } subges r1, r1, #16 bgt L_lessthan64aligned bxeq lr L_lessthan16aligned: /* store 0 to 15 bytes */ mov r1, r1, lsl #28 /* move the remaining len bits [3:0] to the flags area of cpsr */ msr cpsr_f, r1 stmmiia r12!, { r2-r3 } /* n is set, store 8 bytes */ streq r2, [r12], #4 /* z is set, store 4 bytes */ strcsh r2, [r12], #2 /* c is set, store 2 bytes */ strvsb r2, [r12], #1 /* v is set, store 1 byte */ bx lr L_bytewise: /* bytewise copy, 2 bytes at a 
time, alignment not guaranteed */ subs r1, r1, #2 strb r2, [r12], #1 strplb r2, [r12], #1 bhi L_bytewise bx lr L_unaligned: /* unaligned on 32 byte boundary, store 1-15 bytes until we're 16 byte aligned */ mov r3, r12, lsl #28 rsb r3, r3, #0x00000000 msr cpsr_f, r3 strvsb r2, [r12], #1 /* v is set, unaligned in the 1s column */ strcsh r2, [r12], #2 /* c is set, unaligned in the 2s column */ streq r2, [r12], #4 /* z is set, unaligned in the 4s column */ strmi r2, [r12], #4 /* n is set, unaligned in the 8s column */ strmi r2, [r12], #4 subs r1, r1, r3, lsr #28 bxeq lr /* we had previously trashed r3, restore it */ mov r3, r2 /* now make sure we're 32 byte aligned */ tst r12, #(1 << 4) stmneia r12!, { r2-r3 } stmneia r12!, { r2-r3 } subnes r1, r1, #16 /* we're now aligned, check for >= 64 bytes left */ cmp r1, #64 bge L_64ormorealigned b L_lessthan64aligned |