/*
 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#include <arm/arch.h>

// Only built for armv6 and higher.
#if defined _ARM_ARCH_6

// If we're building for armv7, and not for DYLD, then we have a symbol
// resolver so we need to rename these implementations.
#if defined _ARM_ARCH_7 && !defined VARIANT_DYLD
#define _memset_pattern4 _memset_pattern4$VARIANT$Generic
#define _memset_pattern8 _memset_pattern8$VARIANT$Generic
#define _memset_pattern16 _memset_pattern16$VARIANT$Generic
#endif

#include <mach/machine/asm.h>

/*
 * This file contains the following functions:
 *
 *	void memset_pattern4(void *b, const void *c4, size_t len)
 *	void memset_pattern8(void *b, const void *c8, size_t len)
 *	void memset_pattern16(void *b, const void *c16, size_t len)
 *
 * The memset() is implemented in the bzero.s file.
 *
 * This is a reasonably well optimized version of memset_pattern* routines
 * implemented for the ARM9 and ARM11 processors using the ARMv6 instruction
 * set. These routines use the ARM's core registers.
 *
 * The algorithm is to align the destination pointer on a 16 byte boundary
 * and then blast data 64 bytes at a time, in two stores of 32 bytes per loop.
* */ .text .align 2 .syntax unified /*----------------------------------------------------------------------------*/ /* void memset_pattern4(void *ptr, const void *pattern4, size_t len); */ /* */ /* r0 << destination pointer */ /* r1 << pointer to 4-byte pattern */ /* r2 << 'len' (length of destination buffer in bytes) */ /*----------------------------------------------------------------------------*/ .globl _memset_pattern4 _memset_pattern4: cmp r2, #0 /* check if len is zero */ bxeq lr /* return if length is zero */ /* We need some registers, so save volatiles on stack */ /* Avoid r7 (frame pointer) and r9 (thread register) */ stmfd sp!, {r4-r7, lr} add r7, sp, #12 /* establish frame */ stmfd sp!, {r8, r10-r11} /* copy destination base pointer r0 to r12 and leave r0 alone */ /* so that we return original pointer back to the caller */ mov r12, r0 /* Check if 'len' is long enough to bother alignment of destination */ /* pointer */ cmp r2, #32 /* long enough to bother aligning? */ movlt r3, #4 /* move pattern length into r3 */ movlt r10, #4 /* pattern index */ movlt r11, r1 /* move pattern pointer into r11 */ blt L_Short /* no */ /* move 'len' into r1, get 4-byte pattern in r2 */ mov r6, r2 /* temporarily move 'len' in to r6 */ ldr r2, [r1]/* load 4-byte pattern into r2 */ mov r1, r6 /* move 'len' from r6 to r1 */ mov r3, r2 /* copy 4-byte pattern into r3, r4 and r5 registers */ mov r4, r2 mov r5, r2 L_NotShort: /* Check for 16 or 32 byte aligned destination pointer */ tst r12, #0x1F /* check for 32 byte aligned */ beq L_Aligned tst r12, #0xF /* check for 16 byte aligned */ beq L_16ByteAligned b L_Unaligned /* yes */ L_Bytewise: ldrb r4, [r11], #1 strb r4, [r12], #1 subs r10, #1 moveq r10, r3 moveq r11, r1 sub r2, #1 L_Short: cmp r2, #0 /* more bytes left? 
*/ bne L_Bytewise ldm sp!, {r8, r10-r11} /* restores registers from stack */ ldm sp!, {r4-r7, pc} /* restore & return from subroutine */ /* 'len' is long enough to justify aligning the destination pointer */ /* */ /* By the time we reach here, data is stored in registers as follows: */ /* r1 << 'len' (length of destination buffer in bytes) */ /* r2-r5 << pattern; either 4x4byte OR 2x8byte OR 1x16-byte */ /* r12 << destination pointer copy (scratch register) */ /* r0 << destination pointer original */ /* */ /* Use r11 as scratch register to store the #bytes offset to 16-byte align */ /* */ /* Unaligned on 32-byte boundary, store 1-15 bytes until 16-byte aligned */ /* As we store these bytes, we rotate the pattern stored in r2-r5 to reflect */ /* the alignment. */ L_Unaligned: mov r11, r12, lsl #28 rsb r11, r11, #0 msr cpsr_f, r11 /* Bits[31:28] of cpsr now contain #bytes to align*/ L_Store15BytesAndRotatePattern: strbvs r2, [r12], #1 /* v is set, unaligned in the 1s column */ andvs r6, r2, #0xFF /* Rotate pattern right in r2-r5 by 1-byte */ andvs r8, r3, #0xFF /* Consider register r2-r5 and a contiguous */ andvs r10, r4, #0xFF /* 16-byte register with r2 containing LSB */ andvs r11, r5, #0xFF /* and r5 containing MSB */ lsrvs r2, r2, #8 lsrvs r3, r3, #8 lsrvs r4, r4, #8 lsrvs r5, r5, #8 orrvs r2, r2, r8, lsl #24 orrvs r3, r3, r10, lsl #24 orrvs r4, r4, r11, lsl #24 orrvs r5, r5, r6, lsl #24 strhcs r2, [r12], #2 /* c is set, unaligned in the 2s column */ movcs r6, r2, lsl #16 /* Rotate pattern right in r2-r5 by 2-bytes */ movcs r8, r3, lsl #16 movcs r10, r4, lsl #16 movcs r11, r5, lsl #16 lsrcs r2, r2, #16 lsrcs r3, r3, #16 lsrcs r4, r4, #16 lsrcs r5, r5, #16 orrcs r2, r2, r8 orrcs r3, r3, r10 orrcs r4, r4, r11 orrcs r5, r5, r6 streq r2, [r12], #4 /* z is set, unaligned in the 4s column */ moveq r6, r2 /* Rotate pattern right in r2-r5 by 4-bytes */ moveq r2, r3 moveq r3, r4 moveq r4, r5 moveq r5, r6 stmmi r12!, {r2-r3} /* n is set, unaligned in the 8s column */ movmi 
r6, r2 /* Rotate pattern right in r2-r5 by 4-bytes */ movmi r8, r3 movmi r2, r4 movmi r3, r5 movmi r4, r6 movmi r5, r8 mrs r11, cpsr /*copy cpsr in to r11 */ subs r1, r1, r11, lsr #28 ldmeq sp!, {r8, r10-r11} /* restores registers from stack */ ldmeq sp!, {r4-r7, pc} /* restore & return from subroutine */ /* By the time we reach here, we are 16-byte aligned and r2-r5 contains */ /* rotated pattern. Now lets make sure we are 32-byte aligned. */ L_16ByteAligned: tst r12, #(1 << 4) stmne r12!, {r2-r5} subsne r1, r1, #16 /* By the time we reach here, data is stored in registers as follows: */ /* r1 << 'len' (remaining length of destination buffer in bytes) */ /* r2-r5 << rotated pattern; either 4x4byte OR 2x8byte OR 1x16-byte */ /* r12 << aligned destination pointer copy (scratch register) */ L_Aligned: cmp r1, #64 blt L_AlignedLessThan64 /* Copy pattern in four more registers so that we can do 64 byte transfers */ mov r6, r2 mov r8, r3 mov r10, r4 mov r11, r5 /* At this point, we are 16-byte aligned and 'len' is greater than 64 bytes */ /* Lets transfer 64 bytes at a time until len becomes less than 64 bytes */ sub r1, r1, #64 /* pre-subtract to avoid extra compare in loop */ L_Loop64: stm r12!, {r2-r6, r8, r10-r11} subs r1, r1, #64 stm r12!, {r2-r6, r8, r10-r11} bge L_Loop64 /* return if 'len' is zero */ adds r1, r1, #64 /* readjust length; previously subtracted extra 64*/ ldmeq sp!, {r8, r10-r11} /* restores registers from stack */ ldmeq sp!, {r4-r7, pc} /* restore & return from subroutine */ L_AlignedLessThan64: /* do we have 16 or more bytes left */ cmp r1, #16 stmge r12!, {r2-r5} subsge r1, r1, #16 bgt L_AlignedLessThan64 ldmeq sp!, {r8, r10-r11} /* restores registers from stack */ ldmeq sp!, {r4-r7, pc} /* restore & return from subroutine */ L_AlignedLessThan16: /* store last up-to 15 bytes */ /* move the remaining len bits [3:0] to the flags area of cpsr */ mov r1, r1, lsl #28 msr cpsr_f, r1 stmmi r12!, {r2-r3} /* n is set, store 8 bytes */ movmi r2, r4 /* 
shift vector down 8 bytes */ movmi r3, r5 streq r2, [r12], #4 /* z is set, store 4 bytes */ moveq r2, r3 /* shift vector down 4 bytes */ strhcs r2, [r12], #2 /* c is set, store 2 bytes */ lsrcs r2, #16 /* shift register right 2 bytes */ strbvs r2, [r12], #1 /* v is set, store 1 byte */ ldm sp!, {r8, r10-r11} /* restores registers from stack */ ldm sp!, {r4-r7, pc} /* restore & return from subroutine */ /*----------------------------------------------------------------------------*/ /* void memset_pattern8(void *ptr, const void *pattern8, size_t len); */ /* */ /* r0 << destination pointer */ /* r1 << pointer to 8-byte pattern */ /* r2 << 'len' (length of destination buffer in bytes) */ /*----------------------------------------------------------------------------*/ .globl _memset_pattern8 _memset_pattern8: cmp r2, #0 /* check if len is zero */ bxeq lr /* return if length is zero */ /* We need some registers, so save volatiles on stack */ /* Avoid r7 (frame pointer) and r9 (thread register) */ stmfd sp!, {r4-r7, lr} add r7, sp, #12 /* establish frame */ stmfd sp!, {r8, r10-r11} /* copy destination base pointer r0 to r12 and leave r0 alone */ /* so that we return original pointer back to the caller */ mov r12, r0 /* Check if 'len' is long enough to bother alignment of destination */ /* pointer */ cmp r2, #32 /* long enough to bother aligning? 
*/ movlt r3, #8 /* move pattern length into r3 */ movlt r10, #8 /* pattern index */ movlt r11, r1 /* move pattern pointer into r11 */ blt L_Short /* no */ /* move 'len' into r1, get 8-byte pattern in r2-r3 */ mov r6, r2 /* temporarily move 'len' in to r6 */ ldr r2, [r1], #4 /* load 8-byte pattern into r2-r3 */ ldr r3, [r1], #4 mov r1, r6 /* move 'len' from r6 to r1 */ mov r4, r2 /* copy 8-byte pattern into r4-r5 registers */ mov r5, r3 b L_NotShort /* yes */ /*----------------------------------------------------------------------------*/ /* void memset_pattern16(void *ptr, const void *pattern16, size_t len); */ /* */ /* r0 << destination pointer */ /* r1 << pointer to 16-byte pattern */ /* r2 << 'len' (length of destination buffer in bytes) */ /*----------------------------------------------------------------------------*/ .globl _memset_pattern16 _memset_pattern16: cmp r2, #0 /* check if len is zero */ bxeq lr /* return if length is zero */ /* We need some registers, so save volatiles on stack */ /* Avoid r7 (frame pointer) and r9 (thread register) */ stmfd sp!, {r4-r7, lr} add r7, sp, #12 /* establish frame */ stmfd sp!, {r8, r10-r11} /* copy destination base pointer r0 to r12 and leave r0 alone */ /* so that we return original pointer back to the caller */ mov r12, r0 /* Check if 'len' is long enough to bother alignment of destination */ /* pointer */ cmp r2, #32 /* long enough to bother aligning? */ movlt r3, #16 /* move pattern length into r3 */ movlt r10, #16 /* pattern index */ movlt r11, r1 /* move pattern pointer into r11 */ blt L_Short /* no */ /* move 'len' into r1, get 16-byte pattern in r2-r5 */ mov r6, r2 /* temporarily move 'len' in to r6 */ ldr r2, [r1], #4 /* load 16-byte pattern into r2-r5 */ ldr r3, [r1], #4 ldr r4, [r1], #4 ldr r5, [r1], #4 mov r1, r6 /* move 'len' from r6 to r1 */ b L_NotShort /* yes */ #endif /* _ARM_ARCH_6 */ |