/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* =======================================
 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
 * =======================================
 *
 * Version of 2/20/2003, for a hypothetical 64-bit processor without Altivec.
 * This version might be used when bringing up new processors with known
 * Altivec bugs that need to be worked around.  It is not particularly well
 * optimized.
 *
 * For 64-bit processors with a 128-byte cache line, running in either
 * 32- or 64-bit mode.  This is written for 32-bit execution; the kernel
 * will translate it to 64-bit code when it compiles the 64-bit commpage.
 *
 * Register usage.  Note we use R2, so this code will not run in a PEF/CFM
 * environment.
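 * (In the PEF/CFM runtime, r2 is the dedicated TOC pointer, "RTOC", so it
 * is not available as a scratch register there.)
 *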
 *   r0  = "w7" or temp
 *   r2  = "w8"
 *   r3  = not used, as memcpy and memmove return 1st parameter as a value
 *   r4  = source ptr ("rs")
 *   r5  = count of bytes to move ("rc")
 *   r6  = "w1"
 *   r7  = "w2"
 *   r8  = "w3"
 *   r9  = "w4"
 *   r10 = "w5"
 *   r11 = "w6"
 *   r12 = destination ptr ("rd")
 */

#define rs  r4
#define rd  r12
#define rc  r5
#define rv  r2

#define w1  r6
#define w2  r7
#define w3  r8
#define w4  r9
#define w5  r10
#define w6  r11
#define w7  r0
#define w8  r2

#include <sys/appleapiopts.h>
#include <ppc/asm.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

        .text

#define kLong   64              // too long for inline loopless code


// Main entry points.

        .align  5
bcopy_64:                       // void bcopy(const void *src, void *dst, size_t len)
        cmplwi  rc,kLong        // short or long?
        sub     w1,r4,r3        // must move in reverse if (rd-rs)<rc
        cmplw   cr1,w1,rc       // set cr1 blt iff we must move reverse
        mr      rd,r4           // start to move registers to canonical spot
        mr      rs,r3
        blt     LShort          // handle short operands
        dcbt    0,r3            // touch in first line of source (r3 is src for bcopy)
        b       LLong           // join medium/long operand code

// NB: memmove() must be 8 words past bcopy(), to agree with comm page addresses.

        .align  5
Lmemcpy_g4:                     // void* memcpy(void *dst, void *src, size_t len)
Lmemmove_g4:                    // void* memmove(void *dst, const void *src, size_t len)
        cmplwi  rc,kLong        // short or long?
        sub     w1,r3,r4        // must move in reverse if (rd-rs)<rc
        dcbt    0,r4            // touch in the first line of source
        cmplw   cr1,w1,rc       // set cr1 blt iff we must move reverse
        mr      rd,r3           // must leave r3 alone, it is return value for memcpy etc
        bge     LLong           // handle medium or long operands
                                // (short operands fall through into LShort)

// Handle short operands.

LShort:
        mtcrf   0x02,rc         // put length bits 26-27 in cr6 (faster one cr at a time)
        mtcrf   0x01,rc         // put length bits 28-31 in cr7
        blt     cr1,LShortReverse

// Forward short operands.  This is the most frequent case, so it is inline.

LShort64:                       // enter to xfer last 64 bytes
        bf      26,0f           // 32-byte chunk to xfer?
        ld      w1,0(rs)
        ld      w2,8(rs)
        ld      w3,16(rs)
        ld      w4,24(rs)
        addi    rs,rs,32
        std     w1,0(rd)
        std     w2,8(rd)
        std     w3,16(rd)
        std     w4,24(rd)
        addi    rd,rd,32
0:
        bf      27,1f           // quadword to move?
        ld      w1,0(rs)
        ld      w2,8(rs)
        addi    rs,rs,16
        std     w1,0(rd)
        std     w2,8(rd)
        addi    rd,rd,16
1:
        bf      28,2f           // doubleword?
        ld      w1,0(rs)
        addi    rs,rs,8
        std     w1,0(rd)
        addi    rd,rd,8
2:
        bf      29,3f           // word?
        lwz     w1,0(rs)
        addi    rs,rs,4
        stw     w1,0(rd)
        addi    rd,rd,4
3:
        bf      30,4f           // halfword to move?
        lhz     w1,0(rs)
        addi    rs,rs,2
        sth     w1,0(rd)
        addi    rd,rd,2
4:
        bflr    31              // skip if no odd byte
        lbz     w1,0(rs)
        stb     w1,0(rd)
        blr


// Handle short reverse operands.
//      cr6 = bits 26-27 of length
//      cr7 = bits 28-31 of length

LShortReverse:
        add     rs,rs,rc        // adjust ptrs for reverse move
        add     rd,rd,rc
LShortReverse64:                // enter to xfer last 64 bytes
        bf      26,0f           // 32-byte chunk to xfer?
        ld      w1,-8(rs)
        ld      w2,-16(rs)
        ld      w3,-24(rs)
        ldu     w4,-32(rs)
        std     w1,-8(rd)
        std     w2,-16(rd)
        std     w3,-24(rd)
        stdu    w4,-32(rd)
0:
        bf      27,1f           // quadword to move?
        ld      w1,-8(rs)
        ldu     w2,-16(rs)
        std     w1,-8(rd)
        stdu    w2,-16(rd)
1:
        bf      28,2f           // doubleword?
        ldu     w1,-8(rs)
        stdu    w1,-8(rd)
2:
        bf      29,3f           // word?
        lwzu    w1,-4(rs)
        stwu    w1,-4(rd)
3:
        bf      30,4f           // halfword to move?
        lhzu    w1,-2(rs)
        sthu    w1,-2(rd)
4:
        bflr    31              // done if no odd byte
        lbz     w1,-1(rs)       // no update
        stb     w1,-1(rd)
        blr


// Long operands.
//      cr1 = blt iff we must move reverse

        .align  4
LLong:
        dcbtst  0,rd            // touch in destination
        neg     w3,rd           // start to compute #bytes to align destination
        andi.   w6,w3,7         // w6 <- #bytes to 8-byte align destination
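                                // (e.g. if rd = 0x1005, then w3 = -0x1005 = 0x...EFFB,
                                //  so w6 = w3 & 7 = 3: the three bytes that reach the
                                //  next 8-byte boundary at 0x1008)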
        blt     cr1,LLongReverse // handle reverse moves
        mtctr   w6              // set up for loop to align destination
        sub     rc,rc,w6        // adjust count
        beq     LAligned        // destination already 8-byte aligned
1:
        lbz     w1,0(rs)        // move bytes until destination is 8-byte aligned
        addi    rs,rs,1
        stb     w1,0(rd)
        addi    rd,rd,1
        bdnz    1b

// Destination is 8-byte aligned.

LAligned:
        srwi.   w2,rc,6         // w2 <- count of 64-byte chunks
        mtcrf   0x02,rc         // leftover byte count to cr (faster one cr at a time)
        mtcrf   0x01,rc         // put length bits 28-31 in cr7
        beq     LShort64        // no 64-byte chunks
        mtctr   w2              // set up loop count of 64-byte chunks
        b       1f

// Loop moving 64-byte chunks.

        .align  5
1:
        ld      w1,0(rs)
        ld      w2,8(rs)
        ld      w3,16(rs)
        ld      w4,24(rs)
        ld      w5,32(rs)
        ld      w6,40(rs)
        ld      w7,48(rs)
        ld      w8,56(rs)
        addi    rs,rs,64
        std     w1,0(rd)
        std     w2,8(rd)
        std     w3,16(rd)
        std     w4,24(rd)
        std     w5,32(rd)
        std     w6,40(rd)
        std     w7,48(rd)
        std     w8,56(rd)
        addi    rd,rd,64
        bdnz    1b

        b       LShort64        // copy the leftover 0..63 bytes


// Handle reverse moves.

LLongReverse:
        add     rd,rd,rc        // point to end of operands
        add     rs,rs,rc
        andi.   r0,rd,7         // is destination 8-byte aligned?
        sub     rc,rc,r0        // adjust count
        mtctr   r0              // set up for byte loop
        beq     LRevAligned     // already aligned
1:
        lbzu    w1,-1(rs)       // move bytes until destination is 8-byte aligned
        stbu    w1,-1(rd)
        bdnz    1b

// Destination is 8-byte aligned.

LRevAligned:
        srwi.   w2,rc,6         // w2 <- count of 64-byte chunks
        mtcrf   0x02,rc         // leftover byte count to cr (faster one cr at a time)
        mtcrf   0x01,rc         // put length bits 28-31 in cr7
        beq     LShortReverse64 // no 64-byte chunks
        mtctr   w2              // set up loop count of 64-byte chunks
        b       1f

// Loop over 64-byte chunks (reverse).

        .align  5
1:
        ld      w1,-8(rs)
        ld      w2,-16(rs)
        ld      w3,-24(rs)
        ld      w4,-32(rs)
        ld      w5,-40(rs)
        ld      w6,-48(rs)
        ld      w7,-56(rs)
        ldu     w8,-64(rs)
        std     w1,-8(rd)
        std     w2,-16(rd)
        std     w3,-24(rd)
        std     w4,-32(rd)
        std     w5,-40(rd)
        std     w6,-48(rd)
        std     w7,-56(rd)
        stdu    w8,-64(rd)
        bdnz    1b

        b       LShortReverse64 // copy the leftover 0..63 bytes

        COMMPAGE_DESCRIPTOR(bcopy_64,_COMM_PAGE_BCOPY,k64Bit,kHasAltivec,kCommPageBoth+kPort32to64)
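
/* For reference, the forward/reverse dispatch above is equivalent to this C
 * sketch.  This is a minimal illustration only: the function name is
 * hypothetical, and the real entry points are reached through the commpage,
 * not by direct call.
 *
 *      void *memmove_sketch(void *dst, const void *src, size_t len)
 *      {
 *          // The single unsigned compare (rd-rs) < rc is true exactly when
 *          // the operands overlap such that a forward copy would clobber
 *          // not-yet-copied source bytes, thanks to unsigned wraparound.
 *          if ((size_t)((char *)dst - (const char *)src) < len) {
 *              for (size_t i = len; i > 0; i--)            // copy backward
 *                  ((char *)dst)[i-1] = ((const char *)src)[i-1];
 *          } else {
 *              for (size_t i = 0; i < len; i++)            // copy forward
 *                  ((char *)dst)[i] = ((const char *)src)[i];
 *          }
 *          return dst;             // memcpy/memmove return the destination
 *      }
 */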