Loading...
ppc/string/memcmp.s Libc-498 Libc-320
--- Libc/Libc-498/ppc/string/memcmp.s
+++ Libc/Libc-320/ppc/string/memcmp.s
@@ -2,6 +2,8 @@
  * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
+ * 
+ * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
  * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
@@ -24,13 +26,6 @@
 #include <mach/ppc/asm.h>
 #undef	ASSEMBLER
 
-/* We use mode-independent "g" opcodes such as "srgi".  These expand
- * into word operations when targeting __ppc__, and into doubleword
- * operations when targeting __ppc64__.
- */
-#include <architecture/ppc/mode_independent_asm.h>
-
-
 // ***************     ***********
 // * M E M C M P * and * B C M P *
 // ***************     ***********
@@ -46,33 +41,31 @@
 // finding a difference, we might get a spurious page fault by
 // reading bytes past the difference.  To avoid this, we never do a "lwz"
 // that crosses a page boundary.
-//
-// In 64-bit mode, this routine is doubleword parallel.
 
         .text
         .globl EXT(memcmp)
         .globl EXT(bcmp)
 
         .align 	5
-LEXT(memcmp)                        // int memcmp(const char *s1,const char *s2,size_t len);
+LEXT(memcmp)							// int memcmp(const char *s1,const char *s2,size_t len);
 LEXT(bcmp)							// int   bcmp(const char *s1,const char *s2,size_t len);
-        cmplgi	cr1,r5,2*GPR_BYTES  // is buffer too short to bother with parallel compares?
-        andi.	r0,r3,GPR_BYTES-1   // is LHS aligned?
+        cmplwi	cr1,r5,8			// is buffer too short to bother with word compares?
+        andi.	r0,r3,3				// is LHS word aligned?
         blt		cr1,Lshort			// short buffer, so just compare byte-by-byte
         beq		Laligned			// skip if aligned
-        subfic	r0,r0,GPR_BYTES     // r0 <- #bytes to align LHS
+        subfic	r0,r0,4				// r0 <- #bytes to word align LHS
         mtctr	r0					// set up for byte loop
         b		Lbyteloop
         
 // Handle short buffer or end-of-buffer.
 //		r3 = LHS ptr (unaligned)
 //		r4 = RHS ptr (unaligned)
-//		r5 = length remaining in buffer (0..2*GPR_BYTES-1)
+//		r5 = length remaining in buffer (0..7)
 
 Lshort:
-        cmpgi	r5,0				// null buffer?
+        cmpwi	r5,0				// null buffer?
         mtctr	r5					// assume not null, and set up for loop
-        bne     Lshortloop			// buffer not null
+        bne		Lshortloop			// buffer not null
         li		r3,0				// say "equal"
         blr
         
@@ -88,14 +81,14 @@
         sub		r3,r7,r8			// generate return value
         blr 
 
-// We're at a RHS page boundary.  Compare GPR_BYTES bytes in order to cross the
-// page but still keep the LHS ptr aligned.
+// We're at a RHS page boundary.  Compare 4 bytes in order to cross the
+// page but still keep the LHS ptr word-aligned.
 
 Lcrosspage:
-        cmplgi	r5,2*GPR_BYTES      // enough bytes left to use parallel compares?
-        li		r0,GPR_BYTES        // get #bytes to cross RHS page
+        cmplwi	r5,8				// enough bytes left to use word compares?
+        li		r0,4				// get #bytes to cross RHS page
         blt		Lshort				// buffer is about to end
-        mtctr	r0
+        mtctr	r0					// set up to compare 4 bytes
         b		Lbyteloop
         
 // Compare byte-by-byte.
@@ -114,14 +107,14 @@
         cmpw	r7,r8				// compare the bytes
         bdnzt	eq,Lbyteloop		// loop if more to go and bytes are equal
         
-        bne     Ldifferent			// done if we found differing bytes
+        bne		Ldifferent			// done if we found differing bytes
                 
-// LHS is now aligned.  Loop over words/doublewords until end of RHS page or buffer.
-// When we get to the end of the page, we compare 4/8 bytes, so that we keep
-// the LHS aligned.
+// LHS is now word aligned.  Loop over words until end of RHS page or buffer.
+// When we get to the end of the page, we compare 4 bytes, so that we keep
+// the LHS word aligned.
 //		r3 = LHS ptr (aligned)
 //		r4 = RHS ptr (unaligned)
-//		r5 = length remaining in buffer (>= GPR_BYTES bytes)
+//		r5 = length remaining in buffer (>= 4 bytes)
 
 Laligned:
         rlwinm	r9,r4,0,0xFFF		// get RHS offset in page
@@ -130,45 +123,40 @@
         subfe	r8,r5,r5			// * r9 <- min(r0,r5),
         and		r7,r7,r8			// * using algorithm in Compiler Writer's Guide
         add		r9,r0,r7			// ***
-        srgi.	r8,r9,LOG2_GPR_BYTES// get #words/doublewords we can compare
-        clrrgi  r9,r9,LOG2_GPR_BYTES// get #bytes we will compare word-parallel
+        srwi.	r8,r9,2				// get #words we can compare
+        rlwinm	r9,r9,0,0,29		// get #bytes we will compare word-parallel
         beq--	Lcrosspage			// we're at a RHS page boundary
         mtctr	r8					// set up loop count
         sub		r5,r5,r9			// decrement length remaining
         b		Lwordloop
         
-// Compare a word or doubleword at a time, until one of two conditions:
+// Compare a word at a time, until one of two conditions:
 //		- a difference is found
 //		- end of count (ie, end of buffer or RHS page, whichever is first)
 // At this point, registers are as follows:
 //		r3 = LHS ptr (aligned)
 //		r4 = RHS ptr (unaligned)
 //		r5 = length remaining in buffer (may be 0)
-//     ctr = count of word/doublewords until end of buffer or RHS page
+//     ctr = count of words until end of buffer or RHS page
 
-        .align	5					// align inner loop
+        .align	5					// align inner loop, which is 8 words long
 Lwordloop:
-        lg		r7,0(r3)			// r7 <- next aligned LHS word or doubleword
-        addi	r3,r3,GPR_BYTES
-        lg		r8,0(r4)			// r8 <- next unaligned RHS word or doubleword
-        addi	r4,r4,GPR_BYTES
-        xor.	r11,r7,r8			// compare them
+        lwz		r7,0(r3)			// r7 <- next 4 LHS bytes
+        addi	r3,r3,4
+        lwz		r8,0(r4)			// r8 <- next 4 RHS bytes
+        addi	r4,r4,4
+        xor.	r11,r7,r8			// compare the words
         bdnzt	eq,Lwordloop		// loop if ctr!=0 and cr0_eq
         
-        beq     Lcrosspage			// skip if buffer or page end reached wo difference
+        beq--	Lcrosspage			// skip if buffer or page end reached
         
 // Found differing bytes.
 
-        cntlzg	r0,r11				// find 1st difference (r0 = 0..31 or 63)
-        rlwinm	r9,r0,0,0x38		// byte align bit offset (r9 = 0,8,16, or 24 etc)
-        addi	r0,r9,8				// now, r0 = 8, 16, 24, or 32 etc
-#if defined(__ppc__)
+        cntlzw	r0,r11				// find 1st difference (r0 = 0..31)
+        rlwinm	r9,r0,0,0x18		// byte align bit offset (r9 = 0,8,16, or 24)
+        addi	r0,r9,8				// now, r0 = 8, 16, 24, or 32
         rlwnm	r7,r7,r0,24,31		// right justify differing bytes and mask off rest
         rlwnm	r8,r8,r0,24,31
-#else
-        rldcl   r7,r7,r0,56         // right justify differing bytes and mask off rest
-        rldcl   r8,r8,r0,56
-#endif
 
 Ldifferent:							// bytes in r7 and r8 differ
         sub		r3,r7,r8			// compute return value