Loading...
arm/string/memcmp.s Libc-825.26 Libc-594.9.4
--- Libc/Libc-825.26/arm/string/memcmp.s
+++ Libc/Libc-594.9.4/arm/string/memcmp.s
@@ -37,81 +37,69 @@
 // int   bcmp(void *src1, void *src2, size_t length);
 // int memcmp(void *src1, void *src2, size_t length);
 
-.text
-.syntax unified
-#if defined __thumb2__
-	.code 16
-	.thumb_func _bcmp
-	.thumb_func _memcmp
-#else
-	.code 32
-#endif
-
-#define ESTABLISH_FRAME \
-	push   {r7,lr};\
-	mov     r7,     sp
-#define CLEAR_FRAME_AND_RETURN \
-	pop    {r7,pc}
-
 #include <arm/arch.h>
 
-#if defined _ARM_ARCH_6
-	#define BYTE_REVERSE(reg,tmp) \
-    rev     reg, reg
-#else // defined _ARM_ARCH_6
-// Prior to ARMv6, the REV instruction is not available.  Byte-reverse reg in
-// software with four instructions; tmp is the only scratch register clobbered.
-	#define BYTE_REVERSE(reg,tmp) \
-	eor     tmp, reg, reg, ror #16;\
-	bic     tmp, tmp, #0xff0000   ;\
-	mov     tmp,      tmp, lsr #8 ;\
-	eor     reg, tmp, reg, ror #8
-#endif // defined _ARM_ARCH_6
-
-.globl _bcmp
-.globl _memcmp
-.align 2
+    .text
+    .syntax unified
+#if defined __thumb2__
+    .code 16
+    .thumb_func _bcmp
+    .thumb_func _memcmp
+#else
+    .code 32
+#endif
+    .globl _bcmp
+    .globl _memcmp
+    .align 3
 _bcmp:
 _memcmp:
-    // If either buffer is not word aligned, jump to a byte-comparison loop.
-	ESTABLISH_FRAME
-	orr     ip,     r0, r1          // low bits of ip are set if either pointer has them set
-	tst     ip,         #3
-	bne     L_useByteComparisons
 
-    // As long as at least four bytes of length remain, load one word from each
-    // buffer and check if they are equal.
-0:  subs    r2,         #4          // length -= 4; borrow (lo) means fewer than 4 bytes were left
-	blo     L_lessThanFourBytesRemain
-	ldr     r3,    [r0],#4          // r3 = next word of src1, post-increment r0
-	ldr     ip,    [r1],#4          // ip = next word of src2, post-increment r1
-	cmp     r3,     ip
-	beq     0b
-	
-    // If words from the two buffers compared unequal, we end up here.  We need
-    // to byte-swap both words, then subtract to determine the result (+/-1).
-	BYTE_REVERSE(r3,r1)
-	BYTE_REVERSE(ip,r2)
-    mov     r0,         #1          // assume the src1 word is the larger one
-	subs    r3,     ip              // unsigned compare of the byte-swapped words
-	it      lo
-	movlo   r0,         #-1         // src1 word was lower (unsigned): return -1
-	CLEAR_FRAME_AND_RETURN
-	
-L_lessThanFourBytesRemain:
-    adds    r2,         #4          // undo the subtraction above: r2 = bytes still to compare
-L_useByteComparisons:
-	mov     r3,     r0              // r0 is reused for the result, so walk src1 through r3
-    // If no bytes remain to compare, the buffers are equal and we return zero.
-    // Otherwise, load one byte from each buffer and check if they are equal.
-0:  subs    r2,         #1
-	blo     L_buffersAreEqual
-	ldrb    r0,    [r3],#1
-	ldrb    ip,    [r1],#1
-	subs    r0,         ip          // r0 = src1 byte - src2 byte
-	beq     0b
-	CLEAR_FRAME_AND_RETURN          // bytes differ: return their difference in r0
+#ifdef _ARM_ARCH_6
+    subs    ip,     r2,  #4     // ip = length - 4; if that is negative
+    bmi     L_useByteCompares   // take the byte loop (bmi is a sign test, so it also fires for length >= 0x80000004)
     
-L_buffersAreEqual:
-    mov     r0,         #0
-	CLEAR_FRAME_AND_RETURN
+    orr     r3,     r0,  r1     // if either buffer is
+    tst     r3,          #3     // not word aligned
+    bne     L_useByteCompares   // jump to the byte comparison loop
+
+.align 3
+L_wordCompare:                  // Here we know that both buffers are word
+    ldr     r2,    [r0], #4     // aligned, and (length - 4) >= 0, so at least
+    ldr     r3,    [r1], #4     // four bytes remain to be compared.  We load
+    subs    ip,          #4     // a word from each buffer, then decrement the
+    bmi     L_lastWord          // remaining length by four; if the result is
+    rev     r2,     r2          // negative these were the last full words, so
+    rev     r3,     r3          // we leave the loop.  Otherwise we byte-reverse
+    cmp     r2,     r3          // both words, compare them, and continue the
+    beq     L_wordCompare       // loop only if they are equal.
+L_wordsUnequal:                 // Flags here come from cmp r2, r3 (src1 word vs src2 word).
+    ite     hi                  // If the words compared unequal, return +1 when
+    movhi   r0,     #1          // the reversed src1 word is higher (unsigned),
+    movls   r0,     #-1         // and -1 otherwise.
+    bx      lr                  //
+L_lastWord:
+    rev     r2,     r2          // If we just loaded the last complete words
+    rev     r3,     r3          // from the buffers, byte-reverse them and
+    cmp     r2,     r3          // compare.  If they are unequal, jump to the
+    bne     L_wordsUnequal      // return path.
+    add     r2,     ip,  #4     // Otherwise r2 = leftover byte count (0..3); fall into the cleanup code.
+#endif // _ARM_ARCH_6
+
+L_useByteCompares:
+    tst     r2,     r2          // If the length is exactly zero
+    beq     L_returnZero        // avoid doing any loads and return zero.
+    mov     r3,     r0          // r0 is reused for the result, so walk src1 through r3.
+.align 3
+L_byteCompareLoop:
+    ldrb    r0,    [r3], #1     // Load a byte from each buffer, and decrement
+    ldrb    ip,    [r1], #1     // the length by one.  If the decremented
+    subs    r2,     #1          // length is zero, exit the loop.  Otherwise
+    beq     L_lastByte          // subtract the loaded bytes; if their
+    subs    r0,     ip          // difference is zero, continue the comparison
+    beq     L_byteCompareLoop   // loop.  Otherwise, return their difference.
+    bx      lr                  // r0 = src1 byte - src2 byte (nonzero here)
+L_returnZero:
+    mov     r0,     ip          // Make r0 equal to ip so the subtraction below yields zero
+L_lastByte:
+    sub     r0,     ip          // Return the difference of the final bytes
+    bx      lr