Loading...
arm/string/memcmp.s Libc-594.9.4 Libc-825.26
--- Libc/Libc-594.9.4/arm/string/memcmp.s
+++ Libc/Libc-825.26/arm/string/memcmp.s
@@ -37,69 +37,81 @@
 // int   bcmp(void *src1, void *src2, size_t length);
 // int memcmp(void *src1, void *src2, size_t length);
 
+.text
+.syntax unified
+#if defined __thumb2__
+	.code 16
+	.thumb_func _bcmp
+	.thumb_func _memcmp
+#else
+	.code 32
+#endif
+
+#define ESTABLISH_FRAME \
+	push   {r7,lr};\
+	mov     r7,     sp
+#define CLEAR_FRAME_AND_RETURN \
+	pop    {r7,pc}
+
 #include <arm/arch.h>
 
-    .text
-    .syntax unified
-#if defined __thumb2__
-    .code 16
-    .thumb_func _bcmp
-    .thumb_func _memcmp
-#else
-    .code 32
-#endif
-    .globl _bcmp
-    .globl _memcmp
-    .align 3
+#if defined _ARM_ARCH_6
+	#define BYTE_REVERSE(reg,tmp) \
+    rev     reg, reg
+#else // defined _ARM_ARCH_6
+// Prior to ARMv6, the REV instruction is not available.  We use a very cute
+// software workaround instead, which needs only a single scratch register.
+	#define BYTE_REVERSE(reg,tmp) \
+	eor     tmp, reg, reg, ror #16;\
+	bic     tmp, tmp, #0xff0000   ;\
+	mov     tmp,      tmp, lsr #8 ;\
+	eor     reg, tmp, reg, ror #8
+#endif // defined _ARM_ARCH_6
+
+.globl _bcmp
+.globl _memcmp
+.align 2
 _bcmp:
 _memcmp:
+    // Unless both buffers are word aligned, jump to a byte-comparison loop.
+	ESTABLISH_FRAME
+	orr     ip,     r0, r1
+	tst     ip,         #3
+	bne     L_useByteComparisons
 
-#ifdef _ARM_ARCH_6
-    subs    ip,     r2,  #4     // if length < 4
-    bmi     L_useByteCompares   // jump to the byte comparison loop
+    // As long as at least four bytes of length remain, load one word from each
+    // buffer and check if they are equal.
+0:  subs    r2,         #4
+	blo     L_lessThanFourBytesRemain
+	ldr     r3,    [r0],#4
+	ldr     ip,    [r1],#4
+	cmp     r3,     ip
+	beq     0b
+	
+    // If the words from the two buffers are unequal, we end up here.  We byte-
+    // swap both, then use an unsigned subtract to determine the result (+/-1).
+	BYTE_REVERSE(r3,r1)
+	BYTE_REVERSE(ip,r2)
+    mov     r0,         #1
+	subs    r3,     ip
+	it      lo
+	movlo   r0,         #-1
+	CLEAR_FRAME_AND_RETURN
+	
+L_lessThanFourBytesRemain:
+    adds    r2,         #4
+L_useByteComparisons:
+	mov     r3,     r0
+    // If no bytes remain to compare, the buffers are equal and we return zero.
+    // Otherwise, load one byte from each buffer and check if they are equal.
+0:  subs    r2,         #1
+	blo     L_buffersAreEqual
+	ldrb    r0,    [r3],#1
+	ldrb    ip,    [r1],#1
+	subs    r0,         ip
+	beq     0b
+	CLEAR_FRAME_AND_RETURN
     
-    orr     r3,     r0,  r1     // if the buffers are
-    tst     r3,          #3     // not word aligned
-    bne     L_useByteCompares   // jump to the byte comparison loop
-
-.align 3
-L_wordCompare:                  // Here we know that both buffers are word
-    ldr     r2,    [r0], #4     // aligned, and (length - 4) > 0, so at least
-    ldr     r3,    [r1], #4     // four bytes remain to be compared.  We load
-    subs    ip,          #4     // a word from each buffer, and byte reverse
-    bmi     L_lastWord          // the loaded words.  We also decrement the
-    rev     r2,     r2          // length by four and jump out of this loop if
-    rev     r3,     r3          // the result is negative.  Then we compare the
-    cmp     r2,     r3          // reversed words, and continue the loop only
-    beq     L_wordCompare       // if they are equal.
-L_wordsUnequal:
-    ite     hi                  // If the words compared unequal, return +/- 1
-    movhi   r0,     #1          // according to the result of the comparison.
-    movls   r0,     #-1         //
-    bx      lr                  //
-L_lastWord:
-    rev     r2,     r2          // If we just loaded the last complete words
-    rev     r3,     r3          // from the buffers, byte-reverse them and
-    cmp     r2,     r3          // compare.  If they are unequal, jump to the
-    bne     L_wordsUnequal      // return path.
-    add     r2,     ip,  #4     // Otherwise, fall into the cleanup code.
-#endif // _ARM_ARCH_6
-
-L_useByteCompares:
-    tst     r2,     r2          // If the length is exactly zero
-    beq     L_returnZero        // avoid doing any loads and return zero.
-    mov     r3,     r0
-.align 3
-L_byteCompareLoop:
-    ldrb    r0,    [r3], #1     // Load a byte from each buffer, and decrement
-    ldrb    ip,    [r1], #1     // the length by one.  If the decremented
-    subs    r2,     #1          // length is zero, exit the loop.  Otherwise
-    beq     L_lastByte          // subtract the loaded bytes; if their
-    subs    r0,     ip          // difference is zero, continue the comparison
-    beq     L_byteCompareLoop   // loop.  Otherwise, return their difference.
-    bx      lr
-L_returnZero:
-    mov     r0,     ip
-L_lastByte:
-    sub     r0,     ip          // Return the difference of the final bytes
-    bx      lr
+L_buffersAreEqual:
+    mov     r0,         #0
+	CLEAR_FRAME_AND_RETURN