Loading...
arm/string/bzero_CortexA9.s Libc-763.13 Libc-825.24
--- Libc/Libc-763.13/arm/string/bzero_CortexA9.s
+++ Libc/Libc-825.24/arm/string/bzero_CortexA9.s
@@ -37,3 +37,109 @@
  * writes value converted to an unsigned char to n successive bytes, beginning
  * at destination.
  */
+
+#include <arm/arch.h>
+#if defined _ARM_ARCH_7
+
+/*****************************************************************************
+ * Macros                                                                    *
+ *****************************************************************************/
+
+#define A9_ENTRY(name) \
+	.align 2;\
+	.globl _ ## name ## $VARIANT$CortexA9;\
+	_ ## name ## $VARIANT$CortexA9:  // aligns, exports and defines the label _<name>$VARIANT$CortexA9 at the current location
+
+#define ESTABLISH_FRAME \
+	push   {r0,r4,r7,lr};\
+	add     r7,     sp, #8  // r7 = address of the r7 slot just pushed (r0 at sp, r4 at sp+4, r7 at sp+8, lr at sp+12)
+    
+#define CLEAR_FRAME_AND_RETURN \
+	pop    {r0,r4,r7,pc}  // reloads the r0, r4 and r7 values pushed by ESTABLISH_FRAME and returns by popping the saved lr into pc
+    
+#define ADDITIONAL_CALLEE_SAVE_REGISTERS {r5,r6,r8}  // pushed and popped only around the unrolled loop, which overwrites them
+
+#define STORE_REGISTERS {r1,r3,r4,r5,r6,r8,r9,ip}  // eight registers, so each stm of this list writes 32 bytes
+
+/*****************************************************************************
+ *  entry points                                                             *
+ *****************************************************************************/
+
+.text
+.syntax unified
+.code 32
+
+A9_ENTRY(__bzero)
+A9_ENTRY(bzero)  // __bzero and bzero label the same address
+    mov     r2,     r1  // copy r1 into r2, the register the memset path below uses as its byte count
+    eor     r1,     r1  // r1 ^= r1, so the fill value is zero; execution then falls through into memset
+
+A9_ENTRY(memset)
+//  Early out if fewer than four bytes are to be set.  Otherwise, store up to
+//  three bytes to align the destination pointer to a word boundary.
+    ESTABLISH_FRAME
+    and     r1,         #0xff  // keep only the low byte of the fill value
+    subs    r2,         #4  // r2 = length - 4; a borrow (lo) means length < 4
+    orr     r1,     r1, r1, lsl #8  // replicate the byte into the low halfword; orr without 's' leaves the flags alone
+    blo     L_lengthLessThanFour  // tests the flags set by the subs two instructions earlier
+    orr     r1,     r1, r1, lsl #16  // replicate again: all four bytes of r1 now hold the fill byte
+0:  tst     r0,         #0x3
+    beq     L_wordAligned  // r0 is 4-byte aligned; r2 = bytes remaining - 4
+    strb    r1,    [r0],#1
+    subs    r2,         #1
+    bhs     0b  // keep aligning while the biased count has not gone below zero
+L_lengthLessThanFour:
+    adds    r2,         #4  // undo the -4 bias: r2 = bytes still to store (0..3)
+    beq     1f
+0:  strb    r1,    [r0],#1  // byte-at-a-time tail
+    subs    r2,         #1
+    bne     0b
+1:  CLEAR_FRAME_AND_RETURN
+
+L_wordAligned:
+//  Destination pointer has word alignment.  Early out if fewer than 64 bytes
+//  are to be set.  Otherwise, store up to 28 bytes to align the destination
+//  pointer to a cacheline boundary.
+    mov     r3,     r1  // r3, r4 and r9 become extra copies of the fill word for the multi-register stores
+    mov     r4,     r1
+    subs    r2,         #0x3c  // r2 was remaining - 4, so now r2 = remaining - 64
+    mov     r9,     r1  // mov without 's' keeps the subs flags intact for the blo below
+    blo     L_lengthLessThanSixtyFour
+0:  tst     r0,         #0x1c  // r0 is already word aligned, so bits 2-4 clear means 32-byte aligned
+    beq     L_cachelineAligned
+    str     r1,    [r0],#4
+    subs    r2,         #4
+    bhs     0b
+L_lengthLessThanSixtyFour:
+    tst     r2,         #0x30  // r2 = remaining - 64 here, so its low six bits equal those of the remaining byte count
+    beq     1f
+0:  stm     r0!,   {r1,r3,r4,r9}  // 16 bytes per pass, repeated until bits 4-5 of r2 are clear
+    sub     r2,         #0x10
+    tst     r2,         #0x30
+    bne     0b
+1:  tst     r2,         #0xf
+    beq     2f
+    lsls    ip,     r2, #29  // C = bit 3 of r2, N = bit 2 of r2; ip only receives the discarded shift result
+    stmcs   r0!,   {r1,r3}  // 8 bytes if bit 3 was set
+    strmi   r1,    [r0],#4  // 4 bytes if bit 2 was set
+    lsls    ip,     r2, #31  // C = bit 1 of r2, N = bit 0 of r2
+    strhcs  r1,    [r0],#2  // 2 bytes if bit 1 was set
+    strbmi  r1,    [r0]  // final byte if bit 0 was set
+2:  CLEAR_FRAME_AND_RETURN
+
+L_cachelineAligned:
+//  Main unrolled loop; stores two complete cachelines per iteration.
+    push    ADDITIONAL_CALLEE_SAVE_REGISTERS
+    mov     r5,     r1
+    mov     r6,     r1
+    mov     r8,     r1
+    mov     ip,     r1  // every register in STORE_REGISTERS now holds the fill word
+.align 4
+0:  stm     r0!,    STORE_REGISTERS
+    subs    r2,         #0x40  // accounts for both stm instructions (2 x 32 bytes) of this iteration
+    stm     r0!,    STORE_REGISTERS  // stm leaves the flags alone, so bhs still sees the subs result
+    bhs     0b
+    pop     ADDITIONAL_CALLEE_SAVE_REGISTERS
+    b       L_lengthLessThanSixtyFour  // r2 = remaining - 64, the same bias the tail code is entered with elsewhere
+
+#endif // defined _ARM_ARCH_7