Loading...
--- Libc/Libc-763.13/arm/string/bzero_CortexA9.s
+++ Libc/Libc-825.24/arm/string/bzero_CortexA9.s
@@ -37,3 +37,109 @@
* writes value converted to an unsigned char to n successive bytes, beginning
* at destination.
*/
+
+#include <arm/arch.h>
+#if defined _ARM_ARCH_7
+
+/*****************************************************************************
+ * Macros *
+ *****************************************************************************/
+// A9_ENTRY(name): align to 2^2 bytes, then declare global and define _<name>$VARIANT$CortexA9.
+#define A9_ENTRY(name) \
+ .align 2;\
+ .globl _ ## name ## $VARIANT$CortexA9;\
+ _ ## name ## $VARIANT$CortexA9:
+// ESTABLISH_FRAME: save r0, r4, r7 and lr; then r7 = sp + 8, the stack slot holding the saved r7.
+#define ESTABLISH_FRAME \
+ push {r0,r4,r7,lr};\
+ add r7, sp, #8
+// CLEAR_FRAME_AND_RETURN: restore r0, r4 and r7, and return by popping the saved lr into pc.
+#define CLEAR_FRAME_AND_RETURN \
+ pop {r0,r4,r7,pc}
+// Registers pushed before the main loop and popped after it; the loop overwrites them with store data.
+#define ADDITIONAL_CALLEE_SAVE_REGISTERS {r5,r6,r8}
+// The eight registers (32 bytes) written by each stm in the main loop.
+#define STORE_REGISTERS {r1,r3,r4,r5,r6,r8,r9,ip}
+
+/*****************************************************************************
+ * entry points *
+ *****************************************************************************/
+
+.text // everything below is emitted into the code section
+.syntax unified // unified assembler syntax (two-operand forms, condition suffix last)
+.code 32 // assemble as 32-bit ARM instructions, not Thumb
+
+A9_ENTRY(__bzero) // __bzero and bzero share one entry point. In: r0 = destination, r1 = byte count.
+A9_ENTRY(bzero) // Rearranges the arguments into memset form, then falls through into memset below.
+ mov r2, r1 // memset expects the byte count in r2
+ eor r1, r1 // r1 = r1 ^ r1 = 0, i.e. a fill value of zero
+
+A9_ENTRY(memset) // In: r0 = destination, r1 = fill value (low byte used), r2 = byte count.
+// Early out if fewer than four bytes are to be set. Otherwise, store up to
+// three bytes to align the destination pointer to a word boundary.
+ ESTABLISH_FRAME // also saves r0, so the original destination is what gets returned
+ and r1, #0xff // keep only the low byte of the fill value
+ subs r2, #4 // r2 = count - 4; condition lo holds if count < 4
+ orr r1, r1, r1, lsl #8 // replicate the byte into the low halfword (does not touch the flags)
+ blo L_lengthLessThanFour // tests the flags set by the subs two instructions up
+ orr r1, r1, r1, lsl #16 // replicate the halfword so all four bytes of r1 hold the fill byte
+0: tst r0, #0x3 // is the destination word aligned yet?
+ beq L_wordAligned // yes: continue with r2 = (bytes remaining) - 4
+ strb r1, [r0],#1 // no: store one byte and advance the pointer
+ subs r2, #1
+ bhs 0b // keep aligning until the biased count drops below zero
+L_lengthLessThanFour: // here r2 = (bytes remaining) - 4, a value in -4..-1
+ adds r2, #4 // undo the bias: r2 = bytes remaining (0..3)
+ beq 1f // nothing left to store
+0: strb r1, [r0],#1 // store the last few bytes one at a time
+ subs r2, #1
+ bne 0b
+1: CLEAR_FRAME_AND_RETURN // reloads r0 with the original destination and returns
+
+L_wordAligned: // In: r0 word aligned, r1 = fill byte in all four lanes, r2 = (bytes remaining) - 4.
+// Destination pointer has word alignment. Early out if fewer than 64 bytes
+// are to be set. Otherwise, store up to 28 bytes to align the destination
+// pointer to a cacheline boundary.
+ mov r3, r1 // r3, r4 and r9 become extra copies of the fill word for multi-register stores
+ mov r4, r1
+ subs r2, #0x3c // 0x3c = 60, so r2 = (bytes remaining) - 64; lo if fewer than 64 remain
+ mov r9, r1 // mov leaves the flags from the subs intact for the blo below
+ blo L_lengthLessThanSixtyFour
+0: tst r0, #0x1c // address bits 2-4 all clear means r0 is 32-byte aligned
+ beq L_cachelineAligned
+ str r1, [r0],#4 // store one word and advance the pointer
+ subs r2, #4
+ bhs 0b // falls through once fewer than 64 bytes remain
+L_lengthLessThanSixtyFour: // r2 = (bytes remaining) - 64 with 0..63 remaining, so its low six bits equal the remainder
+ tst r2, #0x30 // bits 4-5: are any whole 16-byte chunks left?
+ beq 1f
+0: stm r0!, {r1,r3,r4,r9} // store 16 bytes and advance the pointer
+ sub r2, #0x10
+ tst r2, #0x30
+ bne 0b
+1: tst r2, #0xf // bits 0-3: are any bytes left below 16?
+ beq 2f
+ lsls ip, r2, #29 // C = bit 3 of r2 (8 bytes left), N = bit 2 (4 bytes left); ip is only a scratch result
+ stmcs r0!, {r1,r3} // 8 bytes if bit 3 was set
+ strmi r1, [r0],#4 // 4 bytes if bit 2 was set
+ lsls ip, r2, #31 // C = bit 1 of r2 (2 bytes left), N = bit 0 (1 byte left)
+ strhcs r1, [r0],#2 // 2 bytes if bit 1 was set
+ strbmi r1, [r0] // final byte if bit 0 was set
+2: CLEAR_FRAME_AND_RETURN // reloads r0 with the original destination and returns
+
+L_cachelineAligned: // In: r0 32-byte aligned, r2 = (bytes remaining) - 64, which is >= 0 on every path here.
+// Main unrolled loop; stores two complete cachelines per iteration.
+ push ADDITIONAL_CALLEE_SAVE_REGISTERS // save r5, r6, r8 before they are overwritten with store data
+ mov r5, r1 // load the fill word into the remaining store registers
+ mov r6, r1
+ mov r8, r1
+ mov ip, r1
+.align 4 // loop head on a 2^4 = 16-byte boundary; NOTE(review): padding here is on the execution path, presumably no-ops - confirm
+0: stm r0!, STORE_REGISTERS // eight registers = 32 bytes
+ subs r2, #0x40 // account for the 64 bytes of this iteration; the stm after it does not alter the flags
+ stm r0!, STORE_REGISTERS // second 32 bytes
+ bhs 0b // repeat while 64 or more bytes still remain
+ pop ADDITIONAL_CALLEE_SAVE_REGISTERS // restore r5, r6, r8
+ b L_lengthLessThanSixtyFour // r2 is again (bytes remaining) - 64 with 0..63 bytes left to store
+
+#endif // defined _ARM_ARCH_7