Loading...
--- libmalloc/libmalloc-317.140.5/src/magazine_tiny.c
+++ libmalloc/libmalloc-657.80.3/src/magazine_tiny.c
@@ -38,25 +38,6 @@
 // reduce the msize part below zero (not checked).
 #define TINY_MAG_LAST_FREE_PTR_DEC_MSIZE(x, msize_delta) (x) = ((void *)(x) - (msize_delta))
 
-static MALLOC_INLINE MALLOC_ALWAYS_INLINE
-mag_index_t
-tiny_mag_get_thread_index(void)
-{
-#if CONFIG_TINY_USES_HYPER_SHIFT
-	if (os_likely(_os_cpu_number_override == -1)) {
-		return _malloc_cpu_number() >> hyper_shift;
-	} else {
-		return _os_cpu_number_override >> hyper_shift;
-	}
-#else // CONFIG_SMALL_USES_HYPER_SHIFT
-	if (os_likely(_os_cpu_number_override == -1)) {
-		return _malloc_cpu_number();
-	} else {
-		return _os_cpu_number_override;
-	}
-#endif // CONFIG_SMALL_USES_HYPER_SHIFT
-}
-
 static inline grain_t
 tiny_slot_from_msize(msize_t msize)
 {
@@ -81,7 +62,11 @@
 		if (BITARRAY_BIT(prev_header, prev_index)) {
 			return 1;
 		}
-		return TINY_PREVIOUS_MSIZE(ptr);
+		msize_t *prev_msize_ptr = &TINY_PREVIOUS_MSIZE(ptr);
+		// This is a speculative read of potentially in-use app memory, we need
+		// to use _malloc_read_uint16_via_rsp to avoid triggering warnings in
+		// memory diagnostic tools.
+		return _malloc_read_uint16_via_rsp(prev_msize_ptr);
 	}
 	// don't read possibly unmapped memory before the beginning of the region
 	return 0;
@@ -197,6 +182,160 @@
 	BITARRAY_CLR(block_header, index);
 	BITARRAY_CLR(in_use, index);
 }
+
+// Scrub the inline free-block metadata stored at `ptr` so that the block's
+// bytes read as zero. Does nothing unless malloc_zero_policy is
+// MALLOC_ZERO_ON_FREE.
+//
+// The tiny_free_list_t at the start of the block is always cleared. The
+// leading inline size (TINY_FREE_SIZE) is cleared for msize > 1 and for
+// msize == 0; the trailing copy in front of the following block
+// (TINY_PREVIOUS_MSIZE of the follower) is cleared only for msize > 1.
+// For msize == 1 neither size field is written.
+static MALLOC_INLINE void
+zero_tiny_free_inline_meta(void *ptr, msize_t msize)
+{
+	if (malloc_zero_policy != MALLOC_ZERO_ON_FREE) {
+		return;
+	}
+
+	tiny_free_list_t *links = (tiny_free_list_t *)ptr;
+	*links = (tiny_free_list_t){ 0 };
+
+	// leading inline size: present for every size except exactly 1
+	if (msize > 1 || msize == 0) {
+		TINY_FREE_SIZE(ptr) = 0;
+	}
+
+	// trailing inline size: only written for msize > 1
+	if (msize > 1) {
+		void *next_block = FOLLOWING_TINY_PTR(ptr, msize);
+		TINY_PREVIOUS_MSIZE(next_block) = 0;
+	}
+}
+
+// Zero-on-free only: clear the trailing inline msize that sits in front of
+// the block following `ptr` (located via FOLLOWING_TINY_PTR(ptr, msize)).
+// Nothing is written when the policy is not MALLOC_ZERO_ON_FREE or when
+// msize is 0 or 1.
+static MALLOC_INLINE void
+zero_tiny_free_inline_meta_following(void *ptr, msize_t msize)
+{
+	if (malloc_zero_policy != MALLOC_ZERO_ON_FREE || !(msize > 1)) {
+		return;
+	}
+
+	void *next_block = FOLLOWING_TINY_PTR(ptr, msize);
+	TINY_PREVIOUS_MSIZE(next_block) = 0;
+}
+
+// Report that a block which was expected to be all zeroes is not, through
+// malloc_zone_error() with MALLOC_ABORT_ON_CORRUPTION.
+//
+// ptr   - start of the block
+// msize - size of the block; converted to bytes with TINY_BYTES_FOR_MSIZE()
+//
+// The message carries the block address, its size in bytes, the offset of the
+// first non-NUL byte, and up to 32 bytes starting at that offset. Positions
+// that would fall past the end of the block are printed as 00.
+static MALLOC_COLD MALLOC_NOINLINE void
+tiny_zero_corruption_abort(void *ptr, msize_t msize)
+{
+	uint8_t *bytes = ptr;
+	size_t size = TINY_BYTES_FOR_MSIZE(msize);
+	uint8_t *start = bytes, *end = bytes + size;
+	// Scan to the first non-NUL byte, but never walk past the end of the
+	// block. If the contents read as all-zero by the time we get here (for
+	// example because the offending byte was rewritten concurrently), stop at
+	// `end` rather than running into neighbouring or unmapped memory; offset
+	// then equals the block size and the dump below prints 00 throughout.
+	while (bytes < end && *bytes == '\0') {
+		bytes++;
+	}
+
+	unsigned int offset = (unsigned int)(bytes - start);
+	malloc_zone_error(MALLOC_ABORT_ON_CORRUPTION, true,
+			"Corruption detected in block %p of size %u at offset %u, "
+			"first 32 bytes at that offset are "
+			"%02X %02X %02X %02X %02X %02X %02X %02X | "
+			"%02X %02X %02X %02X %02X %02X %02X %02X | "
+			"%02X %02X %02X %02X %02X %02X %02X %02X | "
+			"%02X %02X %02X %02X %02X %02X %02X %02X\n",
+			ptr, (unsigned int)size, offset,
+			// each byte is only read if it still lies inside the block
+			(bytes + 0) < end ? *(bytes + 0) : 0,
+			(bytes + 1) < end ? *(bytes + 1) : 0,
+			(bytes + 2) < end ? *(bytes + 2) : 0,
+			(bytes + 3) < end ? *(bytes + 3) : 0,
+			(bytes + 4) < end ? *(bytes + 4) : 0,
+			(bytes + 5) < end ? *(bytes + 5) : 0,
+			(bytes + 6) < end ? *(bytes + 6) : 0,
+			(bytes + 7) < end ? *(bytes + 7) : 0,
+			(bytes + 8) < end ? *(bytes + 8) : 0,
+			(bytes + 9) < end ? *(bytes + 9) : 0,
+			(bytes + 10) < end ? *(bytes + 10) : 0,
+			(bytes + 11) < end ? *(bytes + 11) : 0,
+			(bytes + 12) < end ? *(bytes + 12) : 0,
+			(bytes + 13) < end ? *(bytes + 13) : 0,
+			(bytes + 14) < end ? *(bytes + 14) : 0,
+			(bytes + 15) < end ? *(bytes + 15) : 0,
+			(bytes + 16) < end ? *(bytes + 16) : 0,
+			(bytes + 17) < end ? *(bytes + 17) : 0,
+			(bytes + 18) < end ? *(bytes + 18) : 0,
+			(bytes + 19) < end ? *(bytes + 19) : 0,
+			(bytes + 20) < end ? *(bytes + 20) : 0,
+			(bytes + 21) < end ? *(bytes + 21) : 0,
+			(bytes + 22) < end ? *(bytes + 22) : 0,
+			(bytes + 23) < end ? *(bytes + 23) : 0,
+			(bytes + 24) < end ? *(bytes + 24) : 0,
+			(bytes + 25) < end ? *(bytes + 25) : 0,
+			(bytes + 26) < end ? *(bytes + 26) : 0,
+			(bytes + 27) < end ? *(bytes + 27) : 0,
+			(bytes + 28) < end ? *(bytes + 28) : 0,
+			(bytes + 29) < end ? *(bytes + 29) : 0,
+			(bytes + 30) < end ? *(bytes + 30) : 0,
+			(bytes + 31) < end ? *(bytes + 31) : 0);
+}
+
+// Put a block that is being (re)used for an allocation into the state the
+// active malloc_zero_policy calls for:
+//  - MALLOC_ZERO_ON_ALLOC: always zero-fill the block.
+//  - MALLOC_ZERO_NONE:     zero-fill only when the caller passed `clear`.
+//  - MALLOC_ZERO_ON_FREE:  never write. When zero_on_free_should_sample()
+//    says so, scan the block with _malloc_memcmp_zero_aligned8() and hand it
+//    to tiny_zero_corruption_abort() if that returns nonzero.
+// Any other policy value leaves the block untouched.
+static MALLOC_INLINE void
+tiny_check_zero_or_clear(void *ptr, msize_t msize, boolean_t clear)
+{
+	switch (malloc_zero_policy) {
+	case MALLOC_ZERO_ON_ALLOC:
+		memset(ptr, '\0', TINY_BYTES_FOR_MSIZE(msize));
+		break;
+	case MALLOC_ZERO_NONE:
+		if (clear) {
+			memset(ptr, '\0', TINY_BYTES_FOR_MSIZE(msize));
+		}
+		break;
+	case MALLOC_ZERO_ON_FREE:
+		// the sampling decision is taken first; the scan runs only if sampled
+		if (!zero_on_free_should_sample()) {
+			break;
+		}
+		if (_malloc_memcmp_zero_aligned8(ptr, TINY_BYTES_FOR_MSIZE(msize))) {
+			tiny_zero_corruption_abort(ptr, msize);
+		}
+		break;
+	}
+}
+
+// Check the inline metadata of free block `ptr` (expected size `msize`) that
+// hasn't already been verified: the previous freelist pointer, and agreement of
+// the inline msizes (leading vs. trailing vs. `msize` when msize > 1; leading
+// must be 0 when msize == 0). Mismatches are reported via malloc_zone_error()
+// with MALLOC_ABORT_ON_CORRUPTION. Then clear all the inline metadata.
+static MALLOC_NOINLINE void
+_tiny_check_and_zero_inline_meta_from_freelist(rack_t *rack, void *ptr,
+		msize_t msize)
+{
+	tiny_free_list_t *free_ptr = ptr;
+
+	// check the previous pointer; the decoded value itself is discarded
+	(void)free_list_unchecksum_ptr(rack, &free_ptr->previous);
+	// zero both pointers (must come after the check above has read `previous`)
+	*free_ptr = (tiny_free_list_t){ 0 };
+
+	// check agreement between msizes and zero them; both are read before either is cleared
+	if (msize > 1) {
+		msize_t leading_free_size = TINY_FREE_SIZE(ptr); // copy stored in the block itself
+		void *follower = FOLLOWING_TINY_PTR(ptr, msize);
+		msize_t trailing_free_size = TINY_PREVIOUS_MSIZE(follower); // copy stored in front of the next block
+
+		if (leading_free_size != trailing_free_size) {
+			malloc_zone_error(MALLOC_ABORT_ON_CORRUPTION, true,
+					"Corruption of free object %p: msizes %u/%u disagree\n",
+					ptr, leading_free_size, trailing_free_size);
+		} else if (leading_free_size != msize) { // copies agree with each other but not with the caller
+			malloc_zone_error(MALLOC_ABORT_ON_CORRUPTION, true,
+					"Corruption at %p: unexpected msizes %u/%u\n",
+					ptr, leading_free_size, msize);
+		}
+
+		TINY_FREE_SIZE(ptr) = 0;
+		TINY_PREVIOUS_MSIZE(follower) = 0;
+	} else if (msize == 0 && TINY_FREE_SIZE(ptr) != 0) { // msize 0: only a leading size is examined
+		malloc_zone_error(MALLOC_ABORT_ON_CORRUPTION, true,
+				"Corruption at %p: unexpected nonzero msize %u\n", ptr,
+				TINY_FREE_SIZE(ptr));
+	} // msize == 1: no inline msize is checked or cleared here
+}
+
+// Inlined gate in front of the out-of-line metadata check: forwards to
+// _tiny_check_and_zero_inline_meta_from_freelist() only when
+// malloc_zero_policy is MALLOC_ZERO_ON_FREE, and does nothing otherwise.
+static MALLOC_ALWAYS_INLINE MALLOC_INLINE void
+tiny_check_and_zero_inline_meta_from_freelist(rack_t *rack, void *ptr,
+		msize_t msize)
+{
+	if (malloc_zero_policy == MALLOC_ZERO_ON_FREE) {
+		_tiny_check_and_zero_inline_meta_from_freelist(rack, ptr, msize);
+	}
+}
+
 
 static MALLOC_INLINE void
 set_tiny_meta_header_free(const void *ptr, msize_t msize)
@@ -589,6 +728,9 @@
 		if (previous_block) {
 			set_tiny_meta_header_middle(last_block);
 			tiny_free_list_remove_ptr(rack, tiny_mag_ptr, previous_block, previous_msize);
+			// zero out the trailing inline msize of the previous block to
+			// connect its zero prefix to the last block
+			zero_tiny_free_inline_meta_following(previous_block, previous_msize);
 			last_block = previous_block;
 			last_msize += previous_msize;
 		}
@@ -614,6 +756,9 @@
 			msize_t next_msize = get_tiny_free_size(next_block);
 			set_tiny_meta_header_middle(next_block);
 			tiny_free_list_remove_ptr(rack, tiny_mag_ptr, next_block, next_msize);
+			// zero inline metadata of next_block to continue the zero prefix of
+			// the big starting free block
+			zero_tiny_free_inline_meta(next_block, next_msize);
 			last_msize += next_msize;
 		}
 
@@ -873,6 +1018,11 @@
 				continue;
 			}
 
+			if (REGION_TRAILER_FOR_TINY_REGION(tiny)->pinned_to_depot > 0) {
+				SZONE_MAGAZINE_PTR_UNLOCK(mag_ptr);
+				continue;
+			}
+
 			if (tiny == mag_ptr->mag_last_region && (mag_ptr->mag_bytes_free_at_end || mag_ptr->mag_bytes_free_at_start)) {
 				tiny_finalize_region(rack, mag_ptr);
 			}
@@ -887,7 +1037,7 @@
 
 			SZONE_MAGAZINE_PTR_LOCK(tiny_depot_ptr);
 			MAGAZINE_INDEX_FOR_TINY_REGION(tiny) = DEPOT_MAGAZINE_INDEX;
-			REGION_TRAILER_FOR_TINY_REGION(tiny)->pinned_to_depot = 0;
+			MALLOC_ASSERT(REGION_TRAILER_FOR_TINY_REGION(tiny)->pinned_to_depot == 0);
 
 			size_t bytes_inplay = tiny_free_reattach_region(rack, tiny_depot_ptr, tiny);
 
@@ -976,11 +1126,6 @@
 {
 	magazine_t *depot_ptr = &(rack->magazines[DEPOT_MAGAZINE_INDEX]);
 
-	/* FIXME: Would Uniprocessor benefit from recirc and MADV_FREE? */
-	if (rack->num_magazines == 1) { // Uniprocessor, single magazine, so no recirculation necessary
-		return 0;
-	}
-
 #if DEBUG_MALLOC
 	if (DEPOT_MAGAZINE_INDEX == mag_index) {
 		malloc_zone_error(rack->debug_flags, true, "tiny_get_region_from_depot called for magazine index -1\n");
@@ -993,22 +1138,32 @@
 	// Appropriate a Depot'd region that can satisfy requested msize.
 	region_trailer_t *node;
 	region_t sparse_region;
+	msize_t try_msize = msize;
 
 	while (1) {
-		sparse_region = tiny_find_msize_region(rack, depot_ptr, DEPOT_MAGAZINE_INDEX, msize);
+		sparse_region = tiny_find_msize_region(rack, depot_ptr, DEPOT_MAGAZINE_INDEX, try_msize);
 		if (NULL == sparse_region) { // Depot empty?
 			SZONE_MAGAZINE_PTR_UNLOCK(depot_ptr);
 			return 0;
 		}
 
 		node = REGION_TRAILER_FOR_TINY_REGION(sparse_region);
-		if (0 >= node->pinned_to_depot) {
+		if (0 == node->pinned_to_depot) {
+			// Found one!
 			break;
 		}
 
-		SZONE_MAGAZINE_PTR_UNLOCK(depot_ptr);
-		yield();
-		SZONE_MAGAZINE_PTR_LOCK(depot_ptr);
+		// Try the next msize up - maybe the head of its free list will be in
+		// a region we can use. Once we get the region we'll still allocate the
+		// original msize.
+		try_msize++;
+
+		if (try_msize > NUM_TINY_SLOTS) {
+			// Tried all the msizes but couldn't get a usable region. Let's
+			// give up for now and we'll allocate a new region from the kernel.
+			SZONE_MAGAZINE_PTR_UNLOCK(depot_ptr);
+			return 0;
+		}
 	}
 
 	// disconnect node from Depot
@@ -1019,7 +1174,7 @@
 
 	// Transfer ownership of the region
 	MAGAZINE_INDEX_FOR_TINY_REGION(sparse_region) = mag_index;
-	node->pinned_to_depot = 0;
+	MALLOC_ASSERT(node->pinned_to_depot == 0);
 
 	// Iterate the region putting its free entries on its new (locked) magazine's free list
 	size_t bytes_inplay = tiny_free_reattach_region(rack, tiny_mag_ptr, sparse_region);
@@ -1177,11 +1332,7 @@
 	region_trailer_t *node = REGION_TRAILER_FOR_TINY_REGION(region);
 	size_t bytes_used = node->bytes_used;
 
-	/* FIXME: Would Uniprocessor benefit from recirc and MADV_FREE? */
-	if (rack->num_magazines == 1) { // Uniprocessor, single magazine, so no recirculation necessary
-		/* NOTHING */
-		return TRUE; // Caller must do SZONE_MAGAZINE_PTR_UNLOCK(tiny_mag_ptr)
-	} else if (DEPOT_MAGAZINE_INDEX != mag_index) {
+	if (DEPOT_MAGAZINE_INDEX != mag_index) {
 		// Emptiness discriminant
 		if (tiny_region_below_recirc_threshold(region)) {
 			/* Region has crossed threshold from density to sparsity. Mark it "suitable" on the
@@ -1227,8 +1378,12 @@
 }
 #endif // CONFIG_RECIRC_DEPOT
 
+#define TINY_FREE_FLAG_PARTIAL 0x1
+#define TINY_FREE_FLAG_FROM_CACHE 0x2
+
 boolean_t
-tiny_free_no_lock(rack_t *rack, magazine_t *tiny_mag_ptr, mag_index_t mag_index, region_t region, void *ptr, msize_t msize, boolean_t partial_free)
+tiny_free_no_lock(rack_t *rack, magazine_t *tiny_mag_ptr, mag_index_t mag_index,
+		region_t region, void *ptr, msize_t msize, uint32_t flags)
 {
 	void *original_ptr = ptr;
 	size_t original_size = TINY_BYTES_FOR_MSIZE(msize);
@@ -1265,9 +1420,15 @@
 		// clear the meta_header since this is no longer the start of a block
 		set_tiny_meta_header_middle(ptr);
 		tiny_free_list_remove_ptr(rack, tiny_mag_ptr, previous, previous_msize);
+
+		// zero out the trailing inline msize of the block to connect the zero
+		// prefix of this block to the newly free block
+		zero_tiny_free_inline_meta_following(previous, previous_msize);
+
 		ptr = previous;
 		msize += previous_msize;
 	}
+
 	// We try to coalesce with the next block
 	if ((next_block < TINY_REGION_HEAP_END(region)) && tiny_meta_header_is_free(next_block)) {
 		next_msize = get_tiny_free_size(next_block);
@@ -1304,6 +1465,12 @@
 
 			// clear the meta_header to enable coalescing backwards
 			set_tiny_meta_header_middle(big_free_block);
+
+			// zero out inline metadata to continue the zero prefix of the
+			// previous block - must happen before set_tiny_meta_header_free()
+			// reinitializes the new inline metadata
+			zero_tiny_free_inline_meta(big_free_block, next_msize);
+
 			set_tiny_meta_header_free(ptr, msize);
 
 			uint16_t next_block_index = TINY_INDEX_FOR_PTR(big_free_block) + 1;
@@ -1320,13 +1487,21 @@
 		}
 		tiny_free_list_remove_ptr(rack, tiny_mag_ptr, next_block, next_msize);
 		set_tiny_meta_header_middle(next_block); // clear the meta_header to enable coalescing backwards
+		// zero out inline metadata to continue the zero prefix of the previous
+		// block
+		zero_tiny_free_inline_meta(next_block, next_msize);
 		msize += next_msize;
 	}
 
-	// The tiny cache already scribbles free blocks as they go through the
-	// cache whenever msize < TINY_QUANTUM , so we do not need to do it here.
-	if ((rack->debug_flags & MALLOC_DO_SCRIBBLE) && msize && (msize >= TINY_QUANTUM)) {
-		memset(ptr, SCRABBLE_BYTE, TINY_BYTES_FOR_MSIZE(msize));
+	if (malloc_zero_policy != MALLOC_ZERO_ON_FREE) {
+		// The tiny cache already scribbles free blocks as they go through the
+		// cache, so we do not need to do it here.
+		//
+		// XXX This should probably also be conditional on CONFIG_TINY_CACHE
+		if ((rack->debug_flags & MALLOC_DO_SCRIBBLE) &&
+				!(flags & TINY_FREE_FLAG_FROM_CACHE)) {
+			memset(ptr, SCRABBLE_BYTE, TINY_BYTES_FOR_MSIZE(msize));
+		}
 	}
 
 	tiny_free_list_add_ptr(rack, tiny_mag_ptr, ptr, msize);
@@ -1344,7 +1519,7 @@
 	// posix_memalign and then free some range of bytes at the start and/or
 	// the end. In that case, we aren't changing the number of allocated objects.
 	// Similarly for realloc() in the case where we shrink in place.
-	if (!partial_free) {
+	if (!(flags & TINY_FREE_FLAG_PARTIAL)) {
 		trailer->objects_in_use--;
 		tiny_mag_ptr->mag_num_objects--;
 	}
@@ -1583,7 +1758,8 @@
 			// The block in mag_last_free is still marked as header and in-use, so copy that
 			// state to the block that remains. The state for the block that we're going to
 			// use is adjusted by the set_tiny_meta_header_middle() call below.
-			set_tiny_meta_header_in_use(next_block + TINY_BYTES_FOR_MSIZE(coalesced_msize), leftover_msize);
+			void *leftover_ptr = ((char *)next_block) + TINY_BYTES_FOR_MSIZE(coalesced_msize);
+			set_tiny_meta_header_in_use(leftover_ptr, leftover_msize);
 		} else {
 			// Using the whole block.
 			tiny_mag_ptr->mag_last_free = NULL;
@@ -1592,6 +1768,7 @@
 			trailer->objects_in_use--;
 		}
 		set_tiny_meta_header_middle(next_block);
+		tiny_check_zero_or_clear(last_free_ptr, coalesced_msize, false);
 		coalesced_msize = 0; // No net change in memory use
 	} else {
 #endif // CONFIG_TINY_CACHE
@@ -1616,6 +1793,7 @@
 				// Mark the first block of the remaining free area as a header and in-use.
 				set_tiny_meta_header_in_use_1(ptr + TINY_BYTES_FOR_MSIZE(new_msize));
 			}
+			tiny_check_zero_or_clear(unused_start, coalesced_msize, false);
 		} else {
 			/*
 			 * Look for a free block immediately afterwards.  If it's large
@@ -1637,6 +1815,10 @@
 		 	 */
 			tiny_free_list_remove_ptr(rack, tiny_mag_ptr, next_block, next_msize);
 			set_tiny_meta_header_middle(next_block); // clear the meta_header to enable coalescing backwards
+
+			tiny_check_and_zero_inline_meta_from_freelist(rack, next_block, next_msize);
+			tiny_check_zero_or_clear(next_block, coalesced_msize, false);
+
 			leftover_msize = next_msize - coalesced_msize;
 			if (leftover_msize) {
 				/* there's some left, so put the remainder back */
@@ -2036,6 +2218,7 @@
 		}
 #endif
 		tiny_update_region_free_list_for_remove(slot, ptr, next);
+		tiny_check_and_zero_inline_meta_from_freelist(rack, ptr, msize);
 
 		goto return_tiny_alloc;
 	}
@@ -2067,7 +2250,14 @@
 				BITMAPV_CLR(tiny_mag_ptr->mag_bitmap, slot);
 			}
 			this_msize = get_tiny_free_size(ptr);
+			if (os_unlikely(this_msize < msize)) {
+				malloc_zone_error(MALLOC_ABORT_ON_CORRUPTION, true,
+						"Corruption of tiny freelist %p: size too small (%u/%u)\n",
+						ptr, this_msize, msize);
+
+			}
 			tiny_update_region_free_list_for_remove(slot, ptr, next);
+			tiny_check_and_zero_inline_meta_from_freelist(rack, ptr, this_msize);
 			goto add_leftover_and_proceed;
 		}
 #if DEBUG_MALLOC
@@ -2082,18 +2272,27 @@
 	ptr = limit->p;
 	if (ptr) {
 		this_msize = get_tiny_free_size(ptr);
+        if (os_unlikely(this_msize < msize)) {
+            malloc_zone_error(MALLOC_ABORT_ON_CORRUPTION, true,
+                    "Corruption of tiny freelist %p: size too small (%u/%u)\n",
+                    ptr, this_msize, msize);
+        }
 		next = free_list_unchecksum_ptr(rack, &ptr->next);
 		if (this_msize - msize > NUM_TINY_SLOTS) {
 			// the leftover will go back to the free list, so we optimize by
 			// modifying the free list rather than a pop and push of the head
 			leftover_msize = this_msize - msize;
 			leftover_ptr = (tiny_free_list_t *)((unsigned char *)ptr + TINY_BYTES_FOR_MSIZE(msize));
+
+			tiny_free_list_t tmp_ptr = *ptr;
+			tiny_check_and_zero_inline_meta_from_freelist(rack, ptr, this_msize);
+
 			limit->p = leftover_ptr;
 			if (next) {
 				next->previous.u = free_list_checksum_ptr(rack, leftover_ptr);
 			}
-			leftover_ptr->previous = ptr->previous;
-			leftover_ptr->next = ptr->next;
+			leftover_ptr->previous = tmp_ptr.previous;
+			leftover_ptr->next = tmp_ptr.next;
 			set_tiny_meta_header_free(leftover_ptr, leftover_msize);
 #if DEBUG_MALLOC
 			if (LOG(szone, ptr)) {
@@ -2111,6 +2310,7 @@
 		}
 		limit->p = next;
 		tiny_update_region_free_list_for_remove(slot, ptr, next);
+		tiny_check_and_zero_inline_meta_from_freelist(rack, ptr, this_msize);
 		goto add_leftover_and_proceed;
 		/* NOTREACHED */
 	}
@@ -2155,6 +2355,8 @@
 
 add_leftover_and_proceed:
 	if (!this_msize || (this_msize > msize)) {
+		// XXX This works even when (this_msize == 0) because the unsigned
+		// subtraction wraps around to the correct result
 		leftover_msize = this_msize - msize;
 		leftover_ptr = (tiny_free_list_t *)((unsigned char *)ptr + TINY_BYTES_FOR_MSIZE(msize));
 #if DEBUG_MALLOC
@@ -2205,7 +2407,7 @@
 tiny_malloc_should_clear(rack_t *rack, msize_t msize, boolean_t cleared_requested)
 {
 	void *ptr;
-	mag_index_t mag_index = tiny_mag_get_thread_index() % rack->num_magazines;
+	mag_index_t mag_index = rack_get_thread_index(rack) % rack->num_magazines;
 	magazine_t *tiny_mag_ptr = &(rack->magazines[mag_index]);
 
 	MALLOC_TRACE(TRACE_tiny_malloc, (uintptr_t)rack, TINY_BYTES_FOR_MSIZE(msize), (uintptr_t)tiny_mag_ptr, cleared_requested);
@@ -2234,9 +2436,9 @@
 		tiny_mag_ptr->mag_last_free_rgn = NULL;
 		SZONE_MAGAZINE_PTR_UNLOCK(tiny_mag_ptr);
 		CHECK(szone, __PRETTY_FUNCTION__);
-		if (cleared_requested) {
-			memset(ptr, 0, TINY_BYTES_FOR_MSIZE(msize));
-		}
+
+		tiny_check_zero_or_clear(ptr, msize, cleared_requested);
+
 #if DEBUG_MALLOC
 		if (LOG(szone, ptr)) {
 			malloc_report(ASL_LEVEL_INFO, "in tiny_malloc_should_clear(), tiny cache ptr=%p, msize=%d\n", ptr, msize);
@@ -2251,9 +2453,7 @@
 		if (ptr) {
 			SZONE_MAGAZINE_PTR_UNLOCK(tiny_mag_ptr);
 			CHECK(szone, __PRETTY_FUNCTION__);
-			if (cleared_requested) {
-				memset(ptr, 0, TINY_BYTES_FOR_MSIZE(msize));
-			}
+			tiny_check_zero_or_clear(ptr, msize, cleared_requested);
 			return ptr;
 		}
 
@@ -2263,9 +2463,7 @@
 			if (ptr) {
 				SZONE_MAGAZINE_PTR_UNLOCK(tiny_mag_ptr);
 				CHECK(szone, __PRETTY_FUNCTION__);
-				if (cleared_requested) {
-					memset(ptr, 0, TINY_BYTES_FOR_MSIZE(msize));
-				}
+				tiny_check_zero_or_clear(ptr, msize, cleared_requested);
 				return ptr;
 			}
 		}
@@ -2273,16 +2471,17 @@
 
 		// The magazine is exhausted. A new region (heap) must be allocated to satisfy this call to malloc().
 		// The allocation, an mmap() system call, will be performed outside the magazine spin locks by the first
-		// thread that suffers the exhaustion. That thread sets "alloc_underway" and enters a critical section.
-		// Threads arriving here later are excluded from the critical section, yield the CPU, and then retry the
-		// allocation. After some time the magazine is resupplied, the original thread leaves with its allocation,
-		// and retry-ing threads succeed in the code just above.
-		if (!tiny_mag_ptr->alloc_underway) {
+		// thread that suffers the exhaustion. That thread acquires the magazine_alloc_lock, then drops the
+		// magazine lock to allow freeing threads to proceed. Allocating threads that arrive later are excluded
+		// from the critical section by the alloc lock. When those are unblocked, they succeed in the code above.
+		//
+		// Note that we need to trylock the alloc lock to avoid a deadlock, since we can't block on the alloc
+		// lock while holding the magazine lock
+		if (os_likely(_malloc_lock_trylock(&tiny_mag_ptr->magazine_alloc_lock))) {
+			// We got the alloc lock, so we are the thread that should allocate a new region
 			void *fresh_region;
 
 			// time to create a new region (do this outside the magazine lock)
-			tiny_mag_ptr->alloc_underway = TRUE;
-			OSMemoryBarrier();
 			SZONE_MAGAZINE_PTR_UNLOCK(tiny_mag_ptr);
 			fresh_region = mvm_allocate_pages(TINY_REGION_SIZE,
 					TINY_BLOCKS_ALIGN,
@@ -2294,24 +2493,30 @@
 			MAGMALLOC_ALLOCREGION(TINY_SZONE_FROM_RACK(rack), (int)mag_index, fresh_region, TINY_REGION_SIZE);
 
 			if (!fresh_region) { // out of memory!
-				tiny_mag_ptr->alloc_underway = FALSE;
-				OSMemoryBarrier();
 				SZONE_MAGAZINE_PTR_UNLOCK(tiny_mag_ptr);
+				_malloc_lock_unlock(&tiny_mag_ptr->magazine_alloc_lock);
 				return NULL;
 			}
 
 			region_set_cookie(&REGION_COOKIE_FOR_TINY_REGION(fresh_region));
 			ptr = tiny_malloc_from_region_no_lock(rack, tiny_mag_ptr, mag_index, msize, fresh_region);
 
-			// we don't clear because this freshly allocated space is pristine
-			tiny_mag_ptr->alloc_underway = FALSE;
-			OSMemoryBarrier();
+			// we don't clear or zero-check because this freshly allocated space
+			// is pristine
 			SZONE_MAGAZINE_PTR_UNLOCK(tiny_mag_ptr);
+			_malloc_lock_unlock(&tiny_mag_ptr->magazine_alloc_lock);
 			CHECK(szone, __PRETTY_FUNCTION__);
 			return ptr;
 		} else {
+			// We failed to get the alloc lock, so someone else is allocating.
+			// Drop the magazine lock...
 			SZONE_MAGAZINE_PTR_UNLOCK(tiny_mag_ptr);
-			yield();
+
+			// Wait for the other thread on the alloc lock
+			_malloc_lock_lock(&tiny_mag_ptr->magazine_alloc_lock);
+			_malloc_lock_unlock(&tiny_mag_ptr->magazine_alloc_lock);
+
+			// Reacquire the magazine lock to go around the loop again
 			SZONE_MAGAZINE_PTR_LOCK(tiny_mag_ptr);
 		}
 	}
@@ -2373,6 +2578,7 @@
 	boolean_t is_free;
 	mag_index_t mag_index = MAGAZINE_INDEX_FOR_TINY_REGION(tiny_region);
 	magazine_t *tiny_mag_ptr = &(rack->magazines[mag_index]);
+	uint32_t flags = 0;
 
 	MALLOC_TRACE(TRACE_tiny_free, (uintptr_t)rack, (uintptr_t)ptr, (uintptr_t)tiny_mag_ptr, known_size);
 
@@ -2393,6 +2599,10 @@
 	}
 #endif
 
+	if (malloc_zero_policy == MALLOC_ZERO_ON_FREE) {
+		memset(ptr, '\0', TINY_BYTES_FOR_MSIZE(msize));
+	}
+
 	SZONE_MAGAZINE_PTR_LOCK(tiny_mag_ptr);
 
 #if CONFIG_TINY_CACHE
@@ -2410,8 +2620,10 @@
 				return;
 			}
 
-			if ((rack->debug_flags & MALLOC_DO_SCRIBBLE) && msize) {
-				memset(ptr, SCRABBLE_BYTE, TINY_BYTES_FOR_MSIZE(msize));
+			if (malloc_zero_policy != MALLOC_ZERO_ON_FREE) {
+				if ((rack->debug_flags & MALLOC_DO_SCRIBBLE) && msize) {
+					memset(ptr, SCRABBLE_BYTE, TINY_BYTES_FOR_MSIZE(msize));
+				}
 			}
 
 			tiny_mag_ptr->mag_last_free = ptr;
@@ -2427,6 +2639,7 @@
 			msize = msize2;
 			ptr = ptr2;
 			tiny_region = rgn2;
+			flags |= TINY_FREE_FLAG_FROM_CACHE;
 		}
 	}
 #endif /* CONFIG_TINY_CACHE */
@@ -2445,8 +2658,12 @@
 		SZONE_MAGAZINE_PTR_LOCK(tiny_mag_ptr);
 	}
 
+	if (partial_free) {
+		flags |= TINY_FREE_FLAG_PARTIAL;
+	}
+
 	if (tiny_free_no_lock(rack, tiny_mag_ptr, mag_index, tiny_region, ptr,
-			msize, partial_free)) {
+			msize, flags)) {
 		SZONE_MAGAZINE_PTR_UNLOCK(tiny_mag_ptr);
 	}
 
@@ -2458,7 +2675,8 @@
 {
 	msize_t msize = TINY_MSIZE_FOR_BYTES(size + TINY_QUANTUM - 1);
 	unsigned found = 0;
-	mag_index_t mag_index = tiny_mag_get_thread_index() % szone->tiny_rack.num_magazines;
+
+	mag_index_t mag_index = rack_get_thread_index(&szone->tiny_rack) % szone->tiny_rack.num_magazines;
 	magazine_t *tiny_mag_ptr = &(szone->tiny_rack.magazines[mag_index]);
 
 	// make sure to return objects at least one quantum in size
@@ -2533,7 +2751,10 @@
 				if (is_free) {
 					break; // a double free; let the standard free deal with it
 				}
-				if (!tiny_free_no_lock(&szone->tiny_rack, tiny_mag_ptr, mag_index, tiny_region, ptr, msize, false)) {
+				if (malloc_zero_policy == MALLOC_ZERO_ON_FREE) {
+					memset(ptr, '\0', TINY_BYTES_FOR_MSIZE(msize));
+				}
+				if (!tiny_free_no_lock(&szone->tiny_rack, tiny_mag_ptr, mag_index, tiny_region, ptr, msize, 0)) {
 					// Arrange to re-acquire magazine lock
 					tiny_mag_ptr = NULL;
 					tiny_region = NULL;
@@ -2761,3 +2982,39 @@
 	}
 	return 1;
 }
+
+// Consistency check over the tiny regions of `rack` and the tiny free lists.
+//
+// Every entry of region_generation->hashed_regions that is neither NULL nor
+// HASHRING_REGION_DEALLOCATED is passed to tiny_check_region(), bracketed by
+// mag_lock_zine_for_region_trailer() and SZONE_MAGAZINE_PTR_UNLOCK() on the
+// magazine that call returns. After that, each of the NUM_TINY_SLOTS free
+// lists goes through tiny_free_list_check(). `counter` is handed unchanged to
+// both checkers.
+//
+// Returns 1 when every check passes, 0 as soon as one of them fails.
+boolean_t
+tiny_check(rack_t *rack, unsigned counter)
+{
+	/* check tiny regions - should check region count */
+	for (size_t region_idx = 0;
+			region_idx < rack->region_generation->num_regions_allocated;
+			++region_idx) {
+		region_t tiny = rack->region_generation->hashed_regions[region_idx];
+
+		// skip slots that were deallocated or never filled
+		if (HASHRING_REGION_DEALLOCATED == tiny || !tiny) {
+			continue;
+		}
+
+		magazine_t *tiny_mag_ptr = mag_lock_zine_for_region_trailer(rack->magazines,
+				REGION_TRAILER_FOR_TINY_REGION(tiny),
+				MAGAZINE_INDEX_FOR_TINY_REGION(tiny));
+
+		// the magazine is released on both the failing and the passing path
+		int region_bad = !tiny_check_region(rack, tiny, region_idx, counter);
+		SZONE_MAGAZINE_PTR_UNLOCK(tiny_mag_ptr);
+		if (region_bad) {
+			return 0;
+		}
+	}
+
+	/* check tiny free lists */
+	for (size_t slot_idx = 0; slot_idx < NUM_TINY_SLOTS; ++slot_idx) {
+		if (!tiny_free_list_check(rack, (grain_t)slot_idx, counter)) {
+			return 0;
+		}
+	}
+
+	return 1;
+}