nanov2_malloc.c diff - src/nanov2_malloc.c - Libmalloc source code libmalloc-409.81.2

src/nanov2_malloc.c libmalloc-409.81.2 ⇄ libmalloc-374.120.1
--- libmalloc/libmalloc-409.81.2/src/nanov2_malloc.c
+++ libmalloc/libmalloc-374.120.1/src/nanov2_malloc.c
@@ -41,38 +41,18 @@
 #pragma mark -
 #pragma mark Externals for resolved functions
 
+MALLOC_NOEXPORT extern void *nanov2_allocate(nanozonev2_t *nanozone, size_t rounded_size,
+		boolean_t clear);
+MALLOC_NOEXPORT extern void nanov2_free_to_block(nanozonev2_t *nanozone, void *ptr,
+		nanov2_size_class_t size_class);
+MALLOC_NOEXPORT extern boolean_t nanov2_madvise_block(nanozonev2_t *nanozone,
+		nanov2_block_meta_t *block_metap, nanov2_block_t *blockp,
+		nanov2_size_class_t size_class);
 MALLOC_NOEXPORT extern size_t nanov2_pointer_size(nanozonev2_t *nanozone, void *ptr,
 		boolean_t allow_inner);
 MALLOC_NOEXPORT extern size_t nanov2_pressure_relief(nanozonev2_t *nanozone, size_t goal);
 
 #if OS_VARIANT_RESOLVED
-MALLOC_ALWAYS_INLINE MALLOC_INLINE size_t
-nanov2_pointer_size_inline(nanozonev2_t *nanozone, void *ptr, boolean_t allow_inner,
-		nanov2_size_class_t *size_class_out, nanov2_block_meta_t **block_metap_out);
-
-MALLOC_ALWAYS_INLINE MALLOC_INLINE void *
-nanov2_allocate_from_block_inline(nanozonev2_t *nanozone,
-		nanov2_block_meta_t *block_metap, nanov2_size_class_t size_class,
-		nanov2_block_meta_t **madvise_block_metapp_out, bool *corruption);
-
-static void *
-nanov2_allocate_outlined(nanozonev2_t *nanozone,
-		nanov2_block_meta_t **block_metapp, size_t rounded_size,
-		nanov2_size_class_t size_class, int allocation_index,
-		nanov2_block_meta_t *madvise_block_metap, void *corrupt_slot,
-		bool clear);
-
-MALLOC_ALWAYS_INLINE MALLOC_INLINE nanov2_block_meta_t *
-nanov2_free_to_block_inline(nanozonev2_t *nanozone, void *ptr,
-		nanov2_size_class_t size_class, nanov2_block_meta_t *block_metap);
-
-static boolean_t nanov2_madvise_block_locked(
-		nanozonev2_t *nanozone, nanov2_block_meta_t *block_metap,
-		nanov2_block_t *blockp, nanov2_size_class_t size_class, uint32_t expected_state);
-static void nanov2_madvise_block(nanozonev2_t *nanozone,
-		nanov2_block_meta_t *block_metap, nanov2_size_class_t size_class,
-		uint32_t expected_state);
-
 MALLOC_NOEXPORT extern nanov2_arena_t *nanov2_allocate_new_region(nanozonev2_t *nanozone);
 #endif // OS_VARIANT_RESOLVED
 
@@ -657,13 +637,13 @@
 		// Default case is max magazines == physical number of CPUs, which
 		// must be > _malloc_cpu_number() >> hyper_shift, so the modulo
 		// operation is not required.
-		return (_malloc_cpu_number() >> hyper_shift) & MAX_CURRENT_BLOCKS_MASK;
+		return _malloc_cpu_number() >> hyper_shift;
 	}
 #else // CONFIG_NANO_USES_HYPER_SHIFT
 	if (os_likely(nano_common_max_magazines_is_ncpu)) {
 		// Default case is max magazines == logical number of CPUs, which
 		// must be > _malloc_cpu_number() so the modulo operation is not required.
-		return _malloc_cpu_number() & MAX_CURRENT_BLOCKS_MASK;
+		return _malloc_cpu_number();
 	}
 #endif // CONFIG_NANO_USES_HYPER_SHIFT
 
@@ -673,11 +653,9 @@
 #endif // CONFIG_NANO_USES_HYPER_SHIFT
 
 	if (os_likely(_os_cpu_number_override == -1)) {
-		return ((_malloc_cpu_number() >> shift) % nano_common_max_magazines) &
-				MAX_CURRENT_BLOCKS_MASK;
-	}
-	return ((_os_cpu_number_override >> shift) % nano_common_max_magazines) &
-			MAX_CURRENT_BLOCKS_MASK;
+		return (_malloc_cpu_number() >> shift) % nano_common_max_magazines;
+	}
+	return (_os_cpu_number_override >> shift) % nano_common_max_magazines;
 }
 #endif // OS_VARIANT_RESOLVED
 
@@ -1062,8 +1040,7 @@
 MALLOC_NOEXPORT size_t
 nanov2_size(nanozonev2_t *nanozone, const void *ptr)
 {
-	size_t size = nanov2_pointer_size_inline(nanozone, (void *)ptr, FALSE,
-			NULL, NULL);
+	size_t size = nanov2_pointer_size(nanozone, (void *)ptr, FALSE);
 	return size ? size : nanozone->helper_zone->size(nanozone->helper_zone, ptr);
 }
 
@@ -1072,166 +1049,54 @@
 {
 	size_t rounded_size = _nano_common_good_size(size);
 	if (rounded_size <= NANO_MAX_SIZE) {
-		nanov2_block_meta_t *madvise_block_metap = NULL;
-		nanov2_size_class_t size_class = nanov2_size_class_from_size(rounded_size);
-
-		// Get the index of the pointer to the block from which we are should be
-		// allocating. This currently depends on the physical CPU number.
-		int allocation_index = nanov2_get_allocation_block_index();
-
-		// Get the current allocation block meta data pointer. If this is NULL,
-		// we need to find a new allocation block.
-		nanov2_block_meta_t **block_metapp =
-				&nanozone->current_block[size_class][allocation_index];
-		nanov2_block_meta_t *block_metap = os_atomic_load(block_metapp, relaxed);
-		bool corruption = false;
-		void *ptr = NULL;
-		if (block_metap) {
-			// Fast path: we have a block -- try to allocate from it.
-			ptr = nanov2_allocate_from_block_inline(nanozone, block_metap,
-					size_class, &madvise_block_metap, &corruption);
-			if (ptr && !corruption) {
-				// Always clear the double-free guard so that we can recognize
-				// that this block is not on the free list.
-				nanov2_free_slot_t *slotp = (nanov2_free_slot_t *)ptr;
-				os_atomic_store(&slotp->double_free_guard, 0, relaxed);
-
-				// We know the body of the allocation is already clear, so we just
-				// need to clean up the next_slot word to get to all-zero.  Do so in
-				// all cases, even if a cleared allocation is not requested, to
-				// prevent any leakage through the next_slot bits.
-				os_atomic_store(&slotp->next_slot, 0, relaxed);
-				return ptr;
+		void *ptr = nanov2_allocate(nanozone, rounded_size, FALSE);
+		if (ptr) {
+			if (os_unlikely(size && (nanozone->debug_flags & MALLOC_DO_SCRIBBLE))) {
+				memset(ptr, SCRIBBLE_BYTE, size);
 			}
-		}
-
-		return nanov2_allocate_outlined(nanozone, block_metapp, rounded_size,
-				size_class, allocation_index, madvise_block_metap, ptr, false);
-	}
-
-	// Too big for nano, so delegate to the helper zone.
+			return ptr;
+		}
+	}
+
+	// If we reach this point, the request was too big for Nano or the Nano
+	// allocation failed, so delegate to the helper zone.
 	return nanozone->helper_zone->malloc(nanozone->helper_zone, size);
-}
-
-MALLOC_ALWAYS_INLINE MALLOC_INLINE
-void
-nanov2_bzero(void *ptr, size_t size)
-{
-	// TODO: inline bzero from libplatform
-	bzero(ptr, size);
 }
 
 MALLOC_NOEXPORT void
 nanov2_free_definite_size(nanozonev2_t *nanozone, void *ptr, size_t size)
 {
+	// Check whether it's a Nano pointer and get the size. We should only get
+	// here if it is and furthermore we already know that "size" is the actual
+	// rounded size, so don't waste time rechecking that. This is just a
+	// sanity check.
 	if (ptr && nanov2_has_valid_signature(ptr)) {
-		nanov2_size_class_t size_class = nanov2_size_class_from_size(size);
-
-		if (malloc_zero_policy == MALLOC_ZERO_ON_FREE) {
-			if (size_class != 0) {
-				nanov2_bzero((char *)ptr + sizeof(nanov2_free_slot_t),
-						size - sizeof(nanov2_free_slot_t));
-			}
-		}
-
-		nanov2_block_meta_t *madvise_block_metap = nanov2_free_to_block_inline(
-				nanozone, ptr, size_class, NULL);
-		if (madvise_block_metap) {
-			nanov2_madvise_block(nanozone, madvise_block_metap, size_class,
-					SLOT_CAN_MADVISE);
-		}
+		if (os_unlikely(nanozone->debug_flags & MALLOC_DO_SCRIBBLE)) {
+			memset(ptr, SCRABBLE_BYTE, size);
+		}
+		nanov2_free_to_block(nanozone, ptr, nanov2_size_class_from_size(size));
 		return;
 	}
 	return nanozone->helper_zone->free_definite_size(nanozone->helper_zone, ptr,
 			size);
 }
 
-static void
-_nanov2_free(nanozonev2_t *nanozone, void *ptr, bool try)
-{
-	if (ptr) {
+MALLOC_NOEXPORT void
+nanov2_free(nanozonev2_t *nanozone, void *ptr)
+{
+	if (ptr && nanov2_has_valid_signature(ptr)) {
 		// Check whether it's a Nano pointer and get the size. If it's not
 		// Nano, pass it to the helper zone.
-		nanov2_size_class_t size_class;
-		nanov2_block_meta_t *block_metap;
-		size_t size = nanov2_pointer_size_inline(nanozone, ptr, FALSE,
-				&size_class, &block_metap);
+		size_t size = nanov2_pointer_size(nanozone, ptr, FALSE);
 		if (size) {
-			if (malloc_zero_policy == MALLOC_ZERO_ON_FREE) {
-				if (size > sizeof(nanov2_free_slot_t)) {
-					nanov2_bzero((char *)ptr + sizeof(nanov2_free_slot_t),
-							size - sizeof(nanov2_free_slot_t));
-				}
+			if (os_unlikely(nanozone->debug_flags & MALLOC_DO_SCRIBBLE)) {
+				memset(ptr, SCRABBLE_BYTE, size);
 			}
-
-			nanov2_block_meta_t *madvise_block_metap = nanov2_free_to_block_inline(
-					nanozone, ptr, size_class, block_metap);
-			if (madvise_block_metap) {
-				nanov2_madvise_block(nanozone, madvise_block_metap, size_class,
-						SLOT_CAN_MADVISE);
-			}
+			nanov2_free_to_block(nanozone, ptr, nanov2_size_class_from_size(size));
 			return;
 		}
 	}
-	return try ? nanozone->helper_zone->try_free_default(nanozone->helper_zone, ptr) :
-			nanozone->helper_zone->free(nanozone->helper_zone, ptr);
-}
-
-MALLOC_NOEXPORT void
-nanov2_free(nanozonev2_t *nanozone, void *ptr)
-{
-	_nanov2_free(nanozone, ptr, false);
-}
-
-MALLOC_NOEXPORT void
-nanov2_try_free_default(nanozonev2_t *nanozone, void *ptr)
-{
-	_nanov2_free(nanozone, ptr, true);
-}
-
-MALLOC_ALWAYS_INLINE MALLOC_INLINE
-void *
-nanov2_malloc_zero(nanozonev2_t *nanozone, size_t rounded_size)
-{
-	nanov2_block_meta_t *madvise_block_metap = NULL;
-	nanov2_size_class_t size_class = nanov2_size_class_from_size(rounded_size);
-
-	// Get the index of the pointer to the block from which we are should be
-	// allocating. This currently depends on the physical CPU number.
-	int allocation_index = nanov2_get_allocation_block_index();
-
-	// Get the current allocation block meta data pointer. If this is NULL,
-	// we need to find a new allocation block.
-	nanov2_block_meta_t **block_metapp =
-			&nanozone->current_block[size_class][allocation_index];
-	nanov2_block_meta_t *block_metap = os_atomic_load(block_metapp, relaxed);
-	bool corruption = false;
-	void *ptr = NULL;
-	if (block_metap) {
-		// Fast path: we have a block -- try to allocate from it.
-		ptr = nanov2_allocate_from_block_inline(nanozone, block_metap,
-				size_class, &madvise_block_metap, &corruption);
-		if (ptr && !corruption) {
-			if (malloc_zero_policy == MALLOC_ZERO_ON_FREE) {
-				// Always clear the double-free guard so that we can recognize that
-				// this block is not on the free list.
-				nanov2_free_slot_t *slotp = (nanov2_free_slot_t *)ptr;
-				os_atomic_store(&slotp->double_free_guard, 0, relaxed);
-
-				// We know the body of the allocation is already clear, so we just
-				// need to clean up the next_slot word to get to all-zero.  Do so in
-				// all cases, even if a cleared allocation is not requested, to
-				// prevent any leakage through the next_slot bits.
-				os_atomic_store(&slotp->next_slot, 0, relaxed);
-			} else {
-				nanov2_bzero(ptr, rounded_size);
-			}
-			return ptr;
-		}
-	}
-
-	return nanov2_allocate_outlined(nanozone, block_metapp, rounded_size,
-			size_class, allocation_index, madvise_block_metap, ptr, true);
+	return nanozone->helper_zone->free(nanozone->helper_zone, ptr);
 }
 
 MALLOC_NOEXPORT void *
@@ -1243,23 +1108,15 @@
 	}
 	size_t rounded_size = _nano_common_good_size(total_bytes);
 	if (total_bytes <= NANO_MAX_SIZE) {
-		return nanov2_malloc_zero(nanozone, rounded_size);
-	}
-
-	// Too big for nano, so delegate to the helper zone.
+		void *ptr = nanov2_allocate(nanozone, rounded_size, TRUE);
+		if (ptr) {
+			return ptr;
+		}
+	}
+
+	// If we reach this point, the request was too big for Nano or the Nano
+	// allocation failed, so delegate to the helper zone.
 	return nanozone->helper_zone->calloc(nanozone->helper_zone, 1, total_bytes);
-}
-
-MALLOC_NOEXPORT void *
-nanov2_malloc_zero_on_alloc(nanozonev2_t *nanozone, size_t size)
-{
-	size_t rounded_size = _nano_common_good_size(size);
-	if (rounded_size <= NANO_MAX_SIZE) {
-		return nanov2_malloc_zero(nanozone, rounded_size);
-	}
-
-	// Too big for nano, so delegate to the helper zone.
-	return nanozone->helper_zone->malloc(nanozone->helper_zone, size);
 }
 #endif // OS_VARIANT_RESOLVED
 
@@ -1313,9 +1170,7 @@
 			}
 		} else {
 			// Same size or shrinking by less than half size. Keep the same
-			// allocation and scribble the area that's being released. Nothing
-			// to do for zero-on-free yet; that will be taken care of when the
-			// shrunk allocation is freed.
+			// allocation and clear the area that's being released.
 			if (new_size != old_size) {
 				MALLOC_ASSERT(new_size < old_size);
 				if (os_unlikely(nanozone->debug_flags & MALLOC_DO_SCRIBBLE)) {
@@ -1362,8 +1217,7 @@
 	size_t rounded_size = _nano_common_good_size(size);
 	if (rounded_size <= NANO_MAX_SIZE) {
 		while (allocated < count) {
-			// TODO: nanov2_malloc will redo _nano_common_good_size
-			void *ptr = nanov2_malloc(nanozone, rounded_size);
+			void *ptr = nanov2_allocate(nanozone, rounded_size, FALSE);
 			if (!ptr) {
 				break;
 			}
@@ -1394,20 +1248,19 @@
 		}
 	}
 }
-
-MALLOC_NOEXPORT void *
+#endif // OS_VARIANT_RESOLVED
+
+#if OS_VARIANT_NOTRESOLVED
+static void *
 nanov2_memalign(nanozonev2_t *nanozone, size_t alignment, size_t size)
 {
-	// Serve directly if the requested alignment is trivially satisfied by our
-	// baseline alignment (16 bytes)
-	if (alignment <= NANO_REGIME_QUANTA_SIZE) {
-		return nanov2_malloc(nanozone, size);
-	}
-
-	// Otherwise delegate to the helper zone
+	// Always delegate this to the helper zone.
 	return nanozone->helper_zone->memalign(nanozone->helper_zone, alignment,
 			size);
 }
+#endif // OS_VARIANT_NOTRESOLVED
+
+#if OS_VARIANT_RESOLVED
 
 size_t
 nanov2_pressure_relief(nanozonev2_t *nanozone, size_t goal)
@@ -1453,9 +1306,8 @@
 					if (meta.next_slot == SLOT_CAN_MADVISE) {
 						nanov2_block_t *blockp = nanov2_block_address_from_meta_index(
 								nanozone, arena, i);
-						if (nanov2_madvise_block_locked(nanozone, block_metap,
-								blockp, nanov2_size_class_for_ptr(nanozone, blockp),
-								SLOT_CAN_MADVISE)) {
+						if (nanov2_madvise_block(nanozone, block_metap,
+								blockp, nanov2_size_class_for_ptr(nanozone, blockp))) {
 							total += NANOV2_BLOCK_SIZE;
 						}
 					}
@@ -2111,10 +1963,8 @@
 // the allocation, or 0 if the pointer does not correspond to an active
 // allocation. If allow_inner is true, the pointer need not point to the start
 // of the allocation.
-MALLOC_ALWAYS_INLINE MALLOC_INLINE size_t
-nanov2_pointer_size_inline(nanozonev2_t *nanozone, void *ptr,
-		boolean_t allow_inner, nanov2_size_class_t *size_class_out,
-		nanov2_block_meta_t **block_metap_out)
+size_t
+nanov2_pointer_size(nanozonev2_t *nanozone, void *ptr, boolean_t allow_inner)
 {
 	// First check the address signature.
 	if (!nanov2_has_valid_signature((void *)ptr)) {
@@ -2196,42 +2046,27 @@
 		return 0;
 	}
 
-	if (size_class_out) {
-		*size_class_out = size_class;
-	}
-	if (block_metap_out) {
-		*block_metap_out = block_metap;
-	}
 	return size;
-}
-
-size_t
-nanov2_pointer_size(nanozonev2_t *nanozone, void *ptr, boolean_t allow_inner)
-{
-	return nanov2_pointer_size_inline(nanozone, ptr, allow_inner, NULL, NULL);
 }
 
 #pragma mark -
 #pragma mark Madvise Management
 
-// Given a pointer to a block and its metadata, calls madvise() on that block if
-// it is still in the state we expect, either SLOT_CAN_MADVISE or SLOT_MADVISED
-// (the latter expected when we need to pessimistically re-madvise a block we
-// may have touched while racing to allocate against a transition to
-// SLOT_CAN_MADVISE). Returns true on success, false if the block is not in the
-// correct state or if the state changed during the operation.
+// Given a pointer to a block and its metadata, calls madvise() on that block
+// if it is in state SLOT_CAN_MADVISE. Returns true on success, false if the
+// block is not in the correct state or if the state changed during the
+// operation.
 //
 // This function must be called with the zone's madvise_lock held
-static boolean_t
-nanov2_madvise_block_locked(nanozonev2_t *nanozone,
-		nanov2_block_meta_t *block_metap, nanov2_block_t *blockp,
-		nanov2_size_class_t size_class, uint32_t expected_state)
+boolean_t
+nanov2_madvise_block(nanozonev2_t *nanozone, nanov2_block_meta_t *block_metap,
+		nanov2_block_t *blockp, nanov2_size_class_t size_class)
 {
 	_malloc_lock_assert_owner(&nanozone->madvise_lock);
 
 	boolean_t madvised = FALSE;
 	nanov2_block_meta_t old_meta = os_atomic_load(block_metap, relaxed);
-	if (old_meta.next_slot == expected_state) {
+	if (old_meta.next_slot == SLOT_CAN_MADVISE) {
 		// Nobody raced with us. We can safely madvise this block. First change
 		// the state to SLOT_MADVISING so that other threads don't try to
 		// grab the block for new allocations.
@@ -2273,18 +2108,6 @@
 	return madvised;
 }
 
-static void
-nanov2_madvise_block(nanozonev2_t *nanozone, nanov2_block_meta_t *block_metap,
-		nanov2_size_class_t size_class, uint32_t expected_state)
-{
-	nanov2_block_t *blockp = nanov2_block_address_from_meta_ptr(nanozone,
-			block_metap);
-	_malloc_lock_lock(&nanozone->madvise_lock);
-	nanov2_madvise_block_locked(nanozone, block_metap, blockp, size_class,
-			expected_state);
-	_malloc_lock_unlock(&nanozone->madvise_lock);
-}
-
 #endif // OS_VARIANT_RESOLVED
 
 #pragma mark -
@@ -2380,38 +2203,18 @@
 
 #if OS_VARIANT_RESOLVED
 
-MALLOC_NOINLINE MALLOC_NORETURN
-static void
-nanov2_guard_corruption_detected(void *corrupt_slot)
-{
-	uint64_t guard = *(uint64_t *)corrupt_slot;
-	malloc_zone_error(MALLOC_ABORT_ON_CORRUPTION, true,
-			"Heap corruption detected, free list is damaged at %p\n"
-			"*** Incorrect guard value: %lu\n", corrupt_slot, guard);
-	__builtin_unreachable();
-}
-
 // Allocates memory from the block that corresponds to a given block meta data
 // pointer. The memory is taken from the free list if possible, or from the
 // unused region of the block if not. If the block is no longer in use or is
 // full, NULL is returned and the caller is expected to find another block to
 // allocate from.
-MALLOC_ALWAYS_INLINE MALLOC_INLINE
+MALLOC_NOEXPORT
 void *
-nanov2_allocate_from_block_inline(nanozonev2_t *nanozone,
-		nanov2_block_meta_t *block_metap, nanov2_size_class_t size_class,
-		nanov2_block_meta_t **madvise_block_metap_out, bool *corruption)
+nanov2_allocate_from_block(nanozonev2_t *nanozone,
+		nanov2_block_meta_t *block_metap, nanov2_size_class_t size_class)
 {
 	nanov2_block_meta_view_t old_meta_view;
-
-	// Our loads of the block metadata use dependency ordering, which guarantees
-	// that any loads we do from a slot pointer derived from the metadata value
-	// as we do below will observe all of the stores preceding the store-release
-	// of that value we observed.  This allows us to safely rely on the contents
-	// of the slot updated when it was last freed, including the double-free
-	// guard and zeroing done by zero-on-free (which is required for correctness
-	// in the case of calloc).
-	old_meta_view.meta = os_atomic_load(block_metap, dependency);
+	old_meta_view.meta = os_atomic_load(block_metap, relaxed);
 
 	// Calculating blockp and ptr is relatively expensive. Do both lazily to
 	// minimize the time in the block starting with "again:" and ending with the
@@ -2461,8 +2264,21 @@
 		if (old_meta_view.meta.next_slot == SLOT_CAN_MADVISE ||
 				old_meta_view.meta.next_slot == SLOT_MADVISING ||
 				old_meta_view.meta.next_slot == SLOT_MADVISED) {
-			*madvise_block_metap_out = block_metap;
-			return NULL;
+			_malloc_lock_lock(&nanozone->madvise_lock);
+			if (old_meta_view.meta.next_slot == SLOT_MADVISED) {
+				// We raced against another thread madvising this block. We need
+				// to redo the madvise because we may have touched it when
+				// reading the next pointer in the freelist.
+				if (!blockp) {
+					blockp = nanov2_block_address_from_meta_ptr(nanozone, block_metap);
+				}
+				if (mvm_madvise_free(nanozone, nanov2_region_address_for_ptr(blockp),
+						(uintptr_t)blockp, (uintptr_t)(blockp + 1), NULL, FALSE)) {
+					malloc_zone_error(0, false,
+							"Failed to remadvise block at blockp: %p, error: %d\n", blockp, errno);
+				}
+			}
+			_malloc_lock_unlock(&nanozone->madvise_lock);
 		}
 		goto again;
 	}
@@ -2474,40 +2290,26 @@
 		ptr = nanov2_slot_in_block_ptr(blockp, size_class, slot);
 	}
 
+	nanov2_free_slot_t *slotp = os_atomic_inject_dependency(ptr,
+			(unsigned long)old_meta_view.bits);
 	if (from_free_list) {
 		// We grabbed the item from the free list. Check the free list canary
 		// and crash if it's not valid. We can't do this check before the
 		// cmpxchgv because another thread may race with us, claim the slot and
 		// write to it.
-		nanov2_free_slot_t *slotp = ptr;
 		uintptr_t guard = os_atomic_load(&slotp->double_free_guard, relaxed);
-		if (os_unlikely((guard ^ nanozone->slot_freelist_cookie) != (uintptr_t)ptr)) {
-			*corruption = true;
-		}
-	}
-
+		if ((guard ^ nanozone->slot_freelist_cookie) != (uintptr_t)ptr) {
+			malloc_zone_error(MALLOC_ABORT_ON_CORRUPTION, true,
+					"Heap corruption detected, free list is damaged at %p\n"
+					"*** Incorrect guard value: %lu\n", ptr, guard);
+			__builtin_unreachable();
+		}
+	}
+	
 #if DEBUG_MALLOC
 	nanozone->statistics.size_class_statistics[size_class].total_allocations++;
 #endif // DEBUG_MALLOC
 
-	return ptr;
-}
-
-static void *
-nanov2_allocate_from_block(nanozonev2_t *nanozone,
-		nanov2_block_meta_t *block_metap, nanov2_size_class_t size_class)
-{
-	nanov2_block_meta_t *madvise_block_metap = NULL;
-	bool corruption = false;
-	void *ptr = nanov2_allocate_from_block_inline(nanozone, block_metap,
-			size_class, &madvise_block_metap, &corruption);
-	if (os_unlikely(corruption)) {
-		nanov2_guard_corruption_detected(ptr);
-	}
-	if (madvise_block_metap) {
-		nanov2_madvise_block(nanozone, madvise_block_metap, size_class,
-				SLOT_MADVISED);
-	}
 	return ptr;
 }
 
@@ -2855,36 +2657,39 @@
 	return NULL;
 }
 
-// This function is called when a fast-path allocation from a given (size_class,
-// allocation_index) has been tried and failed, and we need to act on
-// observations from that attempt and/or retry the allocation.  Its rather
-// tortured calling contract is designed to allow the caller to avoid pushing a
-// frame and pass along as much of what it has already computed as possible.
+// Allocates memory of a given size (which must be a multiple of the Nano
+// quantum size) and optionally clears it (for calloc).
 //
-// If @corrupt_slot is non-NULL it means we detected corruption of the slot's
-// guard on the fast path, and we need to report that corruption.
-//
-// If @madvise_block_metap is non-NULL it means we raced with another thread
-// madvising the block we tried to allocate from and need to re-madvise it.
+// Allocation is attempted first from the block last used for the caller's
+// context (which is initially the physical CPU by default). If there is no
+// last block, or the block is full or now out of use, find another one, if
+// possible. See the comments for nanov2_get_allocation_block() for the details.
 //
 // If the allocation fails, NULL is returned.
-static void *
-nanov2_allocate_outlined(nanozonev2_t *nanozone, nanov2_block_meta_t **block_metapp,
-		size_t rounded_size, nanov2_size_class_t size_class,
-		int allocation_index, nanov2_block_meta_t *madvise_block_metap,
-		void *corrupt_slot, bool clear)
+void *
+nanov2_allocate(nanozonev2_t *nanozone, size_t rounded_size, boolean_t clear)
 {
 	void *ptr = NULL;
-
-	if (os_unlikely(corrupt_slot)) {
-		nanov2_guard_corruption_detected(corrupt_slot);
-	}
-
-	// If we need to re-madvise the old block that we might have accidentally
-	// touched out of turn, do so now.
-	if (madvise_block_metap) {
-		nanov2_madvise_block(nanozone, madvise_block_metap, size_class,
-				SLOT_MADVISED);
+	nanov2_size_class_t size_class = nanov2_size_class_from_size(rounded_size);
+	MALLOC_ASSERT(size_class < NANO_SIZE_CLASSES);
+	MALLOC_ASSERT(rounded_size != 0);
+	nanov2_block_meta_t *block_metap;
+	nanov2_block_meta_t **block_metapp;
+
+	// Get the index of the pointer to the block from which we should be
+	// allocating. This currently depends on the physical CPU number.
+	int allocation_index = nanov2_get_allocation_block_index() & MAX_CURRENT_BLOCKS_MASK;
+
+	// Get the current allocation block meta data pointer. If this is NULL,
+	// we need to find a new allocation block.
+	block_metapp = &nanozone->current_block[size_class][allocation_index];
+	block_metap = os_atomic_load(block_metapp, relaxed);
+	if (block_metap) {
+		// Fast path: we have a block -- try to allocate from it.
+		ptr = nanov2_allocate_from_block(nanozone, block_metap, size_class);
+		if (ptr) {
+			goto done;
+		}
 	}
 
 	// No current allocation block, or we were unable to allocate. We need to
@@ -2901,7 +2706,7 @@
 	_malloc_lock_s *lock = &nanozone->current_block_lock[size_class][allocation_index];
 	_malloc_lock_lock(lock);
 
-	nanov2_block_meta_t *block_metap = os_atomic_load(block_metapp, relaxed);
+	block_metap = os_atomic_load(block_metapp, relaxed);
 	if (block_metap) {
 		ptr = nanov2_allocate_from_block(nanozone, block_metap, size_class);
 		if (ptr) {
@@ -2923,43 +2728,21 @@
 		// We could not find a block to allocate from -- make future
 		// allocations for this size class go to the helper zone until
 		// we have enough free space.
-		os_atomic_or(&nanozone->delegate_allocations,
-				(uint16_t)(1 << size_class), relaxed);
-
-		ptr = nanozone->helper_zone->malloc(nanozone->helper_zone, rounded_size);
+		_malloc_lock_lock(&nanozone->delegate_allocations_lock);
+		nanozone->delegate_allocations |= 1 << size_class;
+		_malloc_lock_unlock(&nanozone->delegate_allocations_lock);
 	}
 
 done:
-	if (os_likely(ptr)) {
-		switch (malloc_zero_policy) {
-		case MALLOC_ZERO_ON_FREE: {
+	if (ptr) {
+		if (clear) {
+			memset(ptr, '\0', rounded_size);
+		} else {
 			// Always clear the double-free guard so that we can recognize that
 			// this block is not on the free list.
 			nanov2_free_slot_t *slotp = (nanov2_free_slot_t *)ptr;
 			os_atomic_store(&slotp->double_free_guard, 0, relaxed);
-
-			// We know the body of the allocation is already clear, so we just
-			// need to clean up the next_slot word to get to all-zero.  Do so in
-			// all cases, even if a cleared allocation is not requested, to
-			// prevent any leakage through the next_slot bits.
-			os_atomic_store(&slotp->next_slot, 0, relaxed);
-			break;
-		}
-		case MALLOC_ZERO_NONE:
-			if (!clear) {
-				// Always clear the double-free guard so that we can recognize that
-				// this block is not on the free list.
-				nanov2_free_slot_t *slotp = (nanov2_free_slot_t *)ptr;
-				os_atomic_store(&slotp->double_free_guard, 0, relaxed);
-				break;
-			}
-			// fall through
-		case MALLOC_ZERO_ON_ALLOC:
-			memset(ptr, '\0', rounded_size);
-			break;
-		}
-	} else {
-		malloc_set_errno_fast(MZ_POSIX, ENOMEM);
+		}
 	}
 	return ptr;
 }
@@ -2968,32 +2751,20 @@
 #pragma mark Freeing
 
 // Frees an allocation to its owning block and updates the block's state.
-//
-// If the block becomes empty, it is marked as SLOT_CAN_MADVISE and we return
-// the block to the caller to madvise if dictated by policy.
-MALLOC_ALWAYS_INLINE MALLOC_INLINE
-nanov2_block_meta_t *
-nanov2_free_to_block_inline(nanozonev2_t *nanozone, void *ptr,
-		nanov2_size_class_t size_class, nanov2_block_meta_t *block_metap)
+// If the block becomes empty, it is marked as SLOT_CAN_MADVISE and is
+// madvised immediately if the policy is NANO_MADVISE_IMMEDIATE.
+void
+nanov2_free_to_block(nanozonev2_t *nanozone, void *ptr,
+		nanov2_size_class_t size_class)
 {
 	nanov2_block_t *blockp = nanov2_block_address_for_ptr(ptr);
-	if (!block_metap) {
-		block_metap = nanov2_meta_ptr_for_ptr(nanozone, ptr);
-	}
+	nanov2_block_meta_t *block_metap = nanov2_meta_ptr_for_ptr(nanozone, ptr);
 
 	// Release the slot memory onto the block's freelist.
 	nanov2_block_meta_t old_meta = os_atomic_load(block_metap, relaxed);
 	int slot_count = slots_by_size_class[size_class];
 	nanov2_block_meta_t new_meta;
 	boolean_t was_full;
-
-	nanov2_free_slot_t *slotp = (nanov2_free_slot_t *)ptr;
-	// All of the free slot content (double_free_guard, next_slot word and the
-	// zeroed remainder of the slot) must be visible when the os_atomic_cmpxchgv
-	// completes, so the metadata updates on either path below need a release
-	// barrier.
-	os_atomic_store(&slotp->double_free_guard,
-			nanozone->slot_freelist_cookie ^ (uintptr_t)ptr, relaxed);
 
 again:
 	was_full = old_meta.next_slot == SLOT_FULL;
@@ -3003,14 +2774,12 @@
 	boolean_t freeing_last_active_slot = !was_full &&
 			new_meta.free_count == slots_by_size_class[size_class] - 1;
 	if (freeing_last_active_slot) {
-		os_atomic_store(&slotp->next_slot, SLOT_NULL, relaxed);
-
 		// Releasing the last active slot onto the free list. Mark the block as
 		// ready to be madvised if it's not in use, otherwise reset next_slot
 		// to SLOT_BUMP.
 		new_meta.next_slot = new_meta.in_use ? SLOT_BUMP : SLOT_CAN_MADVISE;
 		// Write the updated meta data; try again if we raced with another thread.
-		if (!os_atomic_cmpxchgv(block_metap, old_meta, new_meta, &old_meta, release)) {
+		if (!os_atomic_cmpxchgv(block_metap, old_meta, new_meta, &old_meta, relaxed)) {
 			goto again;
 		}
 
@@ -3018,14 +2787,20 @@
 		// is to do so immediately.
 		if (new_meta.next_slot == SLOT_CAN_MADVISE &&
 				nanov2_madvise_policy == NANO_MADVISE_IMMEDIATE) {
-			return block_metap;
+			_malloc_lock_lock(&nanozone->madvise_lock);
+			nanov2_madvise_block(nanozone, block_metap, blockp, size_class);
+			_malloc_lock_unlock(&nanozone->madvise_lock);
 		}
 	} else {
 		int slot_index = nanov2_slot_index_in_block(blockp, size_class, ptr);
 		new_meta.next_slot = slot_index + 1;  // meta.next_slot is 1-based
-		os_atomic_store(&slotp->next_slot,
-				was_full ? SLOT_BUMP : old_meta.next_slot, relaxed);
-
+		nanov2_free_slot_t *slotp = (nanov2_free_slot_t *)ptr;
+		slotp->next_slot = was_full ? SLOT_BUMP : old_meta.next_slot;
+		os_atomic_store(&slotp->double_free_guard,
+				nanozone->slot_freelist_cookie ^ (uintptr_t)ptr, relaxed);
+
+		// The double_free_guard change must be visible when the os_atomic_cmpxchgv
+		// completes.
 		// Write the updated meta data; try again if we raced with another thread.
 		if (!os_atomic_cmpxchgv(block_metap, old_meta, new_meta, &old_meta, release)) {
 			goto again;
@@ -3039,14 +2814,14 @@
 	uint16_t class_mask = 1 << size_class;
 	if (!new_meta.in_use && (nanozone->delegate_allocations & class_mask) &&
 			(new_meta.free_count >= 0.75 * slot_count)) {
-		os_atomic_and(&nanozone->delegate_allocations, ~class_mask, relaxed);
+		_malloc_lock_lock(&nanozone->delegate_allocations_lock);
+		nanozone->delegate_allocations &= ~class_mask;
+		_malloc_lock_unlock(&nanozone->delegate_allocations_lock);
 	}
 
 #if DEBUG_MALLOC
 	nanozone->statistics.size_class_statistics[size_class].total_frees++;
 #endif // DEBUG_MALLOC
-
-	return NULL;
 }
 
 #endif // OS_VARIANT_RESOLVED
@@ -3075,14 +2850,9 @@
 	}
 
 	// Set up the basic_zone portion of the nanozonev2 structure
-	nanozone->basic_zone.version = 13;
+	nanozone->basic_zone.version = 12;
 	nanozone->basic_zone.size = OS_RESOLVED_VARIANT_ADDR(nanov2_size);
-	if (malloc_zero_policy == MALLOC_ZERO_ON_ALLOC) {
-		nanozone->basic_zone.malloc =
-				OS_RESOLVED_VARIANT_ADDR(nanov2_malloc_zero_on_alloc);
-	} else {
-		nanozone->basic_zone.malloc = OS_RESOLVED_VARIANT_ADDR(nanov2_malloc);
-	}
+	nanozone->basic_zone.malloc = OS_RESOLVED_VARIANT_ADDR(nanov2_malloc);
 	nanozone->basic_zone.calloc = OS_RESOLVED_VARIANT_ADDR(nanov2_calloc);
 	nanozone->basic_zone.valloc = (void *)nanov2_valloc;
 	nanozone->basic_zone.free = OS_RESOLVED_VARIANT_ADDR(nanov2_free);
@@ -3092,11 +2862,10 @@
 	nanozone->basic_zone.batch_free = OS_RESOLVED_VARIANT_ADDR(nanov2_batch_free);
 	nanozone->basic_zone.introspect =
 			(struct malloc_introspection_t *)&nanov2_introspect;
-	nanozone->basic_zone.memalign = OS_RESOLVED_VARIANT_ADDR(nanov2_memalign);
+	nanozone->basic_zone.memalign = (void *)nanov2_memalign;
 	nanozone->basic_zone.free_definite_size = OS_RESOLVED_VARIANT_ADDR(nanov2_free_definite_size);
 	nanozone->basic_zone.pressure_relief = OS_RESOLVED_VARIANT_ADDR(nanov2_pressure_relief);
 	nanozone->basic_zone.claimed_address = OS_RESOLVED_VARIANT_ADDR(nanov2_claimed_address);
-	nanozone->basic_zone.try_free_default = OS_RESOLVED_VARIANT_ADDR(nanov2_try_free_default);
 
 	// Set these both to zero as required by CFAllocator.
 	nanozone->basic_zone.reserved1 = 0;
@@ -3191,14 +2960,6 @@
 			size);
 }
 
-static void *
-nanov2_forked_memalign(nanozonev2_t *nanozone, size_t alignment, size_t size)
-{
-	// Just hand to the helper zone.
-	return nanozone->helper_zone->memalign(nanozone->helper_zone, alignment,
-			size);
-}
-
 #endif // OS_VARIANT_NOTRESOLVED
 
 #if OS_VARIANT_RESOLVED
@@ -3337,11 +3098,10 @@
 	nanozone->basic_zone.batch_free = OS_RESOLVED_VARIANT_ADDR(nanov2_forked_batch_free);
 	nanozone->basic_zone.introspect =
 			(struct malloc_introspection_t *)&nanov2_introspect;// Unchanged
-	nanozone->basic_zone.memalign = (void *)nanov2_forked_memalign;
+	nanozone->basic_zone.memalign = (void *)nanov2_memalign; 	// Unchanged
 	nanozone->basic_zone.free_definite_size =
 			OS_RESOLVED_VARIANT_ADDR(nanov2_forked_free_definite_size);
 	nanozone->basic_zone.claimed_address = nanov2_forked_claimed_address;
-	nanozone->basic_zone.try_free_default = NULL; // Fall back to old protocol
 	mprotect(nanozone, sizeof(nanozone->basic_zone), PROT_READ);
 }