Loading...
src/nanov2_malloc.c libmalloc-283 libmalloc-374.100.5
--- libmalloc/libmalloc-283/src/nanov2_malloc.c
+++ libmalloc/libmalloc-374.100.5/src/nanov2_malloc.c
@@ -53,7 +53,7 @@
 MALLOC_NOEXPORT extern size_t nanov2_pressure_relief(nanozonev2_t *nanozone, size_t goal);
 
 #if OS_VARIANT_RESOLVED
-MALLOC_NOEXPORT extern boolean_t nanov2_allocate_new_region(nanozonev2_t *nanozone);
+MALLOC_NOEXPORT extern nanov2_arena_t *nanov2_allocate_new_region(nanozonev2_t *nanozone);
 #endif // OS_VARIANT_RESOLVED
 
 #pragma mark -
@@ -239,13 +239,14 @@
 
 // Given a block metadata pointer, returns whether the block is active (that is,
 // it is being used for allocations, it has allocations that have not been freed,
-// or is waiting to be madvised).
+// or is waiting to be madvised, and is not a guard block).
 static MALLOC_ALWAYS_INLINE MALLOC_INLINE boolean_t
 nanov2_is_block_active(nanov2_block_meta_t block_meta)
 {
 	return block_meta.next_slot != SLOT_NULL
 			&& block_meta.next_slot != SLOT_MADVISING
-			&& block_meta.next_slot != SLOT_MADVISED;
+			&& block_meta.next_slot != SLOT_MADVISED
+			&& block_meta.next_slot != SLOT_GUARD;
 }
 
 #if OS_VARIANT_RESOLVED
@@ -312,7 +313,6 @@
 	return (void *)(((uintptr_t)ptr) & NANOV2_ARENA_ADDRESS_MASK);
 }
 
-#if OS_VARIANT_RESOLVED
 // Given a pointer that is assumed to be in the Nano zone, returns the address
 // of its containing region. Works for both real and logical pointers.
 static MALLOC_ALWAYS_INLINE MALLOC_INLINE nanov2_region_t *
@@ -320,7 +320,6 @@
 {
 	return (nanov2_region_t *)(((uintptr_t)ptr) & NANOV2_REGION_ADDRESS_MASK);
 }
-#endif // OS_VARIANT_RESOLVED
 
 // Given a pointer that is assumed to be in the Nano zone, returns the real
 // address of its metadata block. Works for both real and logical pointers.
@@ -394,15 +393,40 @@
 	return (nanov2_arena_t *)region;
 }
 
+#if OS_VARIANT_RESOLVED
+// Given an atomically-observed current_region_next_arena pointer, returns
+// whether or not it's a usable arena or a limit arena (indicating exhaustion of
+// the current region).
+static MALLOC_ALWAYS_INLINE MALLOC_INLINE bool
+nanov2_current_region_next_arena_is_limit(
+		nanov2_arena_t *current_region_next_arena)
+{
+	// The first arena of a region is never stored in current_region_next_arena,
+	// so a value at the beginning of a region must be a limit arena.
+	return current_region_next_arena == (nanov2_arena_t *)(
+			nanov2_region_address_for_ptr(current_region_next_arena));
+}
+#endif // OS_VARIANT_RESOLVED
+
+// Given an atomically-observed current_region_next_arena pointer, returns the
+// base of the current region at the time of the observation.
+static MALLOC_ALWAYS_INLINE MALLOC_INLINE nanov2_region_t *
+nanov2_current_region_base(nanov2_arena_t *current_region_next_arena)
+{
+	return nanov2_region_address_for_ptr(
+			(void *)(((uintptr_t)current_region_next_arena) - 1));
+}
+
 // Given a region pointer, returns a pointer to the arena after the last
 // active arena in the region.
 static MALLOC_ALWAYS_INLINE MALLOC_INLINE nanov2_arena_t *
-nanov2_limit_arena_for_region(nanozonev2_t *nanozone, nanov2_region_t *region)
+nanov2_limit_arena_for_region(nanozonev2_t __unused *nanozone,
+		nanov2_region_t *region, nanov2_arena_t *current_region_next_arena)
 {
 	// The first arena is colocated with the region itself.
 	nanov2_arena_t *limit_arena;
-	if (region == nanozone->current_region_base) {
-		limit_arena = nanozone->current_region_next_arena;
+	if (region == nanov2_current_region_base(current_region_next_arena)) {
+		limit_arena = current_region_next_arena;
 	} else {
 		limit_arena = nanov2_first_arena_for_region(region + 1);
 	}
@@ -423,14 +447,23 @@
 
 #if OS_VARIANT_RESOLVED
 // Given a pointer to a region, returns a pointer to the region that follows it,
-// or NULL if there isn't one.
+// or NULL if there isn't one. We may observe linkage to a new region that
+// hasn't yet actually been installed into current_region_next_arena; ignore the
+// linkage in this case.
 static MALLOC_ALWAYS_INLINE MALLOC_INLINE nanov2_region_t *
-nanov2_next_region_for_region(nanozonev2_t *nanozone, nanov2_region_t *region)
+nanov2_next_region_for_region(nanozonev2_t *nanozone, nanov2_region_t *region,
+		nanov2_arena_t *current_region_next_arena)
 {
 	nanov2_region_linkage_t *linkage =
 			nanov2_region_linkage_for_region(nanozone, region);
-	int offset = linkage->next_region_offset;
-	return offset ? region + offset : NULL;
+	int offset = os_atomic_load(&linkage->next_region_offset, relaxed);
+	if (!offset) {
+		return NULL;
+	}
+
+	nanov2_region_t *next_region = region + offset;
+	return (nanov2_arena_t *)next_region < current_region_next_arena ?
+			next_region : NULL;
 }
 #endif // OS_VARIANT_RESOLVED
 
@@ -440,14 +473,21 @@
 // for another process.
 static MALLOC_ALWAYS_INLINE MALLOC_INLINE nanov2_region_t *
 nanov2_next_region_for_region_offset(nanozonev2_t *nanozone,
-        nanov2_region_t *region, off_t region_offset)
-{
-    nanov2_region_linkage_t *linkage =
-            nanov2_region_linkage_for_region(nanozone, region);
-    nanov2_region_linkage_t *mapped_linkage = (nanov2_region_linkage_t *)
-        ((uintptr_t)linkage + region_offset);
-    int offset = mapped_linkage->next_region_offset;
-    return offset ? region + offset : NULL;
+		nanov2_region_t *region, off_t region_offset,
+		nanov2_arena_t *current_region_next_arena)
+{
+	nanov2_region_linkage_t *linkage =
+			nanov2_region_linkage_for_region(nanozone, region);
+	nanov2_region_linkage_t *mapped_linkage = (nanov2_region_linkage_t *)(
+			((uintptr_t)linkage + region_offset));
+	int offset = os_atomic_load(&mapped_linkage->next_region_offset, relaxed);
+	if (!offset) {
+		return NULL;
+	}
+
+	nanov2_region_t *next_region = region + offset;
+	return (nanov2_arena_t *)next_region < current_region_next_arena ?
+			next_region : NULL;
 }
 #endif // OS_VARIANT_NOTRESOLVED
 
@@ -595,15 +635,15 @@
 #if CONFIG_NANO_USES_HYPER_SHIFT
 	if (os_likely(nano_common_max_magazines_is_ncpu)) {
 		// Default case is max magazines == physical number of CPUs, which
-		// must be > _os_cpu_number() >> hyper_shift, so the modulo
+		// must be > _malloc_cpu_number() >> hyper_shift, so the modulo
 		// operation is not required.
-		return _os_cpu_number() >> hyper_shift;
+		return _malloc_cpu_number() >> hyper_shift;
 	}
 #else // CONFIG_NANO_USES_HYPER_SHIFT
 	if (os_likely(nano_common_max_magazines_is_ncpu)) {
 		// Default case is max magazines == logical number of CPUs, which
-		// must be > _os_cpu_number() so the modulo operation is not required.
-		return _os_cpu_number();
+		// must be > _malloc_cpu_number() so the modulo operation is not required.
+		return _malloc_cpu_number();
 	}
 #endif // CONFIG_NANO_USES_HYPER_SHIFT
 
@@ -613,11 +653,62 @@
 #endif // CONFIG_NANO_USES_HYPER_SHIFT
 
 	if (os_likely(_os_cpu_number_override == -1)) {
-		return (_os_cpu_number() >> shift) % nano_common_max_magazines;
+		return (_malloc_cpu_number() >> shift) % nano_common_max_magazines;
 	}
 	return (_os_cpu_number_override >> shift) % nano_common_max_magazines;
 }
 #endif // OS_VARIANT_RESOLVED
+
+#pragma mark -
+#pragma mark Guard Blocks
+
+// Converts a given block (specified by absolute block number) in an arena into
+// a guard block. The block will be marked as in-use so that it is not available
+// for allocations and its permissions are set to PROT_READ. Note that
+// PROT_READ is used instead of PROT_NONE because the latter breaks the
+// enumerator, which tries to map the whole region and fails if there are
+// PROT_NONE pages in the range. We can't fix that in the allocator because the
+// code that does the mapping is part of the sampling tools and is simply
+// invoked as a callback from the enumerator.
+static MALLOC_ALWAYS_INLINE MALLOC_INLINE void
+nanov2_create_guard_block(nanozonev2_t *nanozone, nanov2_arena_t *arena,
+		nanov2_block_index_t block_index) {
+	// Mark the block as an in-use guard (SLOT_GUARD) in the arena metadata.
+	static nanov2_block_meta_t in_use_block = {
+		.in_use = 1,
+		.next_slot = SLOT_GUARD
+	};
+	nanov2_meta_index_t	block_meta_index =
+			nanov2_block_index_to_meta_index(block_index);
+	nanov2_arena_metablock_t *block_metap = nanov2_metablock_address_for_ptr(
+			nanozone, arena);
+	block_metap->arena_block_meta[block_meta_index] = in_use_block;
+	void *block_ptr = &arena->blocks[block_index];
+	// Make the block read-only (PROT_READ, not PROT_NONE; see above).
+	// NOTE(review): on failure mprotect() returns -1; the cause is in errno.
+	kern_return_t err = mprotect(block_ptr, NANOV2_BLOCK_SIZE, PROT_READ);
+	if (err != KERN_SUCCESS) {
+		malloc_report(ASL_LEVEL_ERR, "Failed to create guard block at %p (%d)\n",
+				block_ptr, err);
+	}
+}
+
+// Creates the guard blocks for an arena, if required. The guard blocks are
+// the first and last physical blocks in the arena that are not the metadata
+// block.
+static MALLOC_ALWAYS_INLINE MALLOC_INLINE void
+nanov2_init_guard_blocks(nanozonev2_t *nanozone, nanov2_arena_t *arena)
+{
+	if (nanozone->debug_flags & MALLOC_ALL_GUARD_PAGE_FLAGS) {
+		// Use the first and last blocks in the arena as guard regions,
+		// avoiding the metadata block.
+		nanov2_meta_index_t meta_index = nanov2_metablock_meta_index(nanozone);
+		nanov2_create_guard_block(nanozone, arena, meta_index == 0 ? 1 : 0);
+		nanov2_create_guard_block(nanozone, arena,
+				meta_index == NANOV2_BLOCKS_PER_ARENA - 1 ?
+					NANOV2_BLOCKS_PER_ARENA - 2 : NANOV2_BLOCKS_PER_ARENA - 1);
+	}
+}
 
 #pragma mark -
 #pragma mark Allocator Initialization
@@ -1191,9 +1282,12 @@
 	// until we reach our goal.
 	nanov2_region_t *region = nanozone->first_region_base;
 	nanov2_meta_index_t metablock_meta_index = nanov2_metablock_meta_index(nanozone);
+	nanov2_arena_t *current_region_next_arena = os_atomic_load(
+			&nanozone->current_region_next_arena, acquire);
 	while (region) {
 		nanov2_arena_t *arena = nanov2_first_arena_for_region(region);
-		nanov2_arena_t *arena_after_region = nanov2_limit_arena_for_region(nanozone, region);
+		nanov2_arena_t *arena_after_region = nanov2_limit_arena_for_region(
+				nanozone, region, current_region_next_arena);
 		while (arena < arena_after_region) {
 			// Scan all of the blocks in the arena, skipping the metadata block.
 			nanov2_arena_metablock_t *meta_blockp =
@@ -1225,7 +1319,8 @@
 			}
 			arena++;
 		}
-		region = nanov2_next_region_for_region(nanozone, region);
+		region = nanov2_next_region_for_region(nanozone, region,
+				current_region_next_arena);
 	}
 
 done:
@@ -1282,7 +1377,7 @@
 	if (kr) {
 		return kr;
 	}
-	boolean_t self_zone = (nanozonev2_t *)zone_address == nanozone;
+	boolean_t self_zone = mach_task_is_self(task) && (nanozonev2_t *)zone_address == nanozone;
 	memcpy(&zone_copy, nanozone, sizeof(zone_copy));
 	nanozone = &zone_copy;
 	nanov2_meta_index_t metablock_meta_index = nanov2_metablock_meta_index(nanozone);
@@ -1290,6 +1385,8 @@
 	// Process the zone one region at a time. Report each in-use block as a
 	// pointer range and each in-use slot as a pointer.
 	nanov2_region_t *region = nanozone->first_region_base;
+	nanov2_arena_t *current_region_next_arena = os_atomic_load(
+			&nanozone->current_region_next_arena, acquire);
 	while (region) {
 		mach_vm_address_t vm_addr = (mach_vm_address_t)NULL;
 		kern_return_t kr = reader(task, (vm_address_t)region, NANOV2_REGION_SIZE, (void **)&vm_addr);
@@ -1301,7 +1398,8 @@
 		// and its mapped address in this process.
 		mach_vm_offset_t ptr_offset = (mach_vm_address_t)region - vm_addr;
 		nanov2_arena_t *arena = nanov2_first_arena_for_region(region);
-		nanov2_arena_t *limit_arena = nanov2_limit_arena_for_region(nanozone, region);
+		nanov2_arena_t *limit_arena = nanov2_limit_arena_for_region(nanozone, region,
+				current_region_next_arena);
 		vm_range_t ptr_range;
 		while (arena < limit_arena) {
 			// Find the metadata block and process every entry, apart from the
@@ -1415,7 +1513,8 @@
 		nanov2_region_linkage_t *mapped_region_linkagep =
 				NANOV2_ZONE_PTR_TO_MAPPED_PTR(nanov2_region_linkage_t *,
 				region_linkagep, ptr_offset);
-		int offset = mapped_region_linkagep->next_region_offset;
+		int offset = os_atomic_load(&mapped_region_linkagep->next_region_offset,
+				relaxed);
 		region = offset ? region + offset : NULL;
 	}
 	return 0;
@@ -1434,7 +1533,7 @@
 static boolean_t
 nanov2_check(nanozonev2_t *nanozone)
 {
-	// Does nothing, just like Nano V1.
+	// Does nothing
 	return 1;
 }
 
@@ -1494,6 +1593,10 @@
 	nanov2_meta_index_t metablock_meta_index =
 			nanov2_metablock_meta_index(mapped_nanozone);
 	nanov2_region_t *region = mapped_nanozone->first_region_base;
+	// Use a single, consistent snapshot of current_region_next_arena throughout
+	// iteration, ignoring any arenas or regions allocated after it.
+	nanov2_arena_t *current_region_next_arena = os_atomic_load(
+			&mapped_nanozone->current_region_next_arena, acquire);
 	int region_index = 0;
 	while (region) {
 		printer("\nRegion %d: base address %p\n", region_index, region);
@@ -1507,7 +1610,7 @@
 
 		nanov2_arena_t *arena = nanov2_first_arena_for_region(region);
 		nanov2_arena_t *limit_arena = nanov2_limit_arena_for_region(
-				mapped_nanozone, region);
+				mapped_nanozone, region, current_region_next_arena);
 		int arena_index = 0;
 		while (arena < limit_arena) {
 			// Find the metadata block and process every entry, apart from the
@@ -1648,7 +1751,7 @@
 		}
 
 		region = nanov2_next_region_for_region_offset(mapped_nanozone, region,
-                region_offset);
+                region_offset, current_region_next_arena);
 		region_index++;
 	}
 }
@@ -1670,7 +1773,7 @@
 static void
 nanov2_log(malloc_zone_t *zone, void *log_address)
 {
-	// Does nothing, just like Nano V1.
+	// Does nothing
 }
 
 static void
@@ -1699,7 +1802,7 @@
 }
 
 static void
-null_printer(const char __unused *fmt, ...)
+nanov2_null_printer(const char __unused *fmt, ...)
 {
 }
 
@@ -1708,7 +1811,7 @@
 		memory_reader_t reader, print_task_printer_t printer,
 		malloc_statistics_t *stats)
 {
-	printer = printer ? printer : null_printer;
+	printer = printer ? printer : nanov2_null_printer;
 	reader = !reader && task == mach_task_self() ? _malloc_default_reader : reader;
 
 	kern_return_t err;
@@ -1736,6 +1839,8 @@
 
 	// Iterate over each arena in each region. Within each region, add
 	// statistics for each slot in each block, excluding the meta data block.
+	nanov2_arena_t *current_region_next_arena = os_atomic_load(
+			&mapped_nanozone->current_region_next_arena, acquire);
 	for (region = mapped_nanozone->first_region_base; region;) {
         nanov2_region_t *mapped_region;
 		err = reader(task, (vm_address_t)region, sizeof(nanov2_region_t), (void **)&mapped_region);
@@ -1745,7 +1850,8 @@
         }
         off_t region_offset = (uintptr_t)mapped_region - (uintptr_t)region;
 		for (arena = nanov2_first_arena_for_region(region);
-				arena < nanov2_limit_arena_for_region(mapped_nanozone, region);
+				arena < nanov2_limit_arena_for_region(mapped_nanozone, region,
+						current_region_next_arena);
 				arena++) {
 			nanov2_arena_metablock_t *meta_block =
 					nanov2_metablock_address_for_ptr(mapped_nanozone, arena);
@@ -1773,6 +1879,8 @@
 				case SLOT_MADVISING:
 					// FALLTHRU
 				case SLOT_MADVISED:
+					// FALLTHRU
+				case SLOT_GUARD:
 					// These blocks have no active content.
 					break;
 				case SLOT_FULL:
@@ -1795,7 +1903,7 @@
 			}
 		}
         region = nanov2_next_region_for_region_offset(mapped_nanozone,
-                region, region_offset);
+                region, region_offset, current_region_next_arena);
 	}
 	return KERN_SUCCESS;
 }
@@ -1868,25 +1976,40 @@
 		return 0;
 	}
 
+	// Atomically load the value of current_region_next_arena. No thread is
+	// allowed to allocate from an arena until it observes a greater value of
+	// current_region_next_arena, which must have happened before now if we're
+	// being called in the context of a deallocation, so we can safely use it as
+	// the upper bound for an overall address range check.
+	nanov2_arena_t *current_region_next_arena = os_atomic_load(
+			&nanozone->current_region_next_arena, relaxed);
+
 	// Bounds check against the active address space.
 	if (ptr < (void *)nanozone->first_region_base ||
-			ptr > (void *)nanozone->current_region_next_arena) {
+			ptr > (void *)current_region_next_arena) {
 		return 0;
 	}
 
 #if NANOV2_MULTIPLE_REGIONS
 	// Need to check that the region part is valid because there could be holes.
 	// Do this only if we know there is a hole.
-	// NOTE: in M2 convergence, use a hashed structure to make this more
-	// efficient.
-	if (nanozone->statistics.region_address_clashes) {
+	//
+	// If we're looking at a legitimately-allocated nano pointer, a load-acquire
+	// of current_region_next_arena must have already happened when its
+	// containing arena was first allocated from, so any region_address_clashes
+	// increment that preceded the store-release of current_region_next_arena
+	// should be visible.
+	//
+	// TODO: use a hashed structure to make this more efficient.
+	if (os_atomic_load(&nanozone->statistics.region_address_clashes, relaxed)) {
 		nanov2_region_t *ptr_region = nanov2_region_address_for_ptr(ptr);
 		nanov2_region_t *region = nanozone->first_region_base;
 		while (region) {
 			if (ptr_region == region) {
 				break;
 			}
-			region = nanov2_next_region_for_region(nanozone, region);
+			region = nanov2_next_region_for_region(nanozone, region,
+					current_region_next_arena);
 		}
 		if (!region) {
 			// Reached the end of the region list without matching - not a
@@ -2015,45 +2138,62 @@
 
 // Allocates a new region adjacent to the current one. If the allocation fails,
 // keep sliding up by the size of a region until we either succeed or run out of
-// address space. The caller must own the Nanozone regions lock.
-MALLOC_NOEXPORT boolean_t
+// address space. The caller must own the Nanozone regions lock. Returns the
+// first arena of the newly-allocated region if successful, or NULL otherwise.
+MALLOC_NOEXPORT nanov2_arena_t *
 nanov2_allocate_new_region(nanozonev2_t *nanozone)
 {
 #if NANOV2_MULTIPLE_REGIONS
-	boolean_t result = FALSE;
+	bool allocated = false;
 
 	_malloc_lock_assert_owner(&nanozone->regions_lock);
-	nanov2_region_t *current_region = nanozone->current_region_base;
-	nanov2_region_t *next_region = (nanov2_region_t *)nanozone->current_region_limit;
+	nanov2_region_t *current_region = nanov2_current_region_base(
+			os_atomic_load(&nanozone->current_region_next_arena, relaxed));
+	nanov2_region_t *next_region = current_region + 1;
 	while ((void *)next_region <= nanov2_max_region_base.addr) {
 		if (nanov2_allocate_region(next_region)) {
-			nanozone->current_region_base = next_region;
-			nanozone->current_region_next_arena = (nanov2_arena_t *)next_region;
-			nanozone->current_region_limit = next_region + 1;
 			nanozone->statistics.allocated_regions++;
-			result = TRUE;
+			allocated = true;
 			break;
 		}
 		next_region++;
-		nanozone->statistics.region_address_clashes++;
-	}
-
-	if (result) {
-		// Link this region to the previous one.
-		nanov2_region_linkage_t *current_region_linkage =
-				nanov2_region_linkage_for_region(nanozone, current_region);
-		nanov2_region_linkage_t *next_region_linkage =
-				nanov2_region_linkage_for_region(nanozone, next_region);
-		uint16_t offset = next_region - current_region;
-		current_region_linkage->next_region_offset = offset;
-		next_region_linkage->next_region_offset = 0;
-	}
-
-	return result;
+
+		// Loaded atomically in nanov2_pointer_size() to determine whether or
+		// not it's necessary to walk the region list, so we need to increment
+		// atomically here. Published by the store-release of
+		// current_region_next_arena.
+		os_atomic_inc(&nanozone->statistics.region_address_clashes, relaxed);
+	}
+
+	if (!allocated) {
+		return NULL;
+	}
+
+	// Link this region to the previous one.
+	nanov2_region_linkage_t *current_region_linkage =
+			nanov2_region_linkage_for_region(nanozone, current_region);
+
+	// The linkage of the next region is in pristine memory, so already zero -
+	// don't touch it.
+
+	// Store-release the linkage update so any dependent loads through it
+	// observe the (implicit zero-)initialization of the next region.
+	uint16_t offset = next_region - current_region;
+	os_atomic_store(&current_region_linkage->next_region_offset, offset,
+			release);
+
+	// Store-release the update to current_region_next_arena to publish the
+	// linkage update. Pairs with load-acquires of current_region_next_arena
+	// followed by walks of the region list.
+	nanov2_arena_t *first_arena = nanov2_first_arena_for_region(next_region);
+	os_atomic_store(&nanozone->current_region_next_arena, first_arena + 1,
+			release);
+
+	return first_arena;
 #else // NANOV2_MULTIPLE_REGIONS
 	// On iOS, only one region is supported, so we fail since the first
 	// region is allocated separately.
-	return FALSE;
+	return NULL;
 #endif // CONFIG_NANOV2_MULTIPLE_REGIONS
 }
 #endif // OS_VARIANT_NOTRESOLVED
@@ -2150,8 +2290,7 @@
 		ptr = nanov2_slot_in_block_ptr(blockp, size_class, slot);
 	}
 
-	nanov2_free_slot_t *slotp =
-			(nanov2_free_slot_t *)os_atomic_force_dependency_on(ptr,
+	nanov2_free_slot_t *slotp = os_atomic_inject_dependency(ptr,
 			(unsigned long)old_meta_view.bits);
 	if (from_free_list) {
 		// We grabbed the item from the free list. Check the free list canary
@@ -2160,7 +2299,7 @@
 		// write to it.
 		uintptr_t guard = os_atomic_load(&slotp->double_free_guard, relaxed);
 		if ((guard ^ nanozone->slot_freelist_cookie) != (uintptr_t)ptr) {
-			malloc_zone_error(MALLOC_ABORT_ON_CORRUPTION, false,
+			malloc_zone_error(MALLOC_ABORT_ON_CORRUPTION, true,
 					"Heap corruption detected, free list is damaged at %p\n"
 					"*** Incorrect guard value: %lu\n", ptr, guard);
 			__builtin_unreachable();
@@ -2405,8 +2544,13 @@
 	start_region = nanov2_region_address_for_ptr(arena);
 	nanov2_arena_t *start_arena = arena;
 	nanov2_region_t *region = start_region;
-	nanov2_arena_t *limit_arena = nanov2_limit_arena_for_region(nanozone, start_region);
-	nanov2_arena_t *initial_region_next_arena = nanozone->current_region_next_arena;
+	// The load-acquire pairs with store-release in nanov2_allocate_new_region()
+	// to make the most recent region linkage update visible when we load it in
+	// nanov2_next_region_for_region() below.
+	nanov2_arena_t *initial_region_next_arena = os_atomic_load(
+			&nanozone->current_region_next_arena, acquire);
+	nanov2_arena_t *limit_arena = nanov2_limit_arena_for_region(nanozone,
+			start_region, initial_region_next_arena);
 	do {
 		nanov2_block_meta_t *block_metap = nanov2_find_block_in_arena(nanozone,
 				arena, size_class, start_block);
@@ -2442,13 +2586,15 @@
 		start_block = NULL;
 		arena++;
 		if (arena >= limit_arena) {
-			region = nanov2_next_region_for_region(nanozone, region);
+			region = nanov2_next_region_for_region(nanozone, region,
+					initial_region_next_arena);
 			if (!region) {
 				// Reached the last region -- loop back to the first.
 				region = nanozone->first_region_base;
 			}
 			arena = nanov2_first_arena_for_region(region);
-			limit_arena = nanov2_limit_arena_for_region(nanozone, region);
+			limit_arena = nanov2_limit_arena_for_region(nanozone, region,
+					initial_region_next_arena);
 		}
 	} while (arena != start_arena);
 
@@ -2459,24 +2605,42 @@
 	}
 
 	// Allocate a new arena and maybe a new region. To do either of those
-	// things, we need to take the regions_lock. After doing so, check that
-	// the state is unchanged. If it has, just assume that we might have some
-	// new space to allocate into and try again.
+	// things, we need to take the regions_lock. After doing so, check that the
+	// state is unchanged. If it has, just assume that we might have some new
+	// space to allocate into and try again.
+
 	boolean_t failed = FALSE;
-	arena = initial_region_next_arena;
+
 	_malloc_lock_lock(&nanozone->regions_lock);
-	if (nanozone->current_region_next_arena == arena) {
-		if ((void *)arena >= nanozone->current_region_limit) {
+	nanov2_arena_t *current_region_next_arena = os_atomic_load(
+			&nanozone->current_region_next_arena, relaxed);
+	if (current_region_next_arena == initial_region_next_arena) {
+		if (nanov2_current_region_next_arena_is_limit(
+				current_region_next_arena)) {
 			// Reached the end of the region. Allocate a new one, if we can.
-			if (nanov2_allocate_new_region(nanozone)) {
-				arena = nanozone->current_region_next_arena++;
-			} else {
+			arena = nanov2_allocate_new_region(nanozone);
+			if (!arena) {
 				failed = TRUE;
 			}
 		} else {
-			// Assign the new arena, in the same region.
-			nanozone->current_region_next_arena = arena + 1;
-		}
+			// Assign the new arena, in the current region.
+			arena = current_region_next_arena;
+
+			// Bump current_region_next_arena by 1. No need for an atomic add
+			// because we're under the regions_lock.
+			os_atomic_store(&nanozone->current_region_next_arena,
+					current_region_next_arena + 1, relaxed);
+		}
+
+		// Set up the guard blocks for the new arena, if requested
+		if (!failed) {
+			nanov2_init_guard_blocks(nanozone, arena);
+		}
+	} else {
+		// The arena just before current_region_next_arena is always the most
+		// recently allocated arena. Let's retry from that arena, which was
+		// allocated in the time since we started our last try.
+		arena = current_region_next_arena - 1;
 	}
 	_malloc_lock_unlock(&nanozone->regions_lock);
 
@@ -2710,12 +2874,6 @@
 	// Prevent overwriting the function pointers in basic_zone.
 	mprotect(nanozone, sizeof(nanozone->basic_zone), PROT_READ);
 
-	// Nano V2 zone does not support MALLOC_ADD_GUARD_PAGES
-	if (debug_flags & MALLOC_ADD_GUARD_PAGES) {
-		malloc_report(ASL_LEVEL_INFO, "nano does not support guard pages\n");
-		debug_flags &= ~MALLOC_ADD_GUARD_PAGES;
-	}
-
 	// Set up the remainder of the nanozonev2 structure
 	nanozone->debug_flags = debug_flags;
 	nanozone->helper_zone = helper_zone;
@@ -2758,14 +2916,16 @@
 	}
 	nanov2_region_linkage_t *region_linkage =
 			nanov2_region_linkage_for_region(nanozone, region);
-	region_linkage->next_region_offset = 0;
+	os_atomic_store(&region_linkage->next_region_offset, 0, relaxed);
 
 	// Install the first region and pre-allocate the first arena.
 	nanozone->first_region_base = region;
-	nanozone->current_region_base = region;
-	nanozone->current_region_next_arena = ((nanov2_arena_t *)region) + 1;
-	nanozone->current_region_limit = region + 1;
+	os_atomic_store(&nanozone->current_region_next_arena,
+			((nanov2_arena_t *)region) + 1, release);
 	nanozone->statistics.allocated_regions = 1;
+
+	// Set up the guard blocks for the initial arena, if requested
+	nanov2_init_guard_blocks(nanozone, (nanov2_arena_t *)region);
 
 	return (malloc_zone_t *)nanozone;
 }