/* ----------------------------------------------------------------------------
Copyright (c) 2018-2022, Microsoft Research, Daan Leijen
Copyright © 2025 Apple Inc.
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" in the same directory as this file.
-----------------------------------------------------------------------------*/

#include "../internal.h"

#if CONFIG_XZONE_MALLOC

static void
xzm_madvise(xzm_malloc_zone_t zone, uint8_t *start, size_t size);
static void
_xzm_segment_group_segment_deallocate(xzm_segment_group_t sg,
        xzm_segment_t segment, bool free_from_table);

#pragma mark segment map

// mimalloc: _mi_segment_map_allocated_at
static void
_xzm_segment_table_allocated_at(xzm_main_malloc_zone_t main, void *data,
        xzm_segment_t metadata, bool normal)
{
    xzm_debug_assert((uintptr_t)data % XZM_SEGMENT_SIZE == 0);
    void *segment_end = _xzm_segment_end(metadata);
    xzm_debug_assert((uintptr_t)data < (uintptr_t)segment_end);

    xzm_segment_table_entry_s entry_val =
            _xzm_segment_to_segment_table_entry(metadata, normal);
    while (data < segment_end) {
#if CONFIG_EXTERNAL_METADATA_LARGE
        // If this allocation is in a new 64GB granule, allocate a new leaf
        // table to store the metadata pointers in
        size_t ext_idx = 0;
        __assert_only size_t index = _xzm_segment_table_index_of(data,
                &ext_idx);
        xzm_debug_assert(index < XZM_SEGMENT_TABLE_ENTRIES);
        xzm_debug_assert(ext_idx < XZM_EXTENDED_SEGMENT_TABLE_ENTRIES);
        if (ext_idx != 0) {
            xzm_extended_segment_table_entry_s *ext_addr =
                    &main->xzmz_extended_segment_table[ext_idx];
            xzm_extended_segment_table_entry_s ext_entry = { 0 };
            ext_entry = os_atomic_load(ext_addr, relaxed);
            if (ext_entry.xeste_val == 0) {
                // Need to allocate a new segment table since this pointer is
                // in a new segment table (64GB span)
                _malloc_lock_lock(&main->xzmz_extended_segment_table_lock);
                // Load the table entry again to see if another thread
                // populated it while we were acquiring the lock
                ext_entry = os_atomic_load(ext_addr, relaxed);
                if (ext_entry.xeste_val == 0) {
                    xzm_metapool_t mp;
                    mp = &main->xzmz_metapools[XZM_METAPOOL_SEGMENT_TABLE];
                    void *leaf_table = xzm_metapool_alloc(mp);
                    xzm_assert(leaf_table);
                    xzm_debug_assert(((uintptr_t)leaf_table /
                            XZM_SEGMENT_TABLE_ALIGN) <= UINT32_MAX);
                    ext_entry.xeste_val = (uint32_t)((uintptr_t)leaf_table /
                            XZM_SEGMENT_TABLE_ALIGN);
                    os_atomic_store(ext_addr, ext_entry, relaxed);
                }
                _malloc_lock_unlock(&main->xzmz_extended_segment_table_lock);
            }
        }
#endif // CONFIG_EXTERNAL_METADATA_LARGE

        xzm_segment_table_entry_s *entry;
        entry = _xzm_ptr_to_table_entry(data, main);
        xzm_debug_assert(entry != NULL);
        xzm_debug_assert(entry->xste_val == 0);
        // Store-release to publish the segment and chunk initializations
        // TODO: document all paired dependency/acquire loads
        os_atomic_store(entry, entry_val, release);
        data = (void *)((uintptr_t)data + XZM_SEGMENT_SIZE);
    }
}

// mimalloc: _mi_segment_map_freed_at
static void
_xzm_segment_table_freed_at(xzm_main_malloc_zone_t main, void *data,
        xzm_segment_t metadata, __assert_only bool full_segment)
{
    void *end = _xzm_segment_end(metadata);
    xzm_debug_assert(!full_segment || _xzm_segment_start(metadata) == data);
    while (data < end) {
        xzm_segment_table_entry_s *entry;
        entry = _xzm_ptr_to_table_entry(data, main);
        xzm_debug_assert(entry != NULL);
        xzm_debug_assert(_xzm_segment_to_segment_table_entry(metadata,
                false).xste_val == entry->xste_val);
        xzm_segment_table_entry_s null_entry;
        null_entry = _xzm_segment_to_segment_table_entry(NULL, false);
        os_atomic_store(entry, null_entry, relaxed);
        data = (void *)((uintptr_t)data + XZM_SEGMENT_SIZE);
    }
}

#pragma mark vm reclaim

#if CONFIG_XZM_DEFERRED_RECLAIM

static struct xzm_reclaim_buffer_s xzm_reclaim_buffer;

static bool
_xzm_reclaim_id_cache_is_empty(xzm_reclaim_id_cache_t cache)
{
    return cache->ric_head == 0;
}

static uint64_t
_xzm_reclaim_id_cache_pop(xzm_reclaim_id_cache_t cache)
{
    xzm_debug_assert(!_xzm_reclaim_id_cache_is_empty(cache));
    uint64_t id = cache->ric_ids[--cache->ric_head];
    xzm_debug_assert(id != VM_RECLAIM_ID_NULL);
    return id;
}

static void
_xzm_reclaim_id_cache_push(xzm_reclaim_id_cache_t cache,
        mach_vm_reclaim_id_t id)
{
    xzm_assert(cache->ric_head < cache->ric_len);
    xzm_debug_assert(id != VM_RECLAIM_ID_NULL);
    cache->ric_ids[cache->ric_head++] = id;
}

static void
_xzm_reclaim_id_cache_init(xzm_reclaim_buffer_t buffer)
{
    xzm_reclaim_id_cache_t id_cache = &buffer->xrb_id_cache;
    mach_vm_reclaim_count_t max_buffer_count;
    mach_vm_reclaim_error_t kr = mach_vm_reclaim_ring_capacity(
            buffer->xrb_ringbuffer, &max_buffer_count);
    xzm_assert(kr == VM_RECLAIM_SUCCESS);
    size_t min_id_cache_size = max_buffer_count * sizeof(mach_vm_reclaim_id_t);
    size_t id_cache_size = round_page(min_id_cache_size);
    if (id_cache->ric_ids == NULL || id_cache->ric_len < max_buffer_count) {
        mach_vm_reclaim_id_t *ids = (mach_vm_reclaim_id_t *)
                mvm_allocate_pages(id_cache_size, 0, MALLOC_ABORT_ON_ERROR,
                VM_MEMORY_MALLOC);
        if (id_cache->ric_ids != NULL) {
            // Deallocate the old cache
            mvm_deallocate_pages((void *)(id_cache->ric_ids),
                    id_cache->ric_len * sizeof(mach_vm_reclaim_id_t),
                    MALLOC_ABORT_ON_ERROR);
        }
        id_cache->ric_ids = ids;
        id_cache->ric_len = id_cache_size / sizeof(mach_vm_reclaim_id_t);
    }
    id_cache->ric_head = 0;
    xzm_debug_assert(id_cache->ric_len >= max_buffer_count);
}
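
/*
 * Note (summary, not from the original source): the id cache above is a
 * simple LIFO stack of reclaim ids that were cancelled out of the ring (see
 * _xzm_reclaim_mark_used_locked below). It is sized to the ring's maximum
 * capacity, presumably so a push can never overflow: every cached id
 * corresponds to a previously occupied ring slot.
 */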
bool
xzm_reclaim_init(xzm_main_malloc_zone_t main,
        mach_vm_reclaim_count_t initial_count,
        mach_vm_reclaim_count_t max_count)
{
    // Pick a sane minimum number of entries and let vm_reclaim round up
    // to a page boundary. The intention is for the initial size to be
    // one page.
    mach_vm_reclaim_count_t buffer_capacity =
            mach_vm_reclaim_round_capacity(initial_count);
    mach_vm_reclaim_count_t max_buffer_capacity =
            mach_vm_reclaim_round_capacity(max_count);
    xzm_reclaim_buffer.xrb_id_cache.ric_len = 0;
    xzm_reclaim_buffer.xrb_id_cache.ric_ids = NULL;
    _malloc_lock_init(&xzm_reclaim_buffer.xrb_lock);
    mach_vm_reclaim_error_t err = mach_vm_reclaim_ring_allocate(
            &xzm_reclaim_buffer.xrb_ringbuffer, buffer_capacity,
            max_buffer_capacity);
    if (err == VM_RECLAIM_SUCCESS) {
        xzm_reclaim_buffer.xrb_len = buffer_capacity;
        main->xzmz_reclaim_buffer = &xzm_reclaim_buffer;
        _xzm_reclaim_id_cache_init(&xzm_reclaim_buffer);
    } else {
        malloc_report(ASL_LEVEL_ERR, "xzm: failed to initialize deferred "
                "reclamation buffer [%d] %s\n", err_get_code(err),
                mach_error_string(err));
    }
    return (err == VM_RECLAIM_SUCCESS);
}

static mach_vm_reclaim_state_t
_xzm_reclaim_mark_used_locked(xzm_reclaim_buffer_t buffer,
        mach_vm_reclaim_id_t id, uint8_t *addr, size_t size, bool reusable,
        bool *update_accounting_out)
{
    mach_vm_reclaim_error_t err;
    mach_vm_reclaim_state_t state;
    xzm_debug_assert(size <= UINT32_MAX);
    mach_vm_reclaim_action_t behavior = reusable ?
            VM_RECLAIM_FREE : VM_RECLAIM_DEALLOCATE;
    err = mach_vm_reclaim_try_cancel(buffer->xrb_ringbuffer, id,
            (mach_vm_address_t)addr, (mach_vm_size_t)size, behavior, &state,
            update_accounting_out);
    xzm_assert(err == VM_RECLAIM_SUCCESS);
    if (state == VM_RECLAIM_UNRECLAIMED) {
        _xzm_reclaim_id_cache_push(&buffer->xrb_id_cache, id);
    }
    return state;
}

static mach_vm_reclaim_state_t
_xzm_reclaim_mark_used(xzm_reclaim_buffer_t buffer, mach_vm_reclaim_id_t id,
        uint8_t *addr, size_t size, bool reusable)
{
    bool update_accounting = false;
    _malloc_lock_lock(&buffer->xrb_lock);
    mach_vm_reclaim_state_t state = _xzm_reclaim_mark_used_locked(buffer, id,
            addr, size, reusable, &update_accounting);
    _malloc_lock_unlock(&buffer->xrb_lock);
    if (update_accounting) {
        __assert_only mach_vm_reclaim_error_t err =
                mach_vm_reclaim_update_kernel_accounting(
                buffer->xrb_ringbuffer);
        xzm_debug_assert(err == VM_RECLAIM_SUCCESS);
    }
    return state;
}

static bool
_xzm_reclaim_is_reusable(xzm_reclaim_buffer_t buffer,
        mach_vm_reclaim_id_t reclaim_id, bool deallocate)
{
    mach_vm_reclaim_error_t err;
    mach_vm_reclaim_state_t state;
    err = mach_vm_reclaim_query_state(buffer->xrb_ringbuffer, reclaim_id,
            deallocate ? VM_RECLAIM_DEALLOCATE : VM_RECLAIM_FREE, &state);
    xzm_assert(err == VM_RECLAIM_SUCCESS);
    return mach_vm_reclaim_is_reusable(state);
}

uint64_t
xzm_reclaim_mark_free_locked(xzm_reclaim_buffer_t buffer, uint8_t *addr,
        size_t size, bool reusable, bool *update_accounting_out)
{
    mach_vm_reclaim_error_t kr;
    mach_vm_reclaim_id_t id;
    mach_vm_address_t vm_addr = (mach_vm_address_t)addr;
    uint32_t vm_size = (uint32_t)size;
    xzm_debug_assert(size <= UINT32_MAX);
    xzm_debug_assert(vm_addr % XZM_SEGMENT_SLICE_SIZE == 0);
    xzm_debug_assert(vm_size % XZM_SEGMENT_SLICE_SIZE == 0);
#ifdef DEBUG
    _malloc_lock_assert_owner(&buffer->xrb_lock);
#endif // DEBUG
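    /*
     * Entry strategy of the code that follows (summary, not from the
     * original source): first retry ids cached by
     * _xzm_reclaim_mark_used_locked(), which reuse ring slots that are
     * already occupied; if none of those can be re-entered, ask for a fresh
     * id at the tail, and if the ring is full, flush and resize it before
     * trying again.
     */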
    mach_vm_reclaim_action_t behavior = reusable ?
            VM_RECLAIM_FREE : VM_RECLAIM_DEALLOCATE;
    while (!_xzm_reclaim_id_cache_is_empty(&buffer->xrb_id_cache)) {
        id = _xzm_reclaim_id_cache_pop(&buffer->xrb_id_cache);
        kr = mach_vm_reclaim_try_enter(buffer->xrb_ringbuffer, vm_addr,
                vm_size, behavior, &id, update_accounting_out);
        xzm_assert(kr == VM_RECLAIM_SUCCESS);
        if (id != VM_RECLAIM_ID_NULL) {
            goto done;
        }
    }
    do {
        id = VM_RECLAIM_ID_NULL;
        kr = mach_vm_reclaim_try_enter(buffer->xrb_ringbuffer, vm_addr,
                vm_size, behavior, &id, update_accounting_out);
        xzm_assert(kr == VM_RECLAIM_SUCCESS);
        if (id == VM_RECLAIM_ID_NULL) {
            // If the ringbuffer is full, reap all of its contents and resize
            xzm_reclaim_sync_and_resize(buffer);
        }
    } while (id == VM_RECLAIM_ID_NULL);
done:
    return id;
}

static uint64_t
_xzm_reclaim_mark_free(xzm_reclaim_buffer_t buffer, uint8_t *addr,
        size_t size, bool reusable)
{
    uint64_t id;
    bool should_update_kernel_accounting = false;
    _malloc_lock_lock(&buffer->xrb_lock);
    id = xzm_reclaim_mark_free_locked(buffer, addr, size, reusable,
            &should_update_kernel_accounting);
    _malloc_lock_unlock(&buffer->xrb_lock);
    if (should_update_kernel_accounting) {
        __assert_only mach_vm_reclaim_error_t kr =
                mach_vm_reclaim_update_kernel_accounting(
                buffer->xrb_ringbuffer);
        xzm_debug_assert(kr == VM_RECLAIM_SUCCESS);
    }
    return id;
}

static bool
xzm_reclaim_mark_smaller(xzm_reclaim_buffer_t buffer, uint64_t *front_id,
        uint64_t *back_id, uint8_t *front_start, size_t front_free_size,
        size_t used_size, size_t back_free_size, bool deferred, bool pristine,
        bool reusable)
{
    const size_t span_size = front_free_size + used_size + back_free_size;
    xzm_debug_assert(span_size <= UINT32_MAX);
    bool should_update_used = false;
    bool should_update_free_front = false, should_update_free_back = false;
    _malloc_lock_lock(&buffer->xrb_lock);
    bool usable = true;
    mach_vm_reclaim_state_t state;
    if (deferred) {
        xzm_debug_assert(*front_id != VM_RECLAIM_ID_NULL);
        // Mark the entire span as used
        state = _xzm_reclaim_mark_used_locked(buffer, *front_id, front_start,
                span_size, reusable, &should_update_used);
        usable = mach_vm_reclaim_is_reusable(state);
        if (usable) {
            *front_id = VM_RECLAIM_ID_NULL;
        }
    }
    if (usable) {
        if (front_free_size && !pristine) {
            // Mark the front as free. Note that it already has a reclaim id
            xzm_debug_assert(*front_id == VM_RECLAIM_ID_NULL);
            *front_id = xzm_reclaim_mark_free_locked(buffer, front_start,
                    front_free_size, reusable, &should_update_free_front);
        }
        if (back_free_size) {
            xzm_debug_assert(back_id);
            if (!pristine) {
                // Mark the back as free
                uint8_t *back_start = front_start + front_free_size +
                        used_size;
                *back_id = xzm_reclaim_mark_free_locked(buffer, back_start,
                        back_free_size, reusable, &should_update_free_back);
            } else {
                // Initialize the reclaim id now, because when the span
                // metadata is updated, it cannot overwrite any reclaim id we
                // set
                *back_id = VM_RECLAIM_ID_NULL;
            }
        }
    }
    _malloc_lock_unlock(&buffer->xrb_lock);
    if (should_update_used || should_update_free_front ||
            should_update_free_back) {
        mach_vm_reclaim_update_kernel_accounting(buffer->xrb_ringbuffer);
    }
    return usable;
}
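
/*
 * Illustrative example for xzm_reclaim_mark_smaller (hypothetical sizes, not
 * from the original source): shrinking a deferred 1MB free span around a
 * 256KB allocation in its middle might pass front_free_size = 256KB,
 * used_size = 256KB, back_free_size = 512KB. The whole 1MB entry is first
 * cancelled via *front_id; only if the kernel hadn't already reclaimed it do
 * the front and back remainders get re-entered as two smaller free spans
 * with fresh reclaim ids.
 */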
void
xzm_reclaim_force_sync(xzm_reclaim_buffer_t buffer)
{
    // This function is called in a loop when reclaim_mark_used fails while
    // trying to free a span in the reclaim buffer.
    mach_vm_reclaim_count_t capacity;
    __assert_only mach_vm_reclaim_error_t err;
    err = mach_vm_reclaim_ring_capacity(buffer->xrb_ringbuffer, &capacity);
    xzm_assert(err == VM_RECLAIM_SUCCESS);
    err = mach_vm_reclaim_ring_flush(buffer->xrb_ringbuffer, capacity);
    xzm_assert(err == VM_RECLAIM_SUCCESS);
}

void
xzm_reclaim_sync_and_resize(xzm_reclaim_buffer_t buffer)
{
    mach_vm_reclaim_error_t kr;
    mach_vm_reclaim_count_t count;
    kr = mach_vm_reclaim_ring_capacity(buffer->xrb_ringbuffer, &count);
    xzm_assert(kr == VM_RECLAIM_SUCCESS);
    mach_vm_reclaim_count_t new_count =
            mach_vm_reclaim_round_capacity(2 * count);
    kr = mach_vm_reclaim_ring_resize(buffer->xrb_ringbuffer, new_count);
    if (kr == VM_RECLAIM_SUCCESS) {
        _xzm_reclaim_id_cache_init(buffer);
    } else {
        // Must explicitly flush if the resize operation failed
        xzm_reclaim_force_sync(buffer);
    }
}

#endif // CONFIG_XZM_DEFERRED_RECLAIM

#pragma mark range group

OS_OPTIONS(xzm_range_group_alloc_flags, uint32_t,
    XZM_RANGE_GROUP_ALLOC_FLAGS_HUGE = 1 << 0,
    XZM_RANGE_GROUP_ALLOC_FLAGS_PURGEABLE = 1 << 1,
#if CONFIG_MTE
    XZM_RANGE_GROUP_ALLOC_FLAGS_MTE = 1 << 2,
#endif
);

static int
_xzm_range_group_vm_tag_for_segment(size_t size, bool huge)
{
    // Note: although there is already a VM_MEMORY_MALLOC_HUGE tag, which has
    // been there since prehistory, we'll use LARGE for huge segments to
    // ensure that any special handling from the kernel or other tools works
    // exactly as before (e.g. VM_MEMORY_MALLOC_HUGE is not included in
    // vm_memory_malloc_no_cow_mask)
    //
    // We use VM_MEMORY_MALLOC_SMALL for normal segment allocations so that
    // they are easily distinguishable from metadata allocations purely by
    // tag.
    return huge ? VM_MEMORY_MALLOC_LARGE : VM_MEMORY_MALLOC_SMALL;
}

static void * __alloc_size(2)
_xzm_range_group_alloc_mvm_segment(xzm_main_malloc_zone_t main, size_t size,
        size_t align, plat_map_t *map, xzm_range_group_alloc_flags_t rga_flags)
{
    bool huge = (rga_flags & XZM_RANGE_GROUP_ALLOC_FLAGS_HUGE);
    bool purgeable = (rga_flags & XZM_RANGE_GROUP_ALLOC_FLAGS_PURGEABLE);
    uint32_t flags = 0;
    if (os_unlikely(purgeable)) {
        flags |= MALLOC_PURGEABLE;
    }
#if XZM_NARROW_BUCKETING
    // If we're doing narrow bucketing, and we ourselves aren't enabling
    // VM user ranges, but we've detected that VM user ranges are active in
    // the address space (<-> entropic_base is set), we want to pass
    // DISABLE_ASLR to skip the mvm-layer ASLR, which would cause our
    // allocations to be placed at the opposite end of the heap range from
    // other pure data allocations and use an additional PTE
    if (main->xzmz_narrow_bucketing && !main->xzmz_use_ranges &&
            entropic_base) {
        flags |= DISABLE_ASLR;
    }
#endif
#if CONFIG_MTE
    if (rga_flags & XZM_RANGE_GROUP_ALLOC_FLAGS_MTE) {
        flags |= MALLOC_MTE_TAGGABLE;
    }
#endif
    int tag = _xzm_range_group_vm_tag_for_segment(size, huge);
    if (os_likely(align == 0)) {
        return mvm_allocate_pages_plat(size, XZM_SEGMENT_SHIFT, flags, tag,
                map);
    } else {
        // mvm_allocate_pages_plat takes the log2 of the alignment
        size_t align_pow = __builtin_ctzl(align);
        xzm_debug_assert(align_pow < UINT8_MAX);
        align_pow = MAX(align_pow, XZM_SEGMENT_SHIFT);
        return mvm_allocate_pages_plat(size, align_pow, flags, tag, map);
    }
}

MALLOC_USED
static void * __alloc_size(1)
_xzm_range_group_alloc_anywhere_segment(mach_vm_address_t hint, size_t size,
        size_t align, plat_map_t *map, xzm_range_group_alloc_flags_t rga_flags)
{
    bool huge = (rga_flags & XZM_RANGE_GROUP_ALLOC_FLAGS_HUGE);
    bool purgeable = (rga_flags & XZM_RANGE_GROUP_ALLOC_FLAGS_PURGEABLE);
    (void)map;
    int tag = _xzm_range_group_vm_tag_for_segment(size, huge);
    mach_vm_address_t vm_addr = hint;
    mach_vm_size_t allocation_size = (mach_vm_size_t)size;
    int flags = VM_FLAGS_ANYWHERE | VM_MAKE_TAG(tag);
    if (os_unlikely(purgeable)) {
        flags |= VM_FLAGS_PURGABLE;
    }
#if CONFIG_MTE
    if (rga_flags & XZM_RANGE_GROUP_ALLOC_FLAGS_MTE) {
        flags |= VM_FLAGS_MTE;
    }
#endif
    align = MAX(align, XZM_SEGMENT_SIZE);
    // alignment must be a power of 2 for the allocation mask to work
    xzm_debug_assert(powerof2(align));
    mach_vm_offset_t allocation_mask = (mach_vm_offset_t)align - 1;
    kern_return_t kr = mach_vm_map(mach_task_self(), &vm_addr,
            allocation_size, allocation_mask, flags, MEMORY_OBJECT_NULL, 0,
            FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
    if (kr) {
        if (kr != KERN_NO_SPACE) {
            xzm_debug_abort_with_reason("Failed to allocate data segment",
                    kr);
            malloc_zone_error(0, false,
                    "Failed to allocate segment (size=%lu, flags=%x, kr=%d)\n",
                    (unsigned long)size, flags, kr);
        }
        return NULL;
    }
    xzm_debug_assert(vm_addr);
    xzm_debug_assert(vm_addr % align == 0);
    return (void *)vm_addr;
}

static uintptr_t
_xzm_range_group_bump_alloc_segment(xzm_range_group_t rg, size_t size,
        bool warn_on_exhaustion)
{
    uintptr_t segment_addr = 0;
    if (rg->xzrg_warned_full) {
        return segment_addr;
    }
    // Reserve space for a new segment
    _malloc_lock_lock(&rg->xzrg_lock);
    if (rg->xzrg_remaining >= size) {
        if (rg->xzrg_next == rg->xzrg_skip_addr) {
            if (rg->xzrg_direction == XZM_FRONT_INCREASING) {
                rg->xzrg_next += rg->xzrg_skip_size;
            } else {
                xzm_debug_assert(rg->xzrg_direction == XZM_FRONT_DECREASING);
                rg->xzrg_next -= rg->xzrg_skip_size;
            }
        }
        // In the decreasing direction, xzrg_next points to the _end_ of what
        // will be the next segment we serve, and we subtract the size to be
        // allocated from its initial value. In the increasing direction, the
        // initial value is the start of the segment we're going to serve,
        // and we increase afterward.
        if (rg->xzrg_direction == XZM_FRONT_DECREASING) {
            rg->xzrg_next -= size;
        }
        segment_addr = rg->xzrg_next;
        xzm_debug_assert(segment_addr % size == 0);
        if (rg->xzrg_direction == XZM_FRONT_INCREASING) {
            rg->xzrg_next += size;
        }
        rg->xzrg_remaining -= size;
    }
    if (!segment_addr && warn_on_exhaustion) {
        if (!rg->xzrg_warned_full) {
            rg->xzrg_warned_full = true;
            malloc_report(ASL_LEVEL_WARNING,
                    "Failed to allocate segment from range group - out of space\n");
        }
    }
    _malloc_lock_unlock(&rg->xzrg_lock);
    return segment_addr;
}

static void * __alloc_size(2)
_xzm_range_group_alloc_data_segment(xzm_range_group_t rg, size_t size,
        size_t alignment, plat_map_t *map,
        xzm_range_group_alloc_flags_t rga_flags)
{
    xzm_debug_assert(rg->xzrg_id == XZM_RANGE_GROUP_DATA);
#if CONFIG_VM_USER_RANGES || CONFIG_MACOS_RANGES
    if (rg->xzrg_main_ref->xzmz_use_ranges) {
        // On systems with VM user ranges, an ANYWHERE allocation with one of
        // the VM_MEMORY_MALLOC tags will be placed in the data range
        // automatically.
        mach_vm_address_t hint = 0;
#if CONFIG_MACOS_RANGES
        // On macOS, the data range isn't strongly isolated. We just choose
        // an otherwise empty normal range of the address space to allocate
        // into using a hint.
        hint = rg->xzrg_base;
#endif // CONFIG_MACOS_RANGES
        return _xzm_range_group_alloc_anywhere_segment(hint, size, alignment,
                map, rga_flags);
    }
#endif // CONFIG_VM_USER_RANGES || CONFIG_MACOS_RANGES
    return _xzm_range_group_alloc_mvm_segment(rg->xzrg_main_ref, size,
            alignment, map, rga_flags);
}

static void * __alloc_size(2)
_xzm_range_group_alloc_ptr_segment(xzm_range_group_t rg, size_t size,
        plat_map_t *map, xzm_range_group_alloc_flags_t rga_flags)
{
    xzm_debug_assert(rg->xzrg_id == XZM_RANGE_GROUP_PTR);
    xzm_debug_assert(!(rga_flags & XZM_RANGE_GROUP_ALLOC_FLAGS_HUGE));
    xzm_debug_assert(size == XZM_SEGMENT_SIZE);
    xzm_debug_assert(rg->xzrg_main_ref->xzmz_segment_group_count !=
            XZM_SEGMENT_GROUP_IDS_COUNT_DATA_ONLY);
#if CONFIG_VM_USER_RANGES || CONFIG_MACOS_RANGES
    if (rg->xzrg_main_ref->xzmz_use_ranges)
#else
    if ((0))
#endif // CONFIG_VM_USER_RANGES || CONFIG_MACOS_RANGES
    {
        bool allow_fallback = false;
#if CONFIG_MALLOC_PROCESS_IDENTITY && CONFIG_MACOS_RANGES
        if (!malloc_process_is_security_critical(malloc_process_identity)) {
            allow_fallback = true;
        }
#endif // CONFIG_MALLOC_PROCESS_IDENTITY && CONFIG_MACOS_RANGES
        mach_vm_address_t segment_addr =
                _xzm_range_group_bump_alloc_segment(rg, size,
                !allow_fallback);
        if (!segment_addr) {
#if CONFIG_MALLOC_PROCESS_IDENTITY && CONFIG_MACOS_RANGES
            if (allow_fallback) {
                goto fallback;
            }
#endif // CONFIG_MALLOC_PROCESS_IDENTITY && CONFIG_MACOS_RANGES
            xzm_debug_abort("Pointer range exhausted");
            return NULL;
        }
#if CONFIG_VM_USER_RANGES || CONFIG_MACOS_RANGES
        mach_vm_address_t vm_addr = segment_addr;
        mach_vm_size_t vm_size = (mach_vm_size_t)size;
        int alloc_flags = VM_FLAGS_OVERWRITE |
                VM_MAKE_TAG(VM_MEMORY_MALLOC_SMALL);
#if CONFIG_MTE
        if (rga_flags & XZM_RANGE_GROUP_ALLOC_FLAGS_MTE) {
            alloc_flags |= VM_FLAGS_MTE;
        }
#endif
        kern_return_t kr = mach_vm_map(mach_task_self(), &vm_addr, vm_size,
                /* mask */ 0, alloc_flags, MEMORY_OBJECT_NULL, /* offset */ 0,
                /* copy */ FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
                VM_INHERIT_DEFAULT);
        if (kr != KERN_SUCCESS) {
            xzm_abort_with_reason(
                    "pointer range mach_vm_map() overwrite failed", kr);
        }
#endif // CONFIG_VM_USER_RANGES || CONFIG_MACOS_RANGES
        return (void *)segment_addr;
    }
#if CONFIG_MALLOC_PROCESS_IDENTITY && CONFIG_MACOS_RANGES
fallback:
#endif // CONFIG_MALLOC_PROCESS_IDENTITY && CONFIG_MACOS_RANGES
    return _xzm_range_group_alloc_mvm_segment(rg->xzrg_main_ref, size, 0, map,
            rga_flags);
}

static void * __alloc_size(2)
xzm_range_group_alloc_segment(xzm_range_group_t rg, size_t size,
        size_t alignment, plat_map_t *map,
        xzm_range_group_alloc_flags_t rga_flags)
{
    if (rg->xzrg_id == XZM_RANGE_GROUP_DATA) {
        return _xzm_range_group_alloc_data_segment(rg, size, alignment, map,
                rga_flags);
    } else {
        xzm_debug_assert(alignment == 0);
        // Only huge segment bodies (which must be in the data range) can be
        // purgeable
        xzm_debug_assert(!(rga_flags &
                XZM_RANGE_GROUP_ALLOC_FLAGS_PURGEABLE));
        return _xzm_range_group_alloc_ptr_segment(rg, size, map, rga_flags);
    }
}

static void
xzm_range_group_free_segment_body(xzm_range_group_t rg, void *body,
        size_t size, plat_map_t *map)
{
    xzm_debug_assert(rg->xzrg_id == XZM_RANGE_GROUP_DATA);
    int debug_flags = 0;
#ifdef DEBUG
    debug_flags = MALLOC_ABORT_ON_ERROR;
#endif // DEBUG
    mvm_deallocate_plat(body, size, debug_flags, map);
}

#if CONFIG_VM_USER_RANGES
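/*
 * Note (inferred, not from the original source): parse_void_ranges() below
 * consumes the "vm.malloc_ranges" sysctl string. Judging from the parsing
 * code (four strtoull(..., 16) calls, each skipping one separator
 * character), the expected payload is four hex addresses along the lines of
 * "<left.min> <left.max> <right.min> <right.max>"; the exact separator and
 * formatting are a kernel implementation detail.
 */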
static bool
parse_void_ranges(struct mach_vm_range *left_void,
        struct mach_vm_range *right_void)
{
    char buf[256];
    size_t bsz = sizeof(buf) - 1;
    char *s;
    int rc = sysctlbyname("vm.malloc_ranges", buf, &bsz, NULL, 0);
    if (rc == -1) {
        switch (errno) {
        case ENOENT:
#ifdef DEBUG
            malloc_report(ASL_LEVEL_INFO, "VM user ranges not supported\n");
#endif
            break;
        case EPERM:
            // TODO: make this fatal in processes that strictly need VM user
            // ranges
            malloc_report(ASL_LEVEL_ERR,
                    "sysctlbyname(\"vm.malloc_ranges\") denied\n");
            break;
        default:
            xzm_abort_with_reason("sysctlbyname(\"vm.malloc_ranges\") failed",
                    errno);
            break;
        }
        return false;
    }
    buf[bsz] = '\0';
    s = buf;
    left_void->min_address = strtoull(s, &s, 16);
    s++;
    left_void->max_address = strtoull(s, &s, 16);
    s++;
    right_void->min_address = strtoull(s, &s, 16);
    s++;
    right_void->max_address = strtoull(s, &s, 16);
    return true;
}
#endif // CONFIG_VM_USER_RANGES

#if MALLOC_TARGET_EXCLAVES || CONFIG_VM_USER_RANGES || CONFIG_MACOS_RANGES

#define XZM_RANGE_SEPARATION GiB(4)
#define XZM_DATA_RANGE_SIZE GiB(10)
#define XZM_POINTER_RANGE_SIZE GiB(16)
#define XZM_PAGE_TABLE_GRANULE MiB(32)
#define XZM_PAGE_TABLE_BITS 25

// Exclaves don't have struct mach_vm_range, so we'll just define our own
// little identical type
struct xzm_vm_range {
    uint64_t min_address;
    uint64_t max_address;
};
static_assert(sizeof(struct mach_vm_range) == sizeof(struct xzm_vm_range),
        "compatible vm range size");
static_assert(offsetof(struct mach_vm_range, min_address) ==
        offsetof(struct xzm_vm_range, min_address),
        "compatible vm range min_address offset");
static_assert(offsetof(struct mach_vm_range, max_address) ==
        offsetof(struct xzm_vm_range, max_address),
        "compatible vm range max_address offset");

static void
_xzm_main_malloc_zone_init_ptr_fronts(xzm_range_group_t range_groups,
        size_t allocation_front_count, struct xzm_vm_range *ranges,
        size_t range_count, plat_map_t *map)
{
    xzm_assert(allocation_front_count == 2);
    xzm_assert(range_count > 0);
    xzm_assert(ranges[0].min_address < ranges[0].max_address);
    if (range_count > 1) {
#if CONFIG_VM_USER_RANGES
        xzm_assert(range_count == 2);
        xzm_assert(ranges[1].min_address > ranges[0].max_address);
        xzm_assert(ranges[1].min_address < ranges[1].max_address);
#else
        xzm_abort_with_reason("unsupported range_count", range_count);
#endif
    }
    uint64_t total_span = 0;
    for (size_t i = 0; i < range_count; i++) {
        total_span += ranges[i].max_address - ranges[i].min_address;
    }
    uint64_t middle_pte_offset = roundup(total_span / 2,
            XZM_PAGE_TABLE_GRANULE);
    if (ranges[0].min_address + middle_pte_offset >= ranges[0].max_address) {
        xzm_assert(range_count == 2);
        middle_pte_offset += ranges[1].min_address - ranges[0].max_address;
    }
    uint64_t middle_pte = ranges[0].min_address + middle_pte_offset;
    xzm_assert(middle_pte % XZM_PAGE_TABLE_GRANULE == 0);
    uint64_t middle_pte_middle = middle_pte + (XZM_PAGE_TABLE_GRANULE / 2);
    uint64_t rg_up_size = 0;
    uint64_t rg_up_skip_addr = 0;
    uint64_t rg_up_skip_size = 0;
    uint64_t rg_down_size = 0;
    uint64_t rg_down_skip_addr = 0;
    uint64_t rg_down_skip_size = 0;
    if (range_count == 2) {
        if (middle_pte_middle > ranges[0].max_address) {
            xzm_assert(middle_pte_middle > ranges[1].min_address);
            xzm_assert(middle_pte_middle < ranges[1].max_address);
            // The right side (up) is not split
            rg_up_size = ranges[1].max_address - middle_pte_middle;
            // The left side (down) is split
            rg_down_size = (middle_pte_middle - ranges[1].min_address) +
                    (ranges[0].max_address - ranges[0].min_address);
            rg_down_skip_addr = ranges[1].min_address;
            rg_down_skip_size = ranges[1].min_address -
                    ranges[0].max_address;
        } else {
            xzm_assert(middle_pte_middle < ranges[0].max_address);
            xzm_assert(middle_pte_middle > ranges[0].min_address);
            // The right side (up) is split
            rg_up_size = (ranges[1].max_address - ranges[1].min_address) +
                    (ranges[0].max_address - middle_pte_middle);
            rg_up_skip_addr = ranges[0].max_address;
            rg_up_skip_size = ranges[1].min_address - ranges[0].max_address;
            // The left side (down) is not split
            rg_down_size = middle_pte_middle - ranges[0].min_address;
        }
    } else {
        xzm_assert(ranges[0].min_address < middle_pte_middle);
        xzm_assert(middle_pte_middle < ranges[0].max_address);
        rg_up_size = ranges[0].max_address - middle_pte_middle;
        rg_down_size = middle_pte_middle - ranges[0].min_address;
    }
    xzm_range_group_t ptr_rg_up = &range_groups[XZM_RANGE_GROUP_PTR + 0];
    xzm_debug_assert(ptr_rg_up->xzrg_id == XZM_RANGE_GROUP_PTR);
    ptr_rg_up->xzrg_base = middle_pte_middle;
    ptr_rg_up->xzrg_next = ptr_rg_up->xzrg_base;
    ptr_rg_up->xzrg_size = rg_up_size;
    ptr_rg_up->xzrg_remaining = ptr_rg_up->xzrg_size;
    ptr_rg_up->xzrg_skip_addr = rg_up_skip_addr;
    ptr_rg_up->xzrg_skip_size = rg_up_skip_size;
    ptr_rg_up->xzrg_direction = XZM_FRONT_INCREASING;
    xzm_range_group_t ptr_rg_down = &range_groups[XZM_RANGE_GROUP_PTR + 1];
    xzm_debug_assert(ptr_rg_down->xzrg_id == XZM_RANGE_GROUP_PTR);
    ptr_rg_down->xzrg_base = middle_pte_middle;
    ptr_rg_down->xzrg_next = ptr_rg_down->xzrg_base;
    ptr_rg_down->xzrg_size = rg_down_size;
    ptr_rg_down->xzrg_remaining = ptr_rg_down->xzrg_size;
    ptr_rg_down->xzrg_skip_addr = rg_down_skip_addr;
    ptr_rg_down->xzrg_skip_size = rg_down_skip_size;
    ptr_rg_down->xzrg_direction = XZM_FRONT_DECREASING;
}
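
/*
 * Illustrative note (not from the original source): both pointer fronts
 * start at middle_pte_middle, the midpoint of a 32MB page-table granule,
 * with one front growing up and the other growing down. With
 * XZM_PAGE_TABLE_GRANULE = MiB(32), a total span of, say, 16GB gives
 * middle_pte_offset = 8GB, so the two fronts initially share that one
 * granule (and, presumably, its page-table pages) before fanning out in
 * opposite directions.
 */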
#if CONFIG_VM_USER_RANGES
static void
_xzm_main_malloc_zone_choose_ptr_ranges(struct mach_vm_range left_void,
        struct mach_vm_range right_void, size_t ptr_rg_size, uint64_t entropy,
        struct mach_vm_range *ranges_out, size_t *ranges_count_inout)
{
    // For now, the caller needs to be able to handle 2 result ranges
    xzm_assert(*ranges_count_inout == 2);
    xzm_assert(left_void.min_address);
    xzm_assert(left_void.max_address >= left_void.min_address);
    xzm_assert(right_void.min_address >= left_void.max_address);
    xzm_assert(right_void.max_address >= right_void.min_address);

#define xzm_trunc_page_table_granule(addr) \
        ((addr) & ~(XZM_PAGE_TABLE_GRANULE - 1))

    // Note: the void boundaries should already be aligned to the page table
    // granule anyway
    //
    // |<----------------total span--------------->|
    // |<-left void->|<-data body->|<-right void->|
    // |<usable>|<pad>|<-data body->|<pad>|<usable>|
    // |<usable>|<-------data span------->|<usable>|
    uint64_t left_void_min = roundup(left_void.min_address,
            XZM_PAGE_TABLE_GRANULE);
    uint64_t left_void_limit =
            xzm_trunc_page_table_granule(left_void.max_address);
    if (left_void_limit < left_void_min) {
        // Shouldn't ever happen - the kernel would have to give us a
        // sub-granule left void that isn't granule-aligned. If it does, we
        // can pretend it gave us an empty left void that's actually "in" the
        // data range, technically.
        left_void_min = left_void_limit;
    }
    xzm_assert(left_void_min <= left_void_limit);
    uint64_t right_void_min = roundup(right_void.min_address,
            XZM_PAGE_TABLE_GRANULE);
    uint64_t right_void_limit =
            xzm_trunc_page_table_granule(right_void.max_address);
    if (right_void_limit < right_void_min) {
        // Same thing, shouldn't happen
        right_void_limit = right_void_min;
    }
    xzm_assert(right_void_min <= right_void_limit);
    xzm_assert(left_void_limit <= right_void_min);
    uint64_t total_span = right_void_limit - left_void_min;
    uint64_t data_body_span = right_void_min - left_void_limit;
    uint64_t data_left_pad = MIN(XZM_RANGE_SEPARATION,
            left_void_limit - left_void_min);
    uint64_t data_left_pad_start = left_void_limit - data_left_pad;
    uint64_t data_right_pad = MIN(XZM_RANGE_SEPARATION,
            right_void_limit - right_void_min);
    uint64_t data_right_pad_limit = right_void_min + data_right_pad;
    uint64_t data_span = data_left_pad + data_body_span + data_right_pad;
    xzm_assert(data_span < total_span);
    uint64_t usable_space = total_span - data_span;
    xzm_assert(usable_space >= ptr_rg_size);
    uint64_t starting_space = usable_space - ptr_rg_size;
    xzm_assert(starting_space % XZM_PAGE_TABLE_GRANULE == 0);
    // Note: + 1 because the final granule address is also usable
    uint64_t starting_candidate_granules =
            (starting_space / XZM_PAGE_TABLE_GRANULE) + 1;
    // Note: start_granules is small relative to entropy, so the modulo bias
    // is not significant
    uint64_t start_granule = entropy % starting_candidate_granules;
    uint64_t start_address = left_void_min +
            (start_granule * XZM_PAGE_TABLE_GRANULE);
    if (start_address >= data_left_pad_start) {
        start_address += data_span;
    }
    uint64_t limit_address = start_address + ptr_rg_size;
    if (start_address < data_left_pad_start &&
            limit_address > data_left_pad_start) {
        // The pointer range is split across the data range
        ranges_out[0] = (struct mach_vm_range){
            .min_address = start_address,
            .max_address = data_left_pad_start,
        };
        uint64_t left_range_span = data_left_pad_start - start_address;
        uint64_t right_range_span = ptr_rg_size - left_range_span;
        ranges_out[1] = (struct mach_vm_range){
            .min_address = data_right_pad_limit,
            .max_address = data_right_pad_limit + right_range_span,
        };
        *ranges_count_inout = 2;
    } else {
        // The pointer range is fully on one side of the data range
        ranges_out[0] = (struct mach_vm_range){
            .min_address = start_address,
            .max_address = limit_address,
        };
        *ranges_count_inout = 1;
    }
}

static kern_return_t
_xzm_main_malloc_zone_create_ptr_range(struct mach_vm_range range)
{
    // It's important that we use a malloc tag in the recipe so that the
    // kernel gives us a single object rather than chunking into many.
    mach_vm_range_recipe_v1_t recipe = {
        .range = range,
        .range_tag = MACH_VM_RANGE_FIXED,
        .vm_tag = VM_MEMORY_MALLOC_SMALL,
    };
    kern_return_t kr = mach_vm_range_create(mach_task_self(),
            MACH_VM_RANGE_FLAVOR_V1, (mach_vm_range_recipes_raw_t)&recipe,
            sizeof(recipe));
    switch (kr) {
    case KERN_SUCCESS:
        break;
    case KERN_DENIED:
        // TODO: make this fatal in processes that strictly need VM user
        // ranges
        malloc_report(ASL_LEVEL_ERR, "mach_vm_range_create() denied\n");
        return kr;
    case KERN_NOT_SUPPORTED:
        // Strange - in a process that doesn't have VM user ranges we would
        // have expected the sysctl to fail
        xzm_debug_abort("mach_vm_range_create() not supported?");
        return kr;
    default:
        xzm_abort_with_reason("unexpected error from mach_vm_range_create()",
                kr);
        return kr;
    }
    // Avoid malloc-no-CoW semantics on the pointer range reservation by
    // replacing the VM object for it with one that has a non-malloc tag.
    // Giving it VM_PROT_NONE causes the kernel to give us a single object
    // rather than chunking (which is important to avoid creating tons of
    // pointless VM objects), and hides it in vmmap by default.
    mach_vm_address_t overwrite_addr = (mach_vm_address_t)range.min_address;
    mach_vm_size_t overwrite_size =
            (mach_vm_size_t)(range.max_address - range.min_address);
    int alloc_flags = VM_FLAGS_OVERWRITE;
    kr = mach_vm_map(mach_task_self(), &overwrite_addr, overwrite_size,
            /* mask */ 0, alloc_flags, MEMORY_OBJECT_NULL, /* offset */ 0,
            /* copy */ FALSE, VM_PROT_NONE, VM_PROT_NONE,
            VM_INHERIT_DEFAULT);
    if (kr != KERN_SUCCESS) {
        xzm_abort_with_reason(
                "pointer range initial overwrite failed", kr);
    }
    return KERN_SUCCESS;
}
#endif // CONFIG_VM_USER_RANGES

#endif // MALLOC_TARGET_EXCLAVES || CONFIG_VM_USER_RANGES || CONFIG_MACOS_RANGES

void
xzm_main_malloc_zone_init_range_groups(xzm_main_malloc_zone_t main)
{
    // Basic initialization is done in xzm_main_malloc_zone_create() - here
    // we mainly deal with VM user ranges.
    MALLOC_STATIC_ASSERT(XZM_RANGE_GROUP_COUNT == 3,
            "all range groups need to be initialized");
#if CONFIG_VM_USER_RANGES
    struct mach_vm_range left_void, right_void;
    bool user_ranges_supported = parse_void_ranges(&left_void, &right_void);
    if (!user_ranges_supported) {
        return;
    }
    // VM user range support:
    //
    // We'll use the kernel default heap range for the DATA range.
    //
    // The ranges in the PTR range group:
    // - Should be separated from the data range (as defined by
    //   [void1.max_address, void2.min_address)) by at least 4G
    // - Should allow each allocation front to span 8G, possibly crossing the
    //   DATA range if necessary
    //
    // The configurations we support are:
    // - No user ranges at all, in which case we shouldn't get here
    // - User ranges support with 2 allocation fronts
    if (main->xzmz_allocation_front_count != 2) {
        xzm_abort_with_reason("unsupported allocation front count",
                main->xzmz_allocation_front_count);
    }
    size_t ptr_rg_size = XZM_POINTER_RANGE_SIZE;
    struct mach_vm_range ptr_ranges[2];
    size_t ptr_range_count = 2;
    _xzm_main_malloc_zone_choose_ptr_ranges(left_void, right_void,
            ptr_rg_size, malloc_entropy[1], ptr_ranges, &ptr_range_count);
    for (size_t i = 0; i < ptr_range_count; i++) {
        kern_return_t kr = _xzm_main_malloc_zone_create_ptr_range(
                ptr_ranges[i]);
        if (kr != KERN_SUCCESS) {
            return;
        }
    }
    main->xzmz_use_ranges = true;
    _xzm_main_malloc_zone_init_ptr_fronts(main->xzmz_range_groups,
            main->xzmz_allocation_front_count,
            (struct xzm_vm_range *)ptr_ranges, ptr_range_count, NULL);
    xzm_range_group_t data_rg =
            &main->xzmz_range_groups[XZM_RANGE_GROUP_DATA];
    xzm_debug_assert(data_rg->xzrg_id == XZM_RANGE_GROUP_DATA);
    // Note: these are recorded purely for introspection purposes
    data_rg->xzrg_base = (mach_vm_address_t)left_void.max_address;
    data_rg->xzrg_size = right_void.min_address - left_void.max_address;
    // end of CONFIG_VM_USER_RANGES
#elif CONFIG_MACOS_RANGES
    // We want a similar layout to embedded, with:
    // - A data range and a pointer range located in the first 64GB (L2) of
    //   the address space to economize PTE usage
    // - Guaranteed minimum separation between the pointer range and
    //   everything else
    // - Both ranges separated from the traditional "low space" by a few GB
    //   of buffer distance
    //
    // However, on macOS there are no "voids" for us to need the
    // mach_vm_range_create() interface to access, nor is there a special
    // data range that the kernel knows about.
    // Instead, we create our own strongly isolated pointer range
    // reservation, and have a more relaxed model for the data range that
    // permits reuse with general VA, allowing us to model it as a simple
    // starting address hint. An implication of the data range not being
    // strongly isolated is that it doesn't need to be contiguous.
    //
    // Each range should be able to grow to its standard size without
    // overflowing the first L2.
    //
    // So, our placement strategy will be:
    // - Place the pointer range, with its guards, in the space
    // - Then choose the data range hint somewhere in the remaining space

    // Start at 16GB to leave room in the low space for other VM allocations
#define XZM_MACOS_RANGES_START GiB(16)
    // End at 63GB to avoid crossing the commpage
#define XZM_MACOS_RANGES_END GiB(63)
    uint64_t range_first_candidate = XZM_MACOS_RANGES_START;
    uint64_t ptr_reservation_size = XZM_RANGE_SEPARATION +
            XZM_POINTER_RANGE_SIZE + XZM_RANGE_SEPARATION;
    uint64_t range_last_candidate = XZM_MACOS_RANGES_END -
            ptr_reservation_size;
    uint64_t ptr_candidate_span = range_last_candidate -
            range_first_candidate;
    uint64_t ptr_candidate_granules = ptr_candidate_span /
            XZM_PAGE_TABLE_GRANULE;
    uint64_t ptr_entropy = (uint32_t)(malloc_entropy[1]);
    uint64_t ptr_granule = ptr_entropy % ptr_candidate_granules;
    uint64_t ptr_start = range_first_candidate +
            (ptr_granule * XZM_PAGE_TABLE_GRANULE);
    xzm_assert(ptr_start + ptr_reservation_size <= XZM_MACOS_RANGES_END);
    // Reserve the pointer range with a big max-protection == PROT_NONE
    // region. It is important that we not give it a malloc tag or protection
    // above PROT_NONE to avoid chunking or special CoW treatment from the VM
    // - we need for this to be just one entry.
    mach_vm_address_t ptr_addr = (mach_vm_address_t)ptr_start;
    mach_vm_size_t reservation_size = (mach_vm_size_t)ptr_reservation_size;
    int alloc_flags = 0; // fixed, no tag
    kern_return_t kr = mach_vm_map(mach_task_self(), &ptr_addr,
            reservation_size, /* mask */ 0, alloc_flags, MEMORY_OBJECT_NULL,
            /* offset */ 0, /* copy */ FALSE, VM_PROT_NONE, VM_PROT_NONE,
            VM_INHERIT_DEFAULT);
    if (kr != KERN_SUCCESS) {
        // We could fall back to mvm allocation, but we want this to fail
        // loudly if something starts preventing us from being able to make
        // the reservation we need
        xzm_abort_with_reason(
                "pointer range initial reservation failed", kr);
    }
    main->xzmz_use_ranges = true;
    mach_vm_address_t ptr_base = ptr_addr + XZM_RANGE_SEPARATION;
    struct xzm_vm_range range = {
        .min_address = ptr_base,
        .max_address = ptr_base + XZM_POINTER_RANGE_SIZE,
    };
    _xzm_main_malloc_zone_init_ptr_fronts(main->xzmz_range_groups,
            main->xzmz_allocation_front_count, &range, 1, NULL);
    // Choose a starting hint for the data range
    uint64_t data_candidate_span = ptr_candidate_span - XZM_DATA_RANGE_SIZE;
    uint64_t data_candidate_granules = data_candidate_span /
            XZM_PAGE_TABLE_GRANULE;
    uint64_t data_entropy = malloc_entropy[1] >> 32;
    uint64_t data_granule = data_entropy % data_candidate_granules;
    uint64_t data_start;
    if (data_granule < ptr_granule) {
        data_start = XZM_MACOS_RANGES_START +
                (data_granule * XZM_PAGE_TABLE_GRANULE);
    } else {
        uint64_t ptr_reservation_granules = ptr_reservation_size /
                XZM_PAGE_TABLE_GRANULE;
        uint64_t data_adjusted_granule = data_granule +
                ptr_reservation_granules;
        data_start = XZM_MACOS_RANGES_START +
                (data_adjusted_granule * XZM_PAGE_TABLE_GRANULE);
    }
    xzm_assert(data_start < ptr_start ||
            data_start >= ptr_start + ptr_reservation_size);
    xzm_assert(data_start + XZM_DATA_RANGE_SIZE <= XZM_MACOS_RANGES_END);
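    /*
     * Worked example of the skip logic above (illustrative, not from the
     * original source): with the pointer reservation occupying granules
     * [ptr_granule, ptr_granule + ptr_reservation_granules), any
     * data_granule drawn at or past ptr_granule is shifted up by
     * ptr_reservation_granules. This keeps the data hint roughly uniformly
     * distributed over the granules that remain outside the reservation.
     */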
    xzm_range_group_t data_rg =
            &main->xzmz_range_groups[XZM_RANGE_GROUP_DATA];
    xzm_debug_assert(data_rg->xzrg_id == XZM_RANGE_GROUP_DATA);
    data_rg->xzrg_base = (mach_vm_address_t)data_start;
#endif // CONFIG_MACOS_RANGES
}

#pragma mark segment group

static void
_xzm_segment_group_clear_chunk(xzm_segment_group_t sg, uint8_t *start,
        size_t size);
static void
_xzm_segment_group_split_huge_segment(xzm_segment_group_t sg,
        xzm_segment_t segment, xzm_slice_count_t required_slices);

#if CONFIG_XZM_DEFERRED_RECLAIM

static void
__xzm_segment_cache_remove(xzm_segment_cache_t cache, xzm_segment_t segment)
{
    xzm_debug_assert(cache->xzsc_count > 0);
    cache->xzsc_count--;
    TAILQ_REMOVE(&cache->xzsc_head, segment, xzs_cache_entry);
}

static void
__xzm_segment_cache_insert(xzm_segment_cache_t cache, xzm_segment_t segment)
{
    xzm_debug_assert(cache->xzsc_count < cache->xzsc_max_count);
    TAILQ_INSERT_HEAD(&cache->xzsc_head, segment, xzs_cache_entry);
    cache->xzsc_count++;
}

static void
_xzm_segment_group_cache_invalidate(xzm_segment_group_t sg,
        xzm_segment_t segment)
{
#ifdef DEBUG
    _malloc_lock_assert_owner(&sg->xzsg_cache.xzsc_lock);
#endif
    __xzm_segment_cache_remove(&sg->xzsg_cache, segment);
    // Free memory backing segment header
    xzm_metapool_free(
            &sg->xzsg_main_ref->xzmz_metapools[XZM_METAPOOL_SEGMENT],
            segment);
}

static void
_xzm_segment_group_cache_mark_free(xzm_segment_group_t sg,
        xzm_segment_t segment)
{
#ifdef DEBUG
    _malloc_lock_assert_owner(&sg->xzsg_cache.xzsc_lock);
    // Make sure that this segment isn't in the segment table before we put
    // it into the cache
    xzm_segment_table_entry_s *entry;
    entry = _xzm_ptr_to_table_entry(_xzm_segment_start(segment),
            sg->xzsg_main_ref);
    xzm_debug_assert(entry->xste_val == 0);
#endif
    xzm_debug_assert(segment->xzs_reclaim_id == VM_RECLAIM_ID_NULL);
    xzm_main_malloc_zone_t main = sg->xzsg_main_ref;
    xzm_reclaim_buffer_t buffer = main->xzmz_reclaim_buffer;
    uint8_t *addr = _xzm_segment_start(segment);
    size_t size = _xzm_segment_size(segment);
    segment->xzs_reclaim_id = _xzm_reclaim_mark_free(buffer, addr, size,
            false);
    __xzm_segment_cache_insert(&sg->xzsg_cache, segment);
}

// Attempt to re-use a segment from the cache. Returns true if successful.
// If unsuccessful, the segment's cache entry has been invalidated.
static bool
_xzm_segment_group_cache_mark_used(xzm_segment_group_t sg,
        xzm_segment_t segment)
{
#ifdef DEBUG
    _malloc_lock_assert_owner(&sg->xzsg_cache.xzsc_lock);
#endif
    xzm_debug_assert(segment->xzs_reclaim_id != VM_RECLAIM_ID_NULL);
    xzm_main_malloc_zone_t main = sg->xzsg_main_ref;
    xzm_reclaim_buffer_t buffer = main->xzmz_reclaim_buffer;
    mach_vm_reclaim_state_t state;
    state = _xzm_reclaim_mark_used(buffer, segment->xzs_reclaim_id,
            _xzm_segment_start(segment), _xzm_segment_size(segment), false);
    if (!mach_vm_reclaim_is_reusable(state)) {
        // Entry has been reclaimed by the kernel since being placed in cache
        _xzm_segment_group_cache_invalidate(sg, segment);
        return false;
    }
    segment->xzs_reclaim_id = VM_RECLAIM_ID_NULL;
    __xzm_segment_cache_remove(&sg->xzsg_cache, segment);
    return true;
}

// Evict a segment from the cache
static void
_xzm_segment_group_cache_evict(xzm_segment_group_t sg)
{
#ifdef DEBUG
    _malloc_lock_assert_owner(&sg->xzsg_cache.xzsc_lock);
#endif
    // approximate the oldest segment by evicting the tail
    xzm_segment_t segment = TAILQ_LAST(&sg->xzsg_cache.xzsc_head,
            xzm_segment_cache_head_s);
    xzm_debug_assert(segment->xzs_reclaim_id != VM_RECLAIM_ID_NULL);
    if (_xzm_segment_group_cache_mark_used(sg, segment)) {
        _malloc_lock_unlock(&sg->xzsg_cache.xzsc_lock);
        // Segment isn't in segment table while in the cache, so pass false
        // for free_from_table while deallocating
        _xzm_segment_group_segment_deallocate(sg, segment, false);
        _malloc_lock_lock(&sg->xzsg_cache.xzsc_lock);
    }
}
#endif // CONFIG_XZM_DEFERRED_RECLAIM

// mimalloc: mi_slice_bin8
MALLOC_ALWAYS_INLINE MALLOC_INLINE
static size_t
xzm_slice_bin8(xzm_slice_count_t slice_count)
{
    xzm_debug_assert(slice_count != 0);
    if (slice_count <= 8) {
        return slice_count - 1;
    }
    xzm_debug_assert(slice_count <= XZM_SLICES_PER_SEGMENT);
    slice_count--;
    int msb = 63 - __builtin_clzl(slice_count);
    return ((msb << 2) + ((slice_count >> (msb - 2)) & 0x3)) - 5;
}
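
/*
 * Worked example of the binning above (not from the original source): slice
 * counts 1..8 map directly to bins 0..7. Past that the bins are
 * pseudo-logarithmic, four sub-bins per power of two, keyed off the top bits
 * of (slice_count - 1): e.g. a slice_count of 9 (slice_count - 1 = 8,
 * msb = 3) lands in bin (3 << 2) + 0 - 5 = 7, 11 lands in bin 8, 13 in bin
 * 9, 16 in bin 10, and 17 in bin 11.
 */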
// mimalloc: mi_slice_bin
MALLOC_ALWAYS_INLINE MALLOC_INLINE
static size_t
xzm_slice_bin(xzm_slice_count_t slice_count)
{
    xzm_debug_assert(slice_count * XZM_SEGMENT_SLICE_SIZE <= XZM_SEGMENT_SIZE);
    xzm_debug_assert(xzm_slice_bin8(XZM_SLICES_PER_SEGMENT) <
            XZM_SPAN_QUEUE_COUNT);
    size_t bin = xzm_slice_bin8(slice_count);
    xzm_debug_assert(bin < XZM_SPAN_QUEUE_COUNT);
    return bin;
}

// mimalloc: mi_span_queue_for
static xzm_span_queue_t
xzm_span_queue_for(xzm_segment_group_t sg, xzm_slice_count_t slice_count)
{
    size_t bin = xzm_slice_bin(slice_count);
    xzm_span_queue_t sq = &sg->xzsg_spans[bin];
    xzm_debug_assert(sq->xzsq_slice_count >= slice_count);
    return sq;
}

#ifdef DEBUG
static void
_xzm_segment_group_assert_correct_span_queue(xzm_segment_group_t sg,
        xzm_slice_t slice)
{
    xzm_slice_kind_t kind = slice->xzc_bits.xzcb_kind;
    xzm_assert(_xzm_slice_kind_is_free_span(kind));
    xzm_slice_count_t slice_count;
    if (kind == XZM_SLICE_KIND_SINGLE_FREE) {
        slice_count = 1;
    } else {
        slice_count = slice->xzcs_slice_count;
    }
    xzm_span_queue_t sq = xzm_span_queue_for(sg, slice_count);
    xzm_free_span_t span;
    LIST_FOREACH(span, &sq->xzsq_queue, xzc_entry) {
        if (span == slice) {
            return;
        }
    }
    xzm_abort("Didn't find free span in expected span queue");
}

// mimalloc: mi_segment_is_valid
static bool
_xzm_segment_group_segment_is_valid(xzm_segment_group_t sg,
        xzm_segment_t segment)
{
    xzm_assert(segment->xzs_segment_group == sg);
    xzm_slice_t end = _xzm_segment_slices_end(segment);
    xzm_slice_t slice = _xzm_segment_slices_begin(segment);
    if (segment->xzs_kind == XZM_SEGMENT_KIND_HUGE) {
        xzm_assert(segment->xzs_used == 1);
        xzm_chunk_t chunk = slice;
        xzm_assert(chunk->xzc_bits.xzcb_kind == XZM_SLICE_KIND_HUGE_CHUNK);
        xzm_assert(chunk->xzcs_slice_count == segment->xzs_slice_count);
        return true;
    }
    _malloc_lock_assert_owner(&sg->xzsg_lock);
    while (slice < end) {
        xzm_slice_kind_t kind = slice->xzc_bits.xzcb_kind;
        switch (kind) {
        case XZM_SLICE_KIND_TINY_CHUNK:
            slice++;
            break;
        case XZM_SLICE_KIND_SMALL_CHUNK:
        case XZM_SLICE_KIND_SMALL_FREELIST_CHUNK:
        case XZM_SLICE_KIND_LARGE_CHUNK: {
            size_t slice_index = _xzm_slice_index(segment, slice);
            size_t slice_count = slice->xzcs_slice_count;
            xzm_assert(slice_count > 1);
            slice++;
            size_t extra = MIN(slice_count - 1, XZM_MAX_SLICE_OFFSET);
            for (size_t i = 1; i <= extra; i++, slice++) {
                xzm_assert(slice->xzc_bits.xzcb_kind ==
                        XZM_SLICE_KIND_MULTI_BODY);
                xzm_assert(slice->xzsl_slice_offset_bytes ==
                        (uint32_t)(sizeof(struct xzm_slice_s) * i));
            }
            size_t last_slice_index = slice_index + slice_count - 1;
            xzm_assert(last_slice_index < segment->xzs_slice_entry_count);
            xzm_slice_t last = &segment->xzs_slices[last_slice_index];
            if (last >= slice) {
                xzm_assert(last->xzc_bits.xzcb_kind ==
                        XZM_SLICE_KIND_MULTI_BODY);
                xzm_assert(last->xzsl_slice_offset_bytes ==
                        (uint32_t)(sizeof(struct xzm_slice_s) *
                        (slice_count - 1)));
            }
            slice = last + 1;
            break;
        }
        case XZM_SLICE_KIND_GUARD: {
            size_t slice_count = slice->xzcs_slice_count;
            slice++;
            for (size_t i = 1; i < slice_count; i++, slice++) {
                xzm_assert(slice->xzc_bits.xzcb_kind ==
                        XZM_SLICE_KIND_MULTI_BODY);
                xzm_assert(slice->xzsl_slice_offset_bytes ==
                        (uint32_t)(sizeof(struct xzm_slice_s) * i));
            }
            // Adjacent guards should always be coalesced
            if (slice < end) {
                xzm_assert(slice->xzc_bits.xzcb_kind != XZM_SLICE_KIND_GUARD);
            }
            break;
        }
        case XZM_SLICE_KIND_HUGE_CHUNK:
            xzm_abort("huge chunk in normal segment");
            break;
        case XZM_SLICE_KIND_SINGLE_FREE: {
            xzm_assert(slice->xzc_mzone_idx == XZM_MZONE_INDEX_INVALID);
            _xzm_segment_group_assert_correct_span_queue(sg, slice);
#if CONFIG_XZM_DEFERRED_RECLAIM
            mach_vm_reclaim_id_t *reclaim_id =
                    _xzm_segment_slice_meta_reclaim_id(segment, slice);
            xzm_assert(*reclaim_id == VM_RECLAIM_ID_NULL ||
                    !slice->xzc_bits.xzcb_is_pristine);
#endif // CONFIG_XZM_DEFERRED_RECLAIM
            slice++;
            break;
        }
        case XZM_SLICE_KIND_MULTI_FREE: {
            xzm_assert(slice->xzc_mzone_idx == XZM_MZONE_INDEX_INVALID);
            _xzm_segment_group_assert_correct_span_queue(sg, slice);
            size_t slice_index = _xzm_slice_index(segment, slice);
            size_t slice_count = slice->xzcs_slice_count;
            xzm_assert(slice_count > 1);
            size_t last_slice_index = slice_index + slice_count - 1;
            xzm_assert(last_slice_index < segment->xzs_slice_entry_count);
            xzm_slice_t last = &segment->xzs_slices[last_slice_index];
            xzm_assert(last->xzc_bits.xzcb_kind == XZM_SLICE_KIND_MULTI_BODY);
            xzm_assert(last->xzsl_slice_offset_bytes ==
                    (uint32_t)(sizeof(struct xzm_slice_s) *
                    (slice_count - 1)));
#if CONFIG_XZM_DEFERRED_RECLAIM
            mach_vm_reclaim_id_t *reclaim_id =
                    _xzm_segment_slice_meta_reclaim_id(segment, slice);
            xzm_assert(*reclaim_id == VM_RECLAIM_ID_NULL ||
                    !slice->xzc_bits.xzcb_is_pristine);
#endif // CONFIG_XZM_DEFERRED_RECLAIM
            slice = last + 1;
            break;
        }
        default:
            xzm_abort_with_reason("Unexpected slice kind", (unsigned)kind);
            break;
        }
    }
    return true;
}
#endif // DEBUG
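// Deferred reclamation of free spans: rather than madvising immediately, a
// free span's pages are handed to the kernel's deferred reclaim ring buffer
// (mark_free below). Before such a span may be reused it has to be taken
// back out of the buffer (mark_used); if the kernel has already reclaimed
// the entry (or is busy reclaiming it), the attempt fails and the caller
// must leave the span alone or treat its contents as gone.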
#if CONFIG_XZM_DEFERRED_RECLAIM
static void
_xzm_segment_group_span_mark_free(xzm_segment_group_t sg, xzm_free_span_t span)
{
    xzm_debug_assert(_xzm_segment_group_uses_deferred_reclamation(sg));
    xzm_debug_assert(_xzm_slice_kind_is_free_span(span->xzc_bits.xzcb_kind));
    xzm_main_malloc_zone_t main = sg->xzsg_main_ref;
    xzm_malloc_zone_t zone = &main->xzmz_base;
    xzm_reclaim_buffer_t buffer = main->xzmz_reclaim_buffer;
    mach_vm_reclaim_id_t *reclaim_id = _xzm_slice_meta_reclaim_id(zone, span);
    xzm_debug_assert(*reclaim_id == VM_RECLAIM_ID_NULL);
    size_t span_size = _xzm_free_span_size(span);
    uint8_t *span_start = _xzm_slice_start(zone, span);
    *reclaim_id = _xzm_reclaim_mark_free(buffer, span_start, span_size, true);
}

static bool
_xzm_segment_group_span_mark_used(xzm_segment_group_t sg, xzm_free_span_t span)
{
    xzm_debug_assert(_xzm_segment_group_uses_deferred_reclamation(sg));
    xzm_debug_assert(_xzm_slice_kind_is_free_span(span->xzc_bits.xzcb_kind));
    xzm_main_malloc_zone_t main = sg->xzsg_main_ref;
    xzm_malloc_zone_t zone = &main->xzmz_base;
    if (!_xzm_slice_is_deferred(zone, span)) {
        // span has not been marked free
        return true;
    }
    xzm_reclaim_buffer_t buffer = main->xzmz_reclaim_buffer;
    mach_vm_reclaim_id_t *reclaim_id = _xzm_slice_meta_reclaim_id(zone, span);
    xzm_debug_assert(*reclaim_id != VM_RECLAIM_ID_NULL);
    size_t span_size = _xzm_free_span_size(span);
    uint8_t *span_start = _xzm_slice_start(zone, span);
    mach_vm_reclaim_state_t state;
    state = _xzm_reclaim_mark_used(buffer, *reclaim_id, span_start, span_size,
            true);
    if (mach_vm_reclaim_is_reusable(state)) {
        *reclaim_id = VM_RECLAIM_ID_NULL;
        return true;
    }
    return false;
}
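// _xzm_segment_group_span_mark_smaller (below) shrinks a single reclaim
// buffer entry that covers a whole free span down to the two free remainders
// around a piece being allocated out of the middle:
//
//   | front_free | used | back_free |
//
// Only the deferred-reclaim bookkeeping is touched here (the exact id
// handling is up to xzm_reclaim_mark_smaller); the slice metadata for the
// three pieces is updated afterwards by the split/allocate paths.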
static bool
_xzm_segment_group_span_mark_smaller(xzm_segment_group_t sg,
        xzm_free_span_t span, xzm_slice_count_t front_free_count,
        xzm_slice_count_t used_count, xzm_slice_count_t back_free_count)
{
    xzm_debug_assert(_xzm_segment_group_uses_deferred_reclamation(sg));
    xzm_debug_assert(_xzm_slice_kind_is_free_span(span->xzc_bits.xzcb_kind));
    xzm_debug_assert(front_free_count + used_count + back_free_count ==
            _xzm_free_span_slice_count(span));
    xzm_main_malloc_zone_t main = sg->xzsg_main_ref;
    xzm_malloc_zone_t zone = &main->xzmz_base;
    xzm_reclaim_buffer_t buffer = main->xzmz_reclaim_buffer;
    const bool deferred = _xzm_slice_is_deferred(zone, span);
    uint64_t *span_id = _xzm_slice_meta_reclaim_id(zone, span);
    uint8_t *span_start = _xzm_slice_start(zone, span);
    bool pristine = span->xzc_bits.xzcb_is_pristine;
    // Actual span metadata for the front/middle/back spans has not yet been
    // updated, we only set the deferred reclaim metadata for these spans
    xzm_free_span_t back_span = span + front_free_count + used_count;
    const size_t front_size = front_free_count << XZM_SEGMENT_SLICE_SHIFT;
    const size_t used_size = used_count << XZM_SEGMENT_SLICE_SHIFT;
    const size_t back_size = back_free_count << XZM_SEGMENT_SLICE_SHIFT;
    xzm_debug_assert(!back_size || span_start + front_size + used_size ==
            _xzm_slice_start(zone, back_span));
    uint64_t *back_id = back_size ?
            _xzm_slice_meta_reclaim_id(zone, back_span) : NULL;
    return xzm_reclaim_mark_smaller(buffer, span_id, back_id, span_start,
            front_size, used_size, back_size, deferred, pristine, true);
}

void
xzm_chunk_mark_free(xzm_malloc_zone_t zone, xzm_chunk_t chunk)
{
    xzm_main_malloc_zone_t main = _xzm_malloc_zone_main(zone);
    xzm_debug_assert(_xzm_chunk_should_defer_reclamation(main, chunk));
    xzm_reclaim_buffer_t buffer = main->xzmz_reclaim_buffer;
    mach_vm_reclaim_id_t *reclaim_id = _xzm_slice_meta_reclaim_id(zone, chunk);
    xzm_debug_assert(*reclaim_id == VM_RECLAIM_ID_NULL);
    size_t chunk_size;
    uint8_t *chunk_start = _xzm_chunk_start_ptr(zone, chunk, &chunk_size);
    *reclaim_id = _xzm_reclaim_mark_free(buffer, chunk_start, chunk_size,
            true);
}

bool
xzm_chunk_mark_used(xzm_malloc_zone_t zone, xzm_chunk_t chunk,
        bool *was_reclaimed)
{
    xzm_main_malloc_zone_t main = _xzm_malloc_zone_main(zone);
    xzm_debug_assert(_xzm_slice_kind_is_chunk(chunk->xzc_bits.xzcb_kind));
    xzm_debug_assert(_xzm_chunk_should_defer_reclamation(main, chunk));
    xzm_reclaim_buffer_t buffer = main->xzmz_reclaim_buffer;
    mach_vm_reclaim_id_t *reclaim_id = _xzm_slice_meta_reclaim_id(zone, chunk);
    xzm_debug_assert(*reclaim_id != VM_RECLAIM_ID_NULL);
    size_t chunk_size;
    uint8_t *chunk_start = _xzm_chunk_start_ptr(zone, chunk, &chunk_size);
    mach_vm_reclaim_state_t state;
    state = _xzm_reclaim_mark_used(buffer, *reclaim_id, chunk_start,
            chunk_size, true);
    if (was_reclaimed) {
        *was_reclaimed = (state != VM_RECLAIM_UNRECLAIMED);
    }
    if (mach_vm_reclaim_is_reusable(state)) {
        *reclaim_id = VM_RECLAIM_ID_NULL;
        return true;
    }
    return false;
}
#endif // CONFIG_XZM_DEFERRED_RECLAIM
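// A note on the set_id parameter of _xzm_segment_group_segment_span_free:
// when set_id is true the span's deferred reclaim id is (re)initialized to
// VM_RECLAIM_ID_NULL; when the caller has already placed the span in the
// reclaim buffer (as the deferred-reclaim split paths do), set_id is false
// and the existing id must be preserved - the assert in the function checks
// that a non-pristine span in that case really does carry a valid id.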
// mimalloc: mi_segment_span_free
//
// Precondition: sg is locked (except for huge segments)
static void
_xzm_segment_group_segment_span_free(xzm_segment_group_t sg,
        xzm_segment_t segment, xzm_slice_count_t slice_index,
        xzm_slice_count_t slice_count, bool set_id, bool is_pristine)
{
    xzm_debug_assert(slice_count != 0);
    xzm_debug_assert(slice_index < segment->xzs_slice_entry_count);

    // set first and last slice (the intermediates can be undetermined)
    //
    // TODO: leaving the intermediates undetermined means that you can't
    // reliably check whether an arbitrary slice in a segment belongs to a
    // chunk. That would be useful for:
    // - the checked memcpy trick
    // - malloc_claimed_address()
    // - possibly other things?
    //
    // However, for large allocations it would require updating large numbers
    // of slices, which is probably not worth the cost
    xzm_free_span_t span = &segment->xzs_slices[slice_index];
    span->xzc_bits.xzcb_is_pristine = is_pristine;
    if (slice_count == 1) {
        xzm_debug_assert(segment->xzs_kind != XZM_SEGMENT_KIND_HUGE);
        span->xzc_bits.xzcb_kind = XZM_SLICE_KIND_SINGLE_FREE;
    } else {
        span->xzc_bits.xzcb_kind = XZM_SLICE_KIND_MULTI_FREE;
        span->xzcs_slice_count = slice_count;
        xzm_debug_assert(slice_index + slice_count - 1 <
                segment->xzs_slice_entry_count);
        xzm_slice_t last = &segment->xzs_slices[slice_index + slice_count - 1];
        last->xzc_bits.xzcb_kind = XZM_SLICE_KIND_MULTI_BODY;
        last->xzsl_slice_offset_bytes =
                (uint32_t)(sizeof(struct xzm_slice_s) * (slice_count - 1));
    }
    if (segment->xzs_kind == XZM_SEGMENT_KIND_NORMAL) {
#ifdef DEBUG
        _malloc_lock_assert_owner(&sg->xzsg_lock);
#endif
        xzm_span_queue_t sq = xzm_span_queue_for(sg, slice_count);
        LIST_INSERT_HEAD(&sq->xzsq_queue, span, xzc_entry);
    }
#if CONFIG_XZM_DEFERRED_RECLAIM
    if (set_id) {
        mach_vm_reclaim_id_t *reclaim_id = _xzm_segment_slice_meta_reclaim_id(
                segment, span);
        *reclaim_id = VM_RECLAIM_ID_NULL;
    } else if (!is_pristine) {
        xzm_debug_assert(*_xzm_segment_slice_meta_reclaim_id(segment, span) !=
                VM_RECLAIM_ID_NULL);
    }
#endif // CONFIG_XZM_DEFERRED_RECLAIM
}

// mimalloc: mi_segment_slice_split
static xzm_free_span_t
_xzm_segment_group_segment_slice_split(xzm_segment_group_t sg,
        xzm_segment_t segment, xzm_free_span_t span,
        xzm_slice_count_t slice_count, bool uses_dr, bool front)
{
    xzm_debug_assert(_xzm_segment_for_slice(&sg->xzsg_main_ref->xzmz_base,
            span) == segment);
    xzm_debug_assert(span->xzc_bits.xzcb_kind == XZM_SLICE_KIND_MULTI_FREE);
    xzm_debug_assert(span->xzcs_slice_count > slice_count);
    xzm_debug_assert(segment->xzs_kind != XZM_SEGMENT_KIND_HUGE);

    // Find the start and length of the piece being split off and update its
    // slices
    xzm_free_span_t retval;
    xzm_slice_count_t index_to_free;
    xzm_slice_count_t count_to_free = span->xzcs_slice_count - slice_count;
    if (front) {
        retval = span + count_to_free;
        // We don't update the backpointers here because this span is about
        // to be used as a large chunk, but we do need to update the slice
        // count and kind since this span could be given back to
        // _segment_slice_split to split off the back end
        retval->xzcs_slice_count = span->xzcs_slice_count - count_to_free;
        // We could probably copy the bits wholesale, but for now only
        // explicitly copy the ones we know we need
        retval->xzc_bits.xzcb_kind = XZM_SLICE_KIND_MULTI_FREE;
        // Preserve the pristine bit, which is otherwise undefined for the
        // slice the split-off span starts at
        retval->xzc_bits.xzcb_is_pristine = span->xzc_bits.xzcb_is_pristine;
        index_to_free = _xzm_slice_index(segment, span);
    } else {
        retval = span;
        index_to_free = _xzm_slice_index(segment, span) + slice_count;
    }
    // If the segment group uses deferred reclaim, then the reclaim id for the
    // split span has already been initialized, so don't overwrite it
    _xzm_segment_group_segment_span_free(sg, segment, index_to_free,
            count_to_free, !uses_dr, span->xzc_bits.xzcb_is_pristine);
    return retval;
}
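// Guard creation below coalesces with any adjacent guard spans so that a
// guard region is always described by a single span (the DEBUG validator
// relies on this). Each guard entry that doesn't coalesce into an existing
// one bumps xzs_used, which keeps the segment from being freed while guards
// remain in it.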
static void
_xzm_segment_group_segment_create_guard(xzm_segment_group_t sg,
        xzm_segment_t segment, xzm_slice_count_t index)
{
    xzm_slice_t slice = &segment->xzs_slices[index];
    xzm_slice_count_t slice_count = 1;
    // Coalesce with next guard page
    if (&slice[1] < _xzm_segment_slices_end(segment) &&
            slice[1].xzc_bits.xzcb_kind == XZM_SLICE_KIND_GUARD) {
        slice_count += slice[1].xzcs_slice_count;
    }
    // Coalesce with previous guard page
    if (slice > _xzm_segment_slices_begin(segment)) {
        xzm_slice_t prev = _xzm_span_slice_first(slice - 1);
        if (prev->xzc_bits.xzcb_kind == XZM_SLICE_KIND_GUARD) {
            index -= prev->xzcs_slice_count;
            slice_count += prev->xzcs_slice_count;
            slice = prev;
        }
    }
    if (slice_count > 1) {
        // Set up backpointers
        for (int i = 1; i < slice_count; i++) {
            slice[i].xzsl_slice_offset_bytes = i * sizeof(struct xzm_slice_s);
            slice[i].xzc_bits.xzcb_kind = XZM_SLICE_KIND_MULTI_BODY;
        }
    } else {
        // This is a new guard page entry, increment segment count to avoid
        // trying to free this segment while it has guards
        segment->xzs_used++;
    }
    xzm_debug_assert(slice == &segment->xzs_slices[index]);
    slice->xzcs_slice_count = slice_count;

    // mprotect
    size_t size = XZM_SEGMENT_SLICE_SIZE * slice_count;
    void *start = _xzm_segment_slice_index_start(segment, index);
    int rc = mprotect(start, size, PROT_NONE);
    if (rc) {
        xzm_abort_with_reason("Failed to mprotect guard page", errno);
    }

    // Atomic store maybe?
    slice->xzc_bits.xzcb_kind = XZM_SLICE_KIND_GUARD;
}

// mimalloc: mi_segment_span_allocate
static xzm_chunk_t
_xzm_segment_group_segment_span_mark_allocated(xzm_segment_group_t sg,
        xzm_segment_t segment, xzm_slice_kind_t kind, size_t slice_index,
        xzm_slice_count_t slice_count)
{
    xzm_debug_assert(_xzm_slice_kind_is_chunk(kind));
    xzm_debug_assert(slice_index < segment->xzs_slice_entry_count);
    xzm_slice_t slice = &segment->xzs_slices[slice_index];
    xzm_chunk_t chunk = slice;

    // set slice back pointers for the first XZM_MAX_SLICE_OFFSET entries
    size_t extra = MIN(slice_count - 1, XZM_MAX_SLICE_OFFSET);
    if (slice_index + extra >= segment->xzs_slice_entry_count) {
        // huge objects may have more slices than available entries in the
        // segment->xzs_slices table
        extra = segment->xzs_slice_entry_count - slice_index - 1;
    }
    slice++;
    for (size_t i = 1; i <= extra; i++, slice++) {
        slice->xzc_bits.xzcb_kind = XZM_SLICE_KIND_MULTI_BODY;
        slice->xzsl_slice_offset_bytes =
                (uint32_t)(sizeof(struct xzm_slice_s) * i);
    }

    // And also for the last one, if not set already (the last one is needed
    // for coalescing)
    size_t last_slice_index = slice_index + slice_count - 1;
    if (kind != XZM_SLICE_KIND_HUGE_CHUNK) {
        xzm_debug_assert(last_slice_index < segment->xzs_slice_entry_count);
        xzm_slice_t last = &segment->xzs_slices[last_slice_index];
        if (last >= slice) {
            last->xzc_bits.xzcb_kind = XZM_SLICE_KIND_MULTI_BODY;
            last->xzsl_slice_offset_bytes =
                    (uint32_t)(sizeof(struct xzm_slice_s) *
                    (slice_count - 1));
        }
    }

    // Update the chunk slice last, setting the kind at the very end to
    // "publish" the chunk for the enumerator protocol
    if (kind != XZM_SLICE_KIND_TINY_CHUNK) {
        chunk->xzcs_slice_count = slice_count;
    } else {
        xzm_debug_assert(slice_count == 1);
    }
    // TODO: atomic store, compiler barrier
    chunk->xzc_bits.xzcb_kind = kind;
#if CONFIG_XZM_DEFERRED_RECLAIM
    mach_vm_reclaim_id_t *reclaim_id = _xzm_segment_slice_meta_reclaim_id(
            segment, chunk);
    *reclaim_id = VM_RECLAIM_ID_NULL;
#endif // CONFIG_XZM_DEFERRED_RECLAIM
    segment->xzs_used++;
    return chunk;
}

// Taken from xnu/osfmk/kern/zalloc.c
static inline uint32_t
dist_u32(uint32_t a, uint32_t b)
{
    return a < b ? b - a : a - b;
}
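// The helper below clears `n` randomly chosen set bits from `mask`. Picking
// the `bit`th least significant set bit works by clearing the bottom `bit`
// set bits of a scratch copy (m &= m - 1 clears the lowest set bit); the
// lowest remaining set bit of the copy is then the one to flip off in mask.
// Worked example: mask = 0b101101, bit = 2: m becomes 0b101000,
// __builtin_ctz(m) = 3, so mask ^= (1 << 3) leaves 0b100101.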
static uint32_t
_xzm_random_clear_n_bits(uint32_t mask, uint32_t pop, uint32_t n)
{
    for (; n--; pop--) {
        uint32_t bit = arc4random_uniform(pop);
        uint32_t m = mask;
        // Clear the bottom `bit` bits from m...
        for (; bit; bit--) {
            m &= (m - 1);
        }
        // ... in order to clear the `bit`th least significant set bit in
        // mask
        mask ^= 1 << __builtin_ctz(m);
    }
    return mask;
}

// Create a bitmap `width` bits wide with `pop` set bits
static uint32_t
_xzm_random_bits(uint32_t pop, uint32_t width)
{
    uint32_t mask = (uint32_t)((1ull << width) - 1);
    uint32_t retval;
    uint32_t cur;
    if (3 * width / 4 <= pop) {
        // Caller wants >75% of the bits set, so set them all and clear <25%
        retval = mask;
        cur = width;
    } else if (pop <= width / 4) {
        retval = 0;
        cur = 0;
    } else {
        // A masked value from arc4random should contain ~`width/2` set bits
        retval = arc4random() & mask;
        cur = __builtin_popcount(retval);
        if (dist_u32(cur, pop) > dist_u32(width - cur, pop)) {
            // If the opposite mask has a closer popcount, then start with
            // that
            cur = width - cur;
            retval ^= mask;
        }
    }
    if (cur < pop) {
        // Setting `pop - cur` bits is really clearing that many from the
        // opposite mask.
        retval ^= mask;
        retval = _xzm_random_clear_n_bits(retval, width - cur, pop - cur);
        retval ^= mask;
    } else if (pop < cur) {
        retval = _xzm_random_clear_n_bits(retval, cur, cur - pop);
    }
    xzm_debug_assert(__builtin_popcount(retval) == pop);
    xzm_debug_assert((retval & ~mask) == 0);
    return retval;
}
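// A run of num_chunks equal-sized chunks has num_chunks + 1 gaps where a
// guard can be placed (before each chunk and after the last):
//
//   [G?] chunk [G?] chunk [G?] chunk [G?]
//
// guard_mask selects guard_count of those gaps at random: in the loop below,
// bit i of the mask is the gap before chunk i, and the final surviving bit
// covers the gap after the last chunk.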
static xzm_chunk_t
_xzm_segment_group_segment_span_init_run(xzm_segment_group_t sg,
        xzm_segment_t segment, xzm_slice_kind_t kind,
        xzm_preallocate_list_s *preallocate_list,
        xzm_slice_count_t start_index, xzm_slice_count_t total_slices,
        xzm_slice_count_t guard_count, xzm_slice_count_t num_chunks)
{
    xzm_chunk_t retval = NULL;
    uint32_t guard_mask;
    if (guard_count) {
        guard_mask = _xzm_random_bits(guard_count, num_chunks + 1);
    } else {
        guard_mask = 0;
    }
    xzm_slice_count_t slices_per_chunk = 0;
    if (kind == XZM_SLICE_KIND_TINY_CHUNK) {
        slices_per_chunk = 1;
    } else if (kind == XZM_SLICE_KIND_SMALL_CHUNK) {
        slices_per_chunk = XZM_SMALL_CHUNK_SIZE / XZM_SEGMENT_SLICE_SIZE;
    } else if (kind == XZM_SLICE_KIND_SMALL_FREELIST_CHUNK) {
        slices_per_chunk =
                XZM_SMALL_FREELIST_CHUNK_SIZE / XZM_SEGMENT_SLICE_SIZE;
    } else {
        xzm_debug_assert(!preallocate_list);
        xzm_debug_assert(guard_count == 0);
        xzm_debug_assert(kind == XZM_SLICE_KIND_LARGE_CHUNK);
        xzm_debug_assert(num_chunks == 1);
        slices_per_chunk = total_slices;
    }
    xzm_debug_assert((num_chunks * slices_per_chunk + guard_count) ==
            total_slices);
    xzm_debug_assert((start_index + total_slices) <=
            segment->xzs_slice_entry_count);
    xzm_slice_count_t index = start_index;
    bool is_pristine = segment->xzs_slices[index].xzc_bits.xzcb_is_pristine;
    for (int i = 0; i < num_chunks; i++) {
        if (guard_mask & 1) {
            _xzm_segment_group_segment_create_guard(sg, segment, index);
            index++;
        }
        guard_mask >>= 1;
        xzm_chunk_t chunk = _xzm_segment_group_segment_span_mark_allocated(sg,
                segment, kind, index, slices_per_chunk);
        chunk->xzc_bits.xzcb_is_pristine = is_pristine;
        index += slices_per_chunk;
        if (i == 0) {
            retval = chunk;
        } else {
            SLIST_INSERT_HEAD(preallocate_list, chunk, xzc_slist_entry);
        }
    }
    xzm_debug_assert(guard_mask <= 1);
    if (guard_mask) {
        _xzm_segment_group_segment_create_guard(sg, segment, index);
        index++;
    }
    xzm_debug_assert(index - start_index == total_slices);
    return retval;
}

// mimalloc: mi_segments_page_find_and_allocate
//
// Precondition: sg is locked
static xzm_chunk_t
_xzm_segment_group_find_and_allocate_chunk(xzm_segment_group_t sg,
        xzm_slice_kind_t kind, xzm_xzone_guard_config_t guard_config,
        xzm_preallocate_list_s *preallocate_list,
        xzm_slice_count_t slice_count, size_t alignment)
{
    xzm_debug_assert(_xzm_slice_kind_is_chunk(kind));
    xzm_debug_assert(kind != XZM_SLICE_KIND_TINY_CHUNK || slice_count == 1);
    xzm_debug_assert(slice_count != 0);
    xzm_debug_assert(slice_count * XZM_SEGMENT_SLICE_SIZE <=
            XZM_LARGE_BLOCK_SIZE_MAX);
    xzm_debug_assert(alignment == 0 || kind == XZM_SLICE_KIND_LARGE_CHUNK);
    xzm_debug_assert(kind != XZM_SLICE_KIND_TINY_CHUNK ||
            guard_config != NULL);
    xzm_debug_assert(kind != XZM_SLICE_KIND_SMALL_CHUNK ||
            guard_config != NULL);
    xzm_debug_assert(kind != XZM_SLICE_KIND_SMALL_FREELIST_CHUNK ||
            guard_config != NULL);
    xzm_debug_assert(kind != XZM_SLICE_KIND_LARGE_CHUNK ||
            guard_config == NULL);

    if (alignment <= XZM_SEGMENT_SLICE_SIZE) {
        // Large chunks guarantee page alignment
        alignment = 0;
    }
    xzm_slice_count_t alignment_slices;
    if (os_convert_overflow(alignment / XZM_SEGMENT_SLICE_SIZE,
            &alignment_slices)) {
        xzm_debug_abort_with_reason("Unexpected align value", alignment);
        return NULL;
    }

    xzm_slice_count_t total_slice_count;
    uint8_t chunks_in_run;
    uint8_t guards;
    if (guard_config && guard_config->xxgc_max_run_length) {
        chunks_in_run = arc4random_uniform(guard_config->xxgc_max_run_length)
                + 1;
        total_slice_count = chunks_in_run * slice_count;
        guards = (guard_config->xxgc_density * total_slice_count) / 256;
        uint32_t remainder =
                (guard_config->xxgc_density * total_slice_count) % 256;
        // Short-circuit to avoid a call to corecrypto in the common case
        // where the guard-page density divides evenly into the allocated
        // pages
        if (remainder && remainder > arc4random_uniform(256)) {
            guards++;
        }
        total_slice_count += guards;
    } else {
        total_slice_count = slice_count;
        chunks_in_run = 1;
        guards = 0;
    }
    xzm_debug_assert(total_slice_count <=
            (XZM_LARGE_BLOCK_SIZE_MAX / XZM_SEGMENT_SLICE_SIZE) ||
            // Aligned allocations can request more than LARGE_BLOCK_SIZE
            // slices from the span queue
            alignment != 0);
    // At present, we only allow 1 guard page between chunks in a run, so it
    // shouldn't be possible to have more guards than chunks
    xzm_debug_assert(chunks_in_run >= guards);

    if (alignment_slices) {
        // We only need to allocate (slice_count + alignment_slices - 1)
        // slices to guarantee that there will be a slice_count long span at
        // the correct alignment
        xzm_slice_count_t max_align_slices = alignment_slices ?
                alignment_slices - 1 : 0;
        if (os_add_overflow(total_slice_count, max_align_slices,
                &total_slice_count)) {
            xzm_debug_abort_with_reason("Unexpected total slice count",
                    slice_count + max_align_slices);
            return NULL;
        }
        xzm_debug_assert(total_slice_count < XZM_SLICES_PER_SEGMENT);
    }
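    // Search the span queues in ascending size order, starting from the
    // smallest bin that could hold the request, and take the first span that
    // is large enough. Under deferred reclaim a candidate span can be lost
    // to the kernel mid-split, in which case the request size is restored
    // and the scan continues.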
    for (xzm_span_queue_t sq = xzm_span_queue_for(sg, total_slice_count);
            sq < &sg->xzsg_spans[XZM_SPAN_QUEUE_COUNT]; sq++) {
        // TODO: rather than allowing a range of span sizes in a span queue,
        // should all the spans be exactly the span queue size? Then this
        // would be a pop rather than a list scan.
        xzm_free_span_t span, tmp;
        LIST_FOREACH_SAFE(span, &sq->xzsq_queue, xzc_entry, tmp) {
            xzm_slice_count_t span_slice_count =
                    _xzm_free_span_slice_count(span);
            if (span_slice_count >= total_slice_count) {
                xzm_malloc_zone_t zone = &sg->xzsg_main_ref->xzmz_base;
                xzm_segment_t segment = _xzm_segment_for_slice(zone, span);
#if CONFIG_XZM_DEFERRED_RECLAIM
                xzm_slice_count_t old_total_slice_count = total_slice_count;
#endif // CONFIG_XZM_DEFERRED_RECLAIM
                xzm_slice_count_t front_free_count = 0;
                if (alignment_slices) {
                    // Split off the front to round the address up to
                    // alignment
                    xzm_slice_count_t actual_index = _xzm_slice_index(segment,
                            span);
                    xzm_slice_count_t desired_index = roundup(actual_index,
                            alignment_slices);
                    front_free_count = desired_index - actual_index;
                    xzm_debug_assert(slice_count <=
                            (total_slice_count - front_free_count));
                    // Take the alignment slices back out of our request
                    total_slice_count = slice_count;
                    if (front_free_count) {
                        span_slice_count -= front_free_count;
                    }
                }
                xzm_slice_count_t back_free_count =
                        span_slice_count - total_slice_count;
                bool uses_dr = false;
#if CONFIG_XZM_DEFERRED_RECLAIM
                uses_dr = _xzm_segment_group_uses_deferred_reclamation(sg);
                if (uses_dr) {
                    if (!_xzm_segment_group_span_mark_smaller(sg, span,
                            front_free_count, total_slice_count,
                            back_free_count)) {
                        total_slice_count = old_total_slice_count;
                        // span is busy being reclaimed by the kernel
                        continue;
                    }
                }
#endif // CONFIG_XZM_DEFERRED_RECLAIM
                LIST_REMOVE(span, xzc_entry);
                if (front_free_count) {
                    span = _xzm_segment_group_segment_slice_split(sg, segment,
                            span, span_slice_count, uses_dr, true);
                }
                if (back_free_count) {
                    _xzm_segment_group_segment_slice_split(sg, segment, span,
                            total_slice_count, uses_dr, false);
                }
                xzm_slice_count_t index = _xzm_slice_index(segment, span);
                xzm_chunk_t chunk;
                chunk = _xzm_segment_group_segment_span_init_run(sg, segment,
                        kind, preallocate_list, index, total_slice_count,
                        guards, chunks_in_run);
                xzm_debug_assert(chunk);
                xzm_debug_assert(_xzm_segment_group_segment_is_valid(sg,
                        segment));
                return chunk;
            }
        }
    }
    return NULL;
}

// mimalloc: mi_segment_init
static xzm_chunk_t
_xzm_segment_group_init_segment(xzm_segment_group_t sg, xzm_segment_t segment,
        void *body, size_t body_size, bool huge, bool is_pristine)
{
    xzm_chunk_t chunk = NULL;
    xzm_assert((uintptr_t)segment < XZM_LIMIT_ADDRESS);
    xzm_assert((uintptr_t)body < XZM_LIMIT_ADDRESS);
    xzm_debug_assert((uintptr_t)segment % XZM_METAPOOL_SEGMENT_ALIGN == 0);
    xzm_debug_assert((uintptr_t)body % XZM_SEGMENT_SIZE == 0);
    xzm_debug_assert(body_size % XZM_SEGMENT_SLICE_SIZE == 0);
    xzm_slice_count_t total_slices = 0;
    if (os_convert_overflow(body_size / XZM_SEGMENT_SLICE_SIZE,
            &total_slices)) {
        xzm_abort("Slice count too large in init_segment");
    }
    segment->xzs_segment_group = sg;
    segment->xzs_slice_count = total_slices;
    segment->xzs_slice_entry_count = MIN(total_slices, XZM_SLICES_PER_SEGMENT);
    segment->xzs_used = 0;
    segment->xzs_segment_body = body;
#if CONFIG_XZM_DEFERRED_RECLAIM
    segment->xzs_reclaim_id = VM_RECLAIM_ID_NULL;
#endif // CONFIG_XZM_DEFERRED_RECLAIM
    segment->xzs_kind = huge ? XZM_SEGMENT_KIND_HUGE : XZM_SEGMENT_KIND_NORMAL;
    if (huge) {
        chunk = _xzm_segment_group_segment_span_mark_allocated(sg, segment,
                XZM_SLICE_KIND_HUGE_CHUNK, 0, segment->xzs_slice_count);
        chunk->xzc_bits.xzcb_is_pristine = is_pristine;
    } else {
        // Lock the segment group to add this span - we'll return to the
        // caller with the segment group locked so they can then directly
        // allocate what they need
        _malloc_lock_lock(&sg->xzsg_lock);
        _xzm_segment_group_segment_span_free(sg, segment, 0, total_slices,
                true, is_pristine);
    }
    xzm_debug_assert(_xzm_segment_group_segment_is_valid(sg, segment));
    return chunk;
}

// mimalloc: mi_segment_alloc
//
// Used to allocate both normal and huge segments.
//
// Postcondition: for normal segments, the segment group lock is held on
// successful return
static bool
_xzm_segment_group_alloc_segment(xzm_segment_group_t sg,
        size_t required_bytes, size_t alignment, xzm_chunk_t *huge_chunk,
        bool purgeable)
{
    xzm_chunk_t chunk;
    xzm_debug_assert((required_bytes == 0 && huge_chunk == NULL) ||
            (required_bytes > 0 && huge_chunk != NULL));
    bool huge = (required_bytes != 0);
    // non-default segment alignment is only supported for huge chunks
    xzm_debug_assert(huge || alignment == 0);

    // The total number of bytes we need to allocate is then:
    // - For normal segments, exactly the standard segment size
    // - For huge segments, the required body size, rounded up to the next
    //   slice
    size_t total_required_bytes;
    if (huge) {
        total_required_bytes = roundup(required_bytes,
                XZM_SEGMENT_SLICE_SIZE);
    } else {
        total_required_bytes = XZM_SEGMENT_SIZE;
    }

    xzm_range_group_t rg = sg->xzsg_range_group;
    xzm_range_group_alloc_flags_t rga_flags = 0;
    if (huge) {
        rga_flags |= XZM_RANGE_GROUP_ALLOC_FLAGS_HUGE;
    }
    if (purgeable) {
        rga_flags |= XZM_RANGE_GROUP_ALLOC_FLAGS_PURGEABLE;
    }
#if CONFIG_MTE
    // XXX Note: we need to allocate all data segments as taggable in order
    // for tag_data to work, but the vast majority of the space will be for
    // large/huge, which is a significant waste. We're okay with that because
    // tag_data is not the default/production configuration, but we may need
    // to be more efficient about this in the future.
    if (_xzm_segment_group_memtag_enabled(sg)) {
        rga_flags |= XZM_RANGE_GROUP_ALLOC_FLAGS_MTE;
    }
#endif
    void *segment_body = xzm_range_group_alloc_segment(rg,
            total_required_bytes, alignment, mvm_plat_map(*map_ptr),
            rga_flags);
    if (!segment_body) {
        return false;
    }
    xzm_assert((uintptr_t)segment_body < XZM_LIMIT_ADDRESS);
    xzm_segment_t segment_meta = xzm_metapool_alloc(
            &sg->xzsg_main_ref->xzmz_metapools[XZM_METAPOOL_SEGMENT]);
    chunk = _xzm_segment_group_init_segment(sg, segment_meta, segment_body,
            total_required_bytes, huge, true);
    // Publish the segment in the segment table now that it has been properly
    // initialized
    _xzm_segment_table_allocated_at(sg->xzsg_main_ref, segment_body,
            segment_meta, !huge);
    if (huge) {
        *huge_chunk = chunk;
    }
    return true;
}

#if CONFIG_XZM_DEFERRED_RECLAIM
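// Best-fit scan over the cached huge segments: accept a segment only if it
// is large enough but less than 2x the request (at most 50% internal
// fragmentation), preferring the smallest such segment. Entries the kernel
// has already reclaimed are invalidated as the scan encounters them.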
static xzm_chunk_t
_xzm_segment_group_alloc_huge_chunk_from_cache(xzm_segment_group_t sg,
        xzm_slice_count_t slice_count)
{
    xzm_debug_assert(sg->xzsg_id == XZM_SEGMENT_GROUP_DATA_LARGE);
    xzm_segment_t best_seg, cur_seg, seg_tmp;
    xzm_segment_cache_t cache = &sg->xzsg_cache;
    xzm_chunk_t chunk = NULL;
    _malloc_lock_lock(&cache->xzsc_lock);
    if (cache->xzsc_count == 0) {
        _malloc_lock_unlock(&cache->xzsc_lock);
        return NULL;
    }
    xzm_reclaim_buffer_t buffer = sg->xzsg_main_ref->xzmz_reclaim_buffer;
    while (1) {
        best_seg = NULL;
        TAILQ_FOREACH_SAFE(cur_seg, &cache->xzsc_head, xzs_cache_entry,
                seg_tmp) {
            if (cur_seg->xzs_slice_count >= slice_count &&
                    // allow up to 50% fragmentation
                    (cur_seg->xzs_slice_count < (2 * slice_count)) &&
                    (best_seg == NULL || cur_seg->xzs_slice_count <
                    best_seg->xzs_slice_count)) {
                if (_xzm_reclaim_is_reusable(buffer, cur_seg->xzs_reclaim_id,
                        true)) {
                    best_seg = cur_seg;
                } else {
                    // Kernel has already reclaimed this entry or is in the
                    // process of trying to reclaim it.
                    _xzm_segment_group_cache_invalidate(sg, cur_seg);
                }
            }
        }
        if (best_seg == NULL) {
            // Unable to find a suitable entry
            _malloc_lock_unlock(&cache->xzsc_lock);
            return NULL;
        }
        if (_xzm_segment_group_cache_mark_used(sg, best_seg)) {
            // Successfully took the entry back from the reclaim buffer
            break;
        }
        // Otherwise the kernel reclaimed the entry after we checked it and
        // its cache entry has been invalidated - scan again
    }
    _malloc_lock_unlock(&cache->xzsc_lock);
    // Mark segment as allocated since it has been removed from the cache
    _xzm_segment_table_allocated_at(sg->xzsg_main_ref,
            _xzm_segment_start(best_seg), best_seg, false);
    chunk = (xzm_chunk_t)_xzm_segment_slices_begin(best_seg);
    return chunk;
}
#endif // CONFIG_XZM_DEFERRED_RECLAIM

// mimalloc: mi_segment_huge_page_alloc
static xzm_chunk_t
_xzm_segment_group_alloc_huge_chunk(xzm_segment_group_t sg,
        xzm_slice_count_t slice_count, bool clear, size_t alignment,
        bool purgeable)
{
    if (alignment < XZM_SEGMENT_SIZE) {
        // Huge chunks guarantee segment alignment
        alignment = 0;
    }
    xzm_debug_assert(alignment % XZM_SEGMENT_SIZE == 0);
    __assert_only bool defer_large = sg->xzsg_main_ref->xzmz_defer_large;
    xzm_debug_assert(sg->xzsg_id == XZM_SEGMENT_GROUP_DATA_LARGE ||
            !defer_large);
    xzm_debug_assert(sg->xzsg_id == XZM_SEGMENT_GROUP_DATA || defer_large);
    size_t required_bytes = (size_t)slice_count * XZM_SEGMENT_SLICE_SIZE;
    xzm_chunk_t chunk = NULL;
#if CONFIG_XZM_DEFERRED_RECLAIM
    if (sg->xzsg_id == XZM_SEGMENT_GROUP_DATA_LARGE &&
            sg->xzsg_cache.xzsc_max_count > 0 &&
            slice_count <= sg->xzsg_cache.xzsc_max_entry_slices &&
            alignment <= XZM_SEGMENT_SIZE) {
        chunk = _xzm_segment_group_alloc_huge_chunk_from_cache(sg,
                slice_count);
        if (chunk) {
            if (clear) {
                size_t chunk_size = 0;
                uint8_t *start = _xzm_chunk_start_ptr(
                        &sg->xzsg_main_ref->xzmz_base, chunk, &chunk_size);
#if CONFIG_REALLOC_CAN_USE_VMCOPY
                // rdar://140793773
                bzero(start, chunk_size);
#else
                _xzm_segment_group_clear_chunk(sg, start, chunk_size);
#endif
                chunk->xzc_bits.xzcb_is_pristine = true;
            } else {
                chunk->xzc_bits.xzcb_is_pristine = false;
            }
#ifdef DEBUG
            size_t chunk_size = 0;
            uintptr_t start = (uintptr_t)_xzm_chunk_start_ptr(
                    &sg->xzsg_main_ref->xzmz_base, chunk, &chunk_size);
            xzm_debug_assert(alignment == 0 || (start % alignment) == 0);
#endif // DEBUG
            return chunk;
        }
    }
#endif // CONFIG_XZM_DEFERRED_RECLAIM
    // huge chunks allocated from the VM are inherently clear
    bool allocated = _xzm_segment_group_alloc_segment(sg, required_bytes,
            alignment, &chunk, purgeable);
    return allocated ? chunk : NULL;
}
static xzm_chunk_t
_xzm_segment_group_alloc_segment_and_chunk(xzm_segment_group_t sg,
        xzm_slice_kind_t kind, xzm_xzone_guard_config_t guard_config,
        xzm_preallocate_list_s *preallocate_list,
        xzm_slice_count_t slice_count, size_t alignment)
{
    xzm_chunk_t chunk = NULL;
    bool allocated = _xzm_segment_group_alloc_segment(sg, 0, 0, NULL, false);
    if (!allocated) {
        goto alloc_done;
    }
    // We hold the main lock again (alloc took it for us). Since we were able
    // to allocate, we should be sure to get the chunk.
    chunk = _xzm_segment_group_find_and_allocate_chunk(sg, kind, guard_config,
            preallocate_list, slice_count, alignment);
    xzm_debug_assert(chunk);
    _malloc_lock_unlock(&sg->xzsg_lock);
alloc_done:
    _malloc_lock_unlock(&sg->xzsg_alloc_lock);
    return chunk;
}

static void
_xzm_segment_group_bzero_chunk(xzm_segment_group_t sg, uint8_t *start,
        size_t size)
{
    // Put a ceiling on the amount of memory we dirty at a time
    size_t max_clear_size = KiB(512);
    while (size) {
        size_t next_clear_size = MIN(size, max_clear_size);
        bzero(start, next_clear_size);
        xzm_madvise(&sg->xzsg_main_ref->xzmz_base, start, next_clear_size);
        start += next_clear_size;
        size -= next_clear_size;
    }
}

static void
_xzm_segment_group_clear_chunk(xzm_segment_group_t sg, uint8_t *start,
        size_t size)
{
#if CONFIG_MADV_ZERO
    if (madvise(start, size, MADV_ZERO)) {
#ifdef DEBUG
        malloc_zone_error(0, false,
                "Failed to madvise(MADV_ZERO) chunk at %p, error: %d\n",
                start, errno);
#endif
        return _xzm_segment_group_bzero_chunk(sg, start, size);
    }
#else
    return _xzm_segment_group_bzero_chunk(sg, start, size);
#endif // CONFIG_MADV_ZERO
}

static void
_xzm_segment_group_overwrite_chunk(uint8_t *start, size_t size,
        xzm_range_group_alloc_flags_t rga_flags)
{
    mach_vm_address_t vm_addr = (mach_vm_address_t)start;
    mach_vm_size_t vm_size = (mach_vm_size_t)size;
    int alloc_flags = VM_FLAGS_OVERWRITE | VM_MAKE_TAG(VM_MEMORY_MALLOC_SMALL);
#if CONFIG_MTE
    if (rga_flags & XZM_RANGE_GROUP_ALLOC_FLAGS_MTE) {
        alloc_flags |= VM_FLAGS_MTE;
    }
#endif
    kern_return_t kr = mach_vm_map(mach_task_self(), &vm_addr, vm_size,
            /* mask */ 0, alloc_flags, MEMORY_OBJECT_NULL, /* offset */ 0,
            /* copy */ FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
            VM_INHERIT_DEFAULT);
    if (kr != KERN_SUCCESS) {
        xzm_abort_with_reason("mach_vm_map() overwrite failed", kr);
    }
}

// mimalloc: mi_segments_page_alloc
xzm_chunk_t
xzm_segment_group_alloc_chunk(xzm_segment_group_t sg, xzm_slice_kind_t kind,
        xzm_xzone_guard_config_t guard_config, xzm_slice_count_t slice_count,
        xzm_preallocate_list_s *preallocate_list, size_t alignment,
        bool clear, bool purgeable)
{
    if (kind == XZM_SLICE_KIND_HUGE_CHUNK) {
        xzm_debug_assert(guard_config == NULL);
        xzm_debug_assert(preallocate_list == NULL);
        xzm_debug_assert((slice_count >
                XZM_LARGE_BLOCK_SIZE_MAX / XZM_SEGMENT_SLICE_SIZE) ||
                (alignment > XZM_ALIGNMENT_MAX));
        return _xzm_segment_group_alloc_huge_chunk(sg, slice_count, clear,
                alignment, purgeable);
    }
    xzm_debug_assert(kind == XZM_SLICE_KIND_LARGE_CHUNK || alignment == 0);

    // Due to alignment, it's possible for the xzone layer to request a
    // single page large chunk. The segment layer assumes that such chunks
    // can't exist, so we round up the slice count here
    if (kind == XZM_SLICE_KIND_LARGE_CHUNK && slice_count == 1) {
        slice_count = 2;
    }
    // Consider: round up slice_count like mimalloc does?

    // We don't want to hold the main segment group lock while interacting
    // with the VM so that other allocations and deallocations that don't
    // need to can be served concurrently, but we do want to limit ourselves
    // to allocating only one new segment at a time so that we don't
    // overshoot what we need if many threads arrive during a period where a
    // new segment is needed.
    //
    // So, we also have an "allocations lock", and the protocol is that a
    // thread wanting to allocate new VM must acquire it before going off to
    // the VM.
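    //
    // Three paths from here:
    // 1. Fast path: a chunk is available under the main lock.
    // 2. We win the alloc lock and create a new segment ourselves.
    // 3. Someone else is already allocating: wait on the alloc lock, retry
    //    under the main lock (their new segment may satisfy us), and only
    //    allocate a segment of our own if that retry comes up empty.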
    xzm_chunk_t chunk = NULL;
    _malloc_lock_lock(&sg->xzsg_lock);
    chunk = _xzm_segment_group_find_and_allocate_chunk(sg, kind, guard_config,
            preallocate_list, slice_count, alignment);
    if (chunk) {
        // Happy path: we got the chunk and are done.
        _malloc_lock_unlock(&sg->xzsg_lock);
        goto done;
    }
    // First try didn't succeed, so we need a new segment. See if we can get
    // the alloc lock to allocate a new segment.
    bool gotlock = _malloc_lock_trylock(&sg->xzsg_alloc_lock);
    if (os_likely(gotlock)) {
        // We got it, so we can try to directly allocate a new segment.
        _malloc_lock_unlock(&sg->xzsg_lock);
        chunk = _xzm_segment_group_alloc_segment_and_chunk(sg, kind,
                guard_config, preallocate_list, slice_count, alignment);
    } else {
        // We didn't get it, so somebody else is allocating. We need to drop
        // the main lock...
        _malloc_lock_unlock(&sg->xzsg_lock);
        // ... and wait for them on the alloc lock.
        _malloc_lock_lock(&sg->xzsg_alloc_lock);
        // Now that we've got the alloc lock, reacquire the main lock and try
        // to allocate from the new segment that the thread we were waiting
        // for would have installed.
        _malloc_lock_lock(&sg->xzsg_lock);
        chunk = _xzm_segment_group_find_and_allocate_chunk(sg, kind,
                guard_config, preallocate_list, slice_count, alignment);
        _malloc_lock_unlock(&sg->xzsg_lock);
        if (chunk) {
            // We were able to allocate from the new segment.
            _malloc_lock_unlock(&sg->xzsg_alloc_lock);
        } else {
            // The entire new segment has already been exhausted while we
            // were waiting for the alloc lock. We have it now, so it's our
            // turn to allocate a new segment.
            chunk = _xzm_segment_group_alloc_segment_and_chunk(sg, kind,
                    guard_config, preallocate_list, slice_count, alignment);
        }
    }
done:
    if (chunk) {
        size_t chunk_size;
        uint8_t *start = _xzm_chunk_start_ptr(&sg->xzsg_main_ref->xzmz_base,
                chunk, &chunk_size);
#if CONFIG_MTE
        const bool memtag_enabled = _xzm_segment_group_memtag_block(sg,
                chunk_size);
#endif
        if (!chunk->xzc_bits.xzcb_is_pristine) {
            if (_xzm_segment_group_has_madvise_workaround(sg) &&
                    kind == XZM_SLICE_KIND_LARGE_CHUNK) {
                xzm_range_group_alloc_flags_t rga_flags = 0;
#if CONFIG_MTE
                if (memtag_enabled) {
                    rga_flags |= XZM_RANGE_GROUP_ALLOC_FLAGS_MTE;
                }
#endif
                _xzm_segment_group_overwrite_chunk(start, chunk_size,
                        rga_flags);
                chunk->xzc_bits.xzcb_is_pristine = true;
            } else if (clear) {
                // TODO: is this the right cutoff?
                if (kind == XZM_SLICE_KIND_TINY_CHUNK) {
                    // It's just one page that we're going to fault anyway
                    bzero(start, chunk_size);
                } else {
                    _xzm_segment_group_clear_chunk(sg, start, chunk_size);
                }
                chunk->xzc_bits.xzcb_is_pristine = true;
            }
        }
        if (os_unlikely(purgeable)) {
            xzm_debug_assert(guard_config == NULL);
            xzm_debug_assert(kind == XZM_SLICE_KIND_LARGE_CHUNK);
            mach_vm_address_t vm_addr = (mach_vm_address_t)start;
            mach_vm_size_t vm_size = (mach_vm_size_t)chunk_size;
            int alloc_flags = VM_FLAGS_OVERWRITE |
                    VM_MAKE_TAG(VM_MEMORY_MALLOC_SMALL) | VM_FLAGS_PURGABLE;
#if CONFIG_MTE
            if (memtag_enabled) {
                alloc_flags |= VM_FLAGS_MTE;
            }
#endif
            kern_return_t kr = mach_vm_map(mach_task_self(), &vm_addr,
                    vm_size, /* mask */ 0, alloc_flags, MEMORY_OBJECT_NULL,
                    /* offset */ 0, /* copy */ FALSE, VM_PROT_DEFAULT,
                    VM_PROT_ALL, VM_INHERIT_DEFAULT);
            if (kr != KERN_SUCCESS) {
                xzm_abort_with_reason("mach_vm_map() overwrite failed", kr);
            }
        }
    }
    return chunk;
}

// mimalloc: mi_segment_span_remove_from_queue
static void
_xzm_segment_group_segment_span_remove_from_queue(xzm_segment_group_t sg,
        xzm_free_span_t span, xzm_slice_count_t slice_count)
{
    (void)sg;
    (void)slice_count;
    LIST_REMOVE(span, xzc_entry);
}
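// Coalescing with a neighbor that sits in the deferred reclaim buffer
// requires taking that neighbor back from the kernel first; if the kernel is
// already reclaiming it, coalescing in that direction is abandoned and
// *success_out reports the failure to the caller.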
// mimalloc: mi_segment_span_free_coalesce
//
// TODO: more nuanced policy for zero-tracking
// - Right now we do the easy thing, which is to mark the entire coalesced
//   free span as dirty because the chunk being deallocated is
// - However, that's probably not optimal if we're coalescing something small
//   with a very large free span - e.g. the initial pristine span
// - One possibility would be to compare the sizes of the chunk being freed
//   and the spans being coalesced with - if the spans we're coalescing with
//   are relatively large and already zero-initialized, it may be better to
//   just zero the chunk being freed and maintain the zero initialization of
//   the new span as a whole
// - The risk of that, though, is that we may waste time zeroing chunks that
//   aren't going to wind up being used to serve cleared allocations anyway
static xzm_free_span_t
_xzm_segment_group_segment_span_free_coalesce(xzm_segment_group_t sg,
        xzm_segment_t segment, xzm_chunk_t chunk, bool *success_out)
{
    xzm_slice_count_t slice_count;
    if (_xzm_slice_kind_is_chunk(chunk->xzc_bits.xzcb_kind)) {
        slice_count = _xzm_chunk_slice_count(chunk);
    } else if (_xzm_slice_kind_is_free_span(chunk->xzc_bits.xzcb_kind)) {
        slice_count = _xzm_free_span_slice_count(chunk);
    } else {
        xzm_abort("attempting to coalesce slice of unexpected type");
    }
    xzm_free_span_t span = chunk;
    if (success_out) {
        *success_out = true;
    }
    // "unpublish" the chunk for enumeration as early as possible by
    // resetting its kind
    span->xzc_bits.xzcb_kind = XZM_SLICE_KIND_INVALID;
    xzm_slice_t next = chunk + slice_count;
    if (next < _xzm_segment_slices_end(segment) &&
            _xzm_slice_kind_is_free_span(next->xzc_bits.xzcb_kind)) {
#if CONFIG_XZM_DEFERRED_RECLAIM
        if (_xzm_segment_group_uses_deferred_reclamation(sg)) {
            if (!_xzm_segment_group_span_mark_used(sg, next)) {
                if (success_out) {
                    *success_out = false;
                }
                goto previous;
            }
        }
#endif // CONFIG_XZM_DEFERRED_RECLAIM
        xzm_slice_count_t next_slice_count = _xzm_free_span_slice_count(next);
        slice_count += next_slice_count; // extend
        _xzm_segment_group_segment_span_remove_from_queue(sg, next,
                next_slice_count);
    }
#if CONFIG_XZM_DEFERRED_RECLAIM
previous:
#endif // CONFIG_XZM_DEFERRED_RECLAIM
    if (span > _xzm_segment_slices_begin(segment)) {
        xzm_slice_t prev = _xzm_span_slice_first(span - 1);
        if (_xzm_slice_kind_is_free_span(prev->xzc_bits.xzcb_kind)) {
#if CONFIG_XZM_DEFERRED_RECLAIM
            if (_xzm_segment_group_uses_deferred_reclamation(sg)) {
                if (!_xzm_segment_group_span_mark_used(sg, prev)) {
                    if (success_out) {
                        *success_out = false;
                    }
                    goto done;
                }
            }
#endif // CONFIG_XZM_DEFERRED_RECLAIM
            xzm_slice_count_t prev_slice_count =
                    _xzm_free_span_slice_count(prev);
            slice_count += prev_slice_count;
            _xzm_segment_group_segment_span_remove_from_queue(sg, prev,
                    prev_slice_count);
            span = prev;
        }
    }
#if CONFIG_XZM_DEFERRED_RECLAIM
done:
#endif // CONFIG_XZM_DEFERRED_RECLAIM
    // and add the new free span
    _xzm_segment_group_segment_span_free(sg, segment,
            _xzm_slice_index(segment, span), slice_count, true, false);
    return span;
}

static void
_xzm_segment_group_segment_deallocate(xzm_segment_group_t sg,
        xzm_segment_t segment, bool free_from_table)
{
    // Remove the segment from the segment map
    if (free_from_table) {
        _xzm_segment_table_freed_at(sg->xzsg_main_ref,
                _xzm_segment_start(segment), segment, true);
    }
    size_t size = segment->xzs_slice_count * XZM_SEGMENT_SLICE_SIZE;
    xzm_range_group_free_segment_body(sg->xzsg_range_group,
            _xzm_segment_start(segment), size,
            mvm_plat_map(segment->xzs_map));
    xzm_metapool_free(&sg->xzsg_main_ref->xzmz_metapools[XZM_METAPOOL_SEGMENT],
            segment);
}

// mimalloc: mi_segment_free
static void
_xzm_segment_group_segment_free(xzm_segment_group_t sg, xzm_segment_t segment)
{
    xzm_debug_assert(segment->xzs_used == 0);
    xzm_free_span_t span = _xzm_segment_slices_begin(segment);
#if CONFIG_XZM_DEFERRED_RECLAIM
    xzm_free_span_t next;
    if (_xzm_segment_group_uses_deferred_reclamation(sg)) {
        if (!_xzm_segment_group_span_mark_used(sg, span)) {
            // kernel is holding this span busy
            goto fail;
        }
        while (_xzm_free_span_slice_count(span) <
                _xzm_segment_slice_count(segment)) {
            bool success;
            _xzm_segment_group_segment_span_remove_from_queue(sg, span,
                    span->xzcs_slice_count);
            span = _xzm_segment_group_segment_span_free_coalesce(sg, segment,
                    span, &success);
            if (!success) {
                // kernel is holding an adjacent span busy
                goto fail;
            }
        }
    }
#endif // CONFIG_XZM_DEFERRED_RECLAIM
    // The segment should have exactly one free span, which we need to now
    // remove from its span queue
    xzm_debug_assert(span->xzc_bits.xzcb_kind == XZM_SLICE_KIND_MULTI_FREE);
    xzm_debug_assert(span->xzcs_slice_count == segment->xzs_slice_count);
    _xzm_segment_group_segment_span_remove_from_queue(sg, span,
            span->xzcs_slice_count);
    // Drop the segment group lock before going off to the VM
    _malloc_lock_unlock(&sg->xzsg_lock);
    _xzm_segment_group_segment_deallocate(sg, segment, true);
    return;
#if CONFIG_XZM_DEFERRED_RECLAIM
fail:;
    // Kernel is holding a span busy, place any re-used spans back in the
    // buffer.
    next = _xzm_segment_slices_begin(segment);
    do {
        span = next;
        if (!_xzm_segment_slice_is_deferred(segment, span)) {
            _xzm_segment_group_span_mark_free(sg, span);
        }
        next = span + _xzm_free_span_slice_count(span);
    } while (next < _xzm_segment_slices_end(segment));
    _malloc_lock_unlock(&sg->xzsg_lock);
    return;
#endif // CONFIG_XZM_DEFERRED_RECLAIM
}
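// When trimming, the tail can sometimes be recycled rather than unmapped:
// under deferred reclaim, a segment-aligned remainder that is big enough to
// hold huge chunks (> XZM_LARGE_BLOCK_SIZE_MAX) is turned into a new huge
// segment and parked in the segment cache; anything else is freed back to
// the range group.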
// trim unneeded space off the end of a huge segment
static void
_xzm_segment_group_split_huge_segment(xzm_segment_group_t sg,
        xzm_segment_t segment, xzm_slice_count_t required_slices)
{
    xzm_debug_assert(segment->xzs_kind == XZM_SEGMENT_KIND_HUGE);
    xzm_debug_assert(segment->xzs_slice_count >= required_slices);
    if (segment->xzs_slice_count == required_slices) {
        return;
    }
    uint8_t *start = _xzm_segment_start(segment);
    uint8_t *end = _xzm_segment_end(segment);
    uint8_t *remainder = (uint8_t *)(start +
            required_slices * XZM_SEGMENT_SLICE_SIZE);
    if (remainder < end) {
        size_t total_remainder_size = (size_t)(end - remainder);
#if CONFIG_XZM_DEFERRED_RECLAIM
        // new segments must be created on a SEGMENT_SIZE boundary to be
        // annotated in the segment table
        uint8_t *remainder_seg = (uint8_t *)roundup((uintptr_t)remainder,
                XZM_SEGMENT_SIZE);
        xzm_metapool_t metapool =
                &sg->xzsg_main_ref->xzmz_metapools[XZM_METAPOOL_SEGMENT];
        xzm_segment_t remainder_metadata = xzm_metapool_alloc(metapool);
        size_t remainder_seg_size = (end - remainder_seg);
        // If the remainder that we're freeing spans a segment granule, we
        // need to clear the entries from the segment map
        if (remainder_seg < end) {
            _xzm_segment_table_freed_at(sg->xzsg_main_ref, remainder_seg,
                    segment, false);
        }
        _malloc_lock_lock(&sg->xzsg_cache.xzsc_lock);
        if (remainder_seg < end &&
                remainder_seg_size > XZM_LARGE_BLOCK_SIZE_MAX &&
                sg->xzsg_cache.xzsc_count < sg->xzsg_cache.xzsc_max_count) {
            // create a new segment from the end of this one and add it back
            // to the cache
            _xzm_segment_group_init_segment(sg, remainder_metadata,
                    remainder_seg, remainder_seg_size, true, false);
            _xzm_segment_group_cache_mark_free(sg, remainder_metadata);
            _malloc_lock_unlock(&sg->xzsg_cache.xzsc_lock);
            if (remainder_seg > remainder) {
                // free the unused portion of the current segment
                size_t remainder_size = total_remainder_size -
                        remainder_seg_size;
                xzm_range_group_free_segment_body(sg->xzsg_range_group,
                        (void *)remainder, remainder_size, NULL);
            }
        } else {
            _malloc_lock_unlock(&sg->xzsg_cache.xzsc_lock);
            // cannot create a cached segment out of the remainder, free it
            // instead.
            xzm_metapool_free(metapool, remainder_metadata);
            xzm_range_group_free_segment_body(sg->xzsg_range_group,
                    (void *)remainder, total_remainder_size, NULL);
        }
#else // CONFIG_XZM_DEFERRED_RECLAIM
        uint8_t *remainder_seg = (uint8_t *)roundup((uintptr_t)remainder,
                XZM_SEGMENT_SIZE);
        // If the body that we're freeing spans a segment granule, we need to
        // clear the entries from the segment map
        if (remainder_seg < end) {
            _xzm_segment_table_freed_at(sg->xzsg_main_ref, remainder_seg,
                    segment, false);
        }
        xzm_range_group_free_segment_body(sg->xzsg_range_group,
                (void *)remainder, total_remainder_size, NULL);
#endif // CONFIG_XZM_DEFERRED_RECLAIM
        // re-initialize original segment with reduced slice count
        _xzm_segment_group_init_segment(sg, segment,
                _xzm_segment_start(segment),
                required_slices * XZM_SEGMENT_SLICE_SIZE, true, false);
    }
    xzm_debug_assert(_xzm_segment_end(segment) == remainder);
}

#if CONFIG_XZM_DEFERRED_RECLAIM
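// Freeing a huge chunk to the cache must unpublish the segment from the
// segment table before inserting it, since the kernel may reclaim and reuse
// the data while stale metadata is still visible. Once the table has been
// updated the insert must succeed, invalidating or evicting older entries as
// needed to make room.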
static bool
_xzm_segment_group_free_huge_chunk_to_cache(xzm_segment_group_t sg,
        xzm_segment_t segment, xzm_chunk_t chunk)
{
    xzm_segment_cache_t cache = &sg->xzsg_cache;
    xzm_segment_t entry = NULL, tmp_entry = NULL;
    xzm_debug_assert(cache->xzsc_max_count > 0);
    if (segment->xzs_slice_count > cache->xzsc_max_entry_slices) {
        // Do this check (and all others that can cause us to return false)
        // before modifying the segment table
        return false;
    }
    // The data for this segment could be asynchronously reclaimed and reused
    // before the metadata is invalidated/removed from the segment table, so
    // we need to remove this segment from the segment table before putting
    // it into the cache. If reused, the segment will be marked allocated in
    // _xzm_segment_group_alloc_huge_chunk_from_cache
    _xzm_segment_table_freed_at(sg->xzsg_main_ref,
            _xzm_segment_start(segment), segment, true);
#if CONFIG_MTE
    // We are committed to returning the chunk to the cache and have removed
    // access to it from the segment table. We can safely retag now before
    // taking the cache lock.
    if (_xzm_segment_group_memtag_enabled(sg)) {
        size_t chunk_size = 0;
        void *ptr = _xzm_chunk_start_ptr(&sg->xzsg_main_ref->xzmz_base,
                chunk, &chunk_size);
        memtag_tag_canonical(ptr, chunk_size);
        // Note: for better protection from canonical pointers into huge
        // chunks we could retag with a random tag here (which will require
        // code changes on the alloc path also).
    }
#endif
    _malloc_lock_lock(&cache->xzsc_lock);
    xzm_reclaim_buffer_t buffer = sg->xzsg_main_ref->xzmz_reclaim_buffer;
    if (sg->xzsg_cache.xzsc_count == sg->xzsg_cache.xzsc_max_count) {
        // cache is full, sweep through the cache to find invalid entries
        TAILQ_FOREACH_SAFE(entry, &sg->xzsg_cache.xzsc_head, xzs_cache_entry,
                tmp_entry) {
            if (!_xzm_reclaim_is_reusable(buffer, entry->xzs_reclaim_id,
                    true)) {
                _xzm_segment_group_cache_invalidate(sg, entry);
                continue;
            } else {
                // cache entries are kept in LRU order - encountering an
                // available one implies all other cache entries are also
                // available
                break;
            }
        }
    }
    while (cache->xzsc_count == cache->xzsc_max_count) {
        // Cache is full, evict the oldest entry
        _xzm_segment_group_cache_evict(sg);
    }
    // insert segment into cache
    _xzm_segment_group_cache_mark_free(sg, segment);
    _malloc_lock_unlock(&cache->xzsc_lock);
    return true;
}
#endif // CONFIG_XZM_DEFERRED_RECLAIM

// mimalloc: _mi_segment_huge_page_free
static void
_xzm_segment_group_free_huge_chunk(xzm_segment_group_t sg, xzm_chunk_t chunk,
        bool purgeable)
{
    xzm_segment_t segment = _xzm_segment_for_slice(
            &sg->xzsg_main_ref->xzmz_base, chunk);
    xzm_debug_assert(segment->xzs_kind == XZM_SEGMENT_KIND_HUGE);
    xzm_debug_assert(segment->xzs_used == 1);
#if CONFIG_XZM_DEFERRED_RECLAIM
    if (sg->xzsg_cache.xzsc_max_count > 0 && !purgeable &&
            segment->xzs_slice_count <= sg->xzsg_cache.xzsc_max_entry_slices &&
            segment->xzs_slice_count >
            (XZM_LARGE_BLOCK_SIZE_MAX / XZM_SEGMENT_SLICE_SIZE)) {
        if (_xzm_segment_group_free_huge_chunk_to_cache(sg, segment, chunk)) {
            return;
        }
    }
#else
    // No special handling of purgeable huge segments without the huge cache
    (void)purgeable;
#endif // CONFIG_XZM_DEFERRED_RECLAIM
    _xzm_segment_group_segment_deallocate(sg, segment, true);
}

static void
xzm_madvise(xzm_malloc_zone_t zone, uint8_t *start, size_t size)
{
    __assert_only int rc = mvm_madvise_plat(start, size, MADV_FREE_REUSABLE,
            0, mvm_plat_map(xzm_segment_table_query(
            _xzm_malloc_zone_main(zone), start)->xzs_map));
#ifdef DEBUG
    if (rc) {
        // TODO: time for a compatibility break? Make this fatal?
        malloc_zone_error(0, false,
                "Failed to madvise chunk at %p, error: %d\n", start, errno);
    }
#endif // DEBUG
}

void
xzm_segment_group_segment_madvise_span(xzm_segment_group_t sg,
        uint8_t *slice_start, xzm_slice_count_t count)
{
    xzm_debug_assert((uintptr_t)slice_start % XZM_SEGMENT_SLICE_SIZE == 0);
    size_t span_size = count * XZM_SEGMENT_SLICE_SIZE;
    xzm_madvise(&sg->xzsg_main_ref->xzmz_base, slice_start, span_size);
}

void
xzm_segment_group_segment_madvise_chunk(xzm_segment_group_t sg,
        xzm_chunk_t chunk)
{
    xzm_debug_assert(_xzm_slice_kind_is_chunk(chunk->xzc_bits.xzcb_kind));
    size_t chunk_size = 0;
    uint8_t *start = _xzm_chunk_start_ptr(&sg->xzsg_main_ref->xzmz_base,
            chunk, &chunk_size);
    xzm_madvise(&sg->xzsg_main_ref->xzmz_base, start, chunk_size);
}

// mimalloc: _mi_segment_page_free
void
xzm_segment_group_free_chunk(xzm_segment_group_t sg, xzm_chunk_t chunk,
        bool purgeable, bool small_madvise_needed)
{
    xzm_slice_kind_t kind = chunk->xzc_bits.xzcb_kind;
    xzm_debug_assert(_xzm_slice_kind_is_chunk(kind));
    if (kind == XZM_SLICE_KIND_HUGE_CHUNK) {
        _xzm_segment_group_free_huge_chunk(sg, chunk, purgeable);
        return;
    }
    size_t chunk_size = 0;
    uint8_t *start = _xzm_chunk_start_ptr(&sg->xzsg_main_ref->xzmz_base,
            chunk, &chunk_size);
    xzm_range_group_alloc_flags_t rga_flags = 0;
#if CONFIG_MTE
    if (_xzm_segment_group_memtag_enabled(sg)) {
        rga_flags |= XZM_RANGE_GROUP_ALLOC_FLAGS_MTE;
        // Clear tags for chunk before handing it back to segment group
        memtag_tag_canonical(start, chunk_size);
    }
#endif
    if (os_unlikely(purgeable)) {
        xzm_debug_assert(kind == XZM_SLICE_KIND_LARGE_CHUNK);
        // Remove the purgeability from this allocation before freeing back
        // to the segment
        _xzm_segment_group_overwrite_chunk(start, chunk_size, rga_flags);
    }
    xzm_segment_t segment = _xzm_segment_for_slice(
            &sg->xzsg_main_ref->xzmz_base, chunk);
    if (_xzm_segment_group_has_madvise_workaround(sg) &&
            kind == XZM_SLICE_KIND_LARGE_CHUNK) {
        _xzm_segment_group_overwrite_chunk(start, chunk_size, rga_flags);
    } else if (!_xzm_segment_group_uses_deferred_reclamation(sg) &&
            // Small chunks will have already been aggressively madvised by
            // the time they are free
            (kind != XZM_SLICE_KIND_SMALL_CHUNK || small_madvise_needed)) {
        xzm_segment_group_segment_madvise_chunk(sg, chunk);
    }
    _malloc_lock_lock(&sg->xzsg_lock);
    xzm_debug_assert(_xzm_segment_group_segment_is_valid(sg, segment));
#if CONFIG_XZM_DEFERRED_RECLAIM
    xzm_debug_assert(!(_xzm_segment_group_uses_deferred_reclamation(sg) &&
            _xzm_segment_slice_is_deferred(segment, chunk)));
#endif // CONFIG_XZM_DEFERRED_RECLAIM
    xzm_free_span_t span = _xzm_segment_group_segment_span_free_coalesce(sg,
            segment, chunk, NULL);
    segment->xzs_used--;
#if !CONFIG_XZM_DEFERRED_RECLAIM
    (void)span;
#endif // !CONFIG_XZM_DEFERRED_RECLAIM
    xzm_debug_assert(kind != XZM_SLICE_KIND_HUGE_CHUNK);
    const bool can_deallocate = sg->xzsg_main_ref->xzmz_deallocate_segment &&
            _xzm_segment_group_id_is_data(segment->xzs_segment_group->xzsg_id);
    if (segment->xzs_used == 0 && can_deallocate) {
        // Drops the segment group lock
        _xzm_segment_group_segment_free(sg, segment);
    } else {
#if CONFIG_XZM_DEFERRED_RECLAIM
        if (_xzm_segment_group_uses_deferred_reclamation(sg)) {
            _xzm_segment_group_span_mark_free(sg, span);
        }
#endif // CONFIG_XZM_DEFERRED_RECLAIM
        // TODO: sequester segments more efficiently - just leaving the final
        // whole-segment span in its span queue means its metadata page stays
        // dirty
        xzm_debug_assert(_xzm_segment_group_segment_is_valid(sg, segment));
        _malloc_lock_unlock(&sg->xzsg_lock);
    }
}
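// In-place realloc of a large chunk: growth succeeds only when the span
// immediately following the chunk is free and large enough to absorb the
// extra slices; shrinking carves the tail off into a fake chunk and frees it
// through the normal xzm_segment_group_free_chunk path.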
bool
xzm_segment_group_try_realloc_large_chunk(xzm_segment_group_t sg,
        xzm_segment_t segment, xzm_chunk_t chunk,
        xzm_slice_count_t new_slice_count)
{
    xzm_debug_assert(_xzm_segment_for_slice(&sg->xzsg_main_ref->xzmz_base,
            chunk) == segment);
    xzm_debug_assert(new_slice_count >
            (XZM_SMALL_BLOCK_SIZE_MAX / XZM_SEGMENT_SLICE_SIZE));
    xzm_debug_assert(new_slice_count <=
            (XZM_LARGE_BLOCK_SIZE_MAX / XZM_SEGMENT_SLICE_SIZE));
    xzm_debug_assert(chunk->xzc_bits.xzcb_kind == XZM_SLICE_KIND_LARGE_CHUNK);
    if (chunk->xzcs_slice_count < new_slice_count) {
        _malloc_lock_lock(&sg->xzsg_lock);
        xzm_slice_count_t slices_to_add =
                (new_slice_count - chunk->xzcs_slice_count);
        xzm_slice_t next_slice = chunk + chunk->xzcs_slice_count;
        xzm_slice_count_t next_free_slices =
                _xzm_free_span_slice_count(next_slice);
        // Check if adjacent chunk is in the right segment, free, and large
        // enough to realloc into
        if (next_slice >= _xzm_segment_slices_end(segment) ||
                !_xzm_slice_kind_is_free_span(
                next_slice->xzc_bits.xzcb_kind) ||
                next_free_slices < slices_to_add) {
            _malloc_lock_unlock(&sg->xzsg_lock);
            return false;
        }
        const xzm_slice_count_t next_slices_to_free =
                next_free_slices - slices_to_add;
        bool uses_dr = false;
#if CONFIG_XZM_DEFERRED_RECLAIM
        uses_dr = _xzm_segment_group_uses_deferred_reclamation(sg);
        if (uses_dr) {
            if (!_xzm_segment_group_span_mark_smaller(sg, next_slice, 0,
                    slices_to_add, next_slices_to_free)) {
                // kernel is holding next span busy
                _malloc_lock_unlock(&sg->xzsg_lock);
                return false;
            }
        }
#endif // CONFIG_XZM_DEFERRED_RECLAIM
        _xzm_segment_group_segment_span_remove_from_queue(sg, next_slice,
                next_free_slices);
        // We can only split if there will be 1 or more free slices left over
        if (next_slices_to_free) {
            _xzm_segment_group_segment_slice_split(sg, segment, next_slice,
                    slices_to_add, uses_dr, false);
        }
        for (int i = 0; i < slices_to_add; i++) {
            next_slice[i].xzc_bits.xzcb_kind = XZM_SLICE_KIND_MULTI_BODY;
            next_slice[i].xzsl_slice_offset_bytes = (uint32_t)
                    (((uintptr_t)&next_slice[i]) - ((uintptr_t)chunk));
        }
        chunk->xzcs_slice_count = new_slice_count;
        xzm_debug_assert(_xzm_segment_group_segment_is_valid(sg, segment));
        _malloc_lock_unlock(&sg->xzsg_lock);
#if CONFIG_MTE
        // If block grows on realloc(), tag additional size with same tag as
        // allocation.
        if (_xzm_segment_group_memtag_enabled(sg)) {
            size_t additional_size = (slices_to_add * XZM_SEGMENT_SLICE_SIZE);
            size_t chunk_size;
            void *start = (void *)_xzm_chunk_start(
                    &sg->xzsg_main_ref->xzmz_base, chunk, &chunk_size);
            size_t offset = chunk_size - additional_size;
            void *additional_start = _memtag_load_tag(start) + offset;
            memtag_set_tag(additional_start, additional_size);
        }
#endif
        return true;
    } else if (chunk->xzcs_slice_count > new_slice_count) {
        _malloc_lock_lock(&sg->xzsg_lock);
        xzm_slice_count_t slices_to_free =
                (chunk->xzcs_slice_count - new_slice_count);
        xzm_free_span_t span_to_free = chunk + new_slice_count;
        chunk->xzcs_slice_count = new_slice_count;
        xzm_slice_t last_slice = chunk + (chunk->xzcs_slice_count - 1);
        last_slice->xzc_bits.xzcb_kind = XZM_SLICE_KIND_MULTI_BODY;
        last_slice->xzsl_slice_offset_bytes = (uint32_t)
                (((uintptr_t)last_slice) - ((uintptr_t)chunk));
        // create a fake chunk out of the remainder before freeing it
        xzm_segment_t segment = _xzm_segment_for_slice(
                &sg->xzsg_main_ref->xzmz_base, chunk);
        xzm_slice_kind_t tail_kind = slices_to_free > 1 ?
                XZM_SLICE_KIND_LARGE_CHUNK : XZM_SLICE_KIND_TINY_CHUNK;
        _xzm_segment_group_segment_span_mark_allocated(sg, segment, tail_kind,
                _xzm_slice_index(segment, span_to_free), slices_to_free);
        _malloc_lock_unlock(&sg->xzsg_lock);
        // Realloc in place is disabled for the purgeable zone, so we can
        // always pass purgeable=false here
        xzm_segment_group_free_chunk(sg, span_to_free, false, false);
        return true;
    }
    return true; // old size == new size, so no-op
}

bool
xzm_segment_group_try_realloc_huge_chunk(xzm_segment_group_t sg,
        xzm_malloc_zone_t zone, xzm_segment_t segment, xzm_chunk_t chunk,
        xzm_slice_count_t new_slice_count)
{
    xzm_debug_assert(_xzm_segment_for_slice(&sg->xzsg_main_ref->xzmz_base,
            chunk) == segment);
    xzm_debug_assert(new_slice_count >
            (XZM_LARGE_BLOCK_SIZE_MAX / XZM_SEGMENT_SLICE_SIZE));
    xzm_debug_assert(chunk->xzc_bits.xzcb_kind == XZM_SLICE_KIND_HUGE_CHUNK);
    if (chunk->xzcs_slice_count < new_slice_count) {
        size_t current_size = 0;
        vm_address_t current_ptr = (vm_address_t)_xzm_chunk_start(zone, chunk,
                &current_size);
        vm_address_t addr_to_request = current_ptr + current_size;
        size_t slices_to_request = new_slice_count - chunk->xzcs_slice_count;
        size_t size_to_request = slices_to_request * XZM_SEGMENT_SLICE_SIZE;
        uintptr_t segment_to_check = roundup(addr_to_request,
                XZM_SEGMENT_SIZE);
        while (segment_to_check < (addr_to_request + size_to_request)) {
            // TODO: Once we have deferred reclaim for huge chunks, we have
            // the option to do something more clever here (e.g. if all
            // segments are unallocated or are still waiting to be reclaimed,
            // then we can acquire those and realloc)
            if (xzm_segment_table_query(sg->xzsg_main_ref,
                    (void *)segment_to_check)) {
                return false;
            }
            segment_to_check += XZM_SEGMENT_SIZE;
        }
        int label = VM_MEMORY_REALLOC;
        void *addr = mvm_allocate_plat(addr_to_request, size_to_request, 0,
                VM_FLAGS_FIXED, 0, label, mvm_plat_map(segment->xzs_map));
        if (addr) {
            size_t new_body_size = new_slice_count * XZM_SEGMENT_SLICE_SIZE;
            _xzm_segment_group_init_segment(sg, segment,
                    _xzm_segment_start(segment), new_body_size, true, false);
            // If we expanded into new segment granules, mark them as
            // allocated
            uintptr_t first_new_segment = roundup(addr_to_request,
                    XZM_SEGMENT_SIZE);
            if ((uintptr_t)current_ptr + new_body_size > first_new_segment) {
                _xzm_segment_table_allocated_at(_xzm_malloc_zone_main(zone),
                        (void *)first_new_segment, segment, false);
#if CONFIG_MTE
                // If block grows on realloc(), tag additional size with same
                // tag as allocation.
                if (_xzm_segment_group_memtag_enabled(sg)) {
                    void *tagged_addr_to_request =
                            _memtag_load_tag((void *)current_ptr) +
                            current_size;
                    memtag_set_tag(tagged_addr_to_request, size_to_request);
                }
#endif
            }
            xzm_debug_assert(_xzm_segment_group_segment_is_valid(sg,
                    segment));
            return true;
        }
        return false;
    } else if (chunk->xzcs_slice_count > new_slice_count) {
        _xzm_segment_group_split_huge_segment(sg, segment, new_slice_count);
        xzm_debug_assert(_xzm_segment_group_segment_is_valid(sg, segment));
        return true;
    }
    return true; // old size == new size, so no-op
}
#endif // CONFIG_XZONE_MALLOC