Loading...
gen/scalable_malloc.c Libc-498 Libc-262
--- Libc/Libc-498/gen/scalable_malloc.c
+++ Libc/Libc-262/gen/scalable_malloc.c
@@ -1,3767 +1,1372 @@
 /*
- * Copyright (c) 1999, 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
  * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
  * 
  * @APPLE_LICENSE_HEADER_END@
  */
 
 /* Author: Bertrand Serlet, August 1999 */
 
-#include "scalable_malloc.h"
-#include "malloc_printf.h"
-#include "_simple.h"
-
-#include <pthread_internals.h>
-
-#include <unistd.h>
-#include <libc.h>
+#import "scalable_malloc.h"
+
+#define __POSIX_LIB__
+#import <unistd.h>
+#import <pthread_internals.h>	// for spin lock
+#import <libc.h>
 #include <mach/vm_statistics.h>
-#include <mach/mach_init.h>
-#include <sys/types.h>
-#include <sys/mman.h>
 
 /*********************	DEFINITIONS	************************/
 
+static unsigned vm_page_shift = 0; // guaranteed to be intialized by zone creation
+
 #define DEBUG_MALLOC	0	// set to one to debug malloc itself
-
 #define DEBUG_CLIENT	0	// set to one to help debug a nasty memory smasher
 
 #if DEBUG_MALLOC
-#warning DEBUG_MALLOC ENABLED
-# define INLINE
-# define ALWAYSINLINE
-# define CHECK_LOCKED(szone, fun)						\
-do {										\
-    if (__is_threaded && TRY_LOCK(szone->lock)) {				\
-	malloc_printf("*** lock was not set %p in %s\n", szone->lock, fun);	\
-    }										\
-} while (0)
+#warning DEBUG ENABLED
+#define INLINE
 #else
-# define INLINE	__inline__
-# define ALWAYSINLINE __attribute__((always_inline))
-# define CHECK_LOCKED(szone, fun)	{}
-#endif
-
-/*
- * Access to global variables is slow, so optimise our handling of vm_page_size
- * and vm_page_shift.
- */
-#define _vm_page_size 	vm_page_size	/* to get to the originals */
-#define _vm_page_shift	vm_page_shift
-#define vm_page_size	4096		/* our normal working sizes */
-#define vm_page_shift	12
-
-typedef unsigned short msize_t; // a size in multiples of SHIFT_SMALL_QUANTUM or SHIFT_TINY_QUANTUM
-
-typedef union {
- 	void 	   *p;
- 	uintptr_t 	u;
-} ptr_union;
-
-typedef struct {
-	ptr_union previous;
-	ptr_union next;
-} free_list_t;
-
-typedef struct {
-    uintptr_t 	address_and_num_pages;
-    // this type represents both an address and a number of pages
-    // the low bits are the number of pages; the high bits are the address
-    // note that the exact number of bits used for depends on the page size
-    // also, this cannot represent pointers larger than 1 << (vm_page_shift * 2)
-} compact_range_t;
-
-typedef unsigned char	grain_t;
+#define INLINE	inline
+#endif
 
 #define CHECK_REGIONS			(1 << 31)
-
-#define MAX_RECORDER_BUFFER	256
-
-/*********************	DEFINITIONS for tiny	************************/
-
-/*
- * Memory in the Tiny range is allocated from regions (heaps) pointed to by the szone's tiny_regions
- * pointer.
- *
- * Each region is laid out as a heap, followed by a header block, all within
- * a 1MB (2^20) block.  This means there are 64520 16-byte blocks and the header is
- * 16138 bytes, making the total 1048458 bytes, leaving 118 bytes unused.
- * The header block is arranged:
- *
- * 0xfc080
- * 	header bits
- * 0xfe001
- *	0xffffffff pad word
- * 0xfe005
- *	in-use bits
- * 0xfff86
- *	pad word (not written)
- * 0xfff8a-0xfffff
- *	unused
- *
- * Each bitfield comprises NUM_TINY_BLOCKS bits, and refers to the corresponding TINY_QUANTUM block
- * within the heap.
- *
- * The bitfields are used to encode the state of memory within the heap.  The header bit indicates
- * that the corresponding quantum is the first quantum in a block (either in use or free).  The
- * in-use bit is set for the header if the block has been handed out (allocated).  If the header
- * bit is not set, the in-use bit is invalid.
- *
- * The szone maintains an array of 32 freelists, each of which is used to hold 
- * free objects of the corresponding quantum size.
- *
- * A free block is laid out depending on its size, in order to fit all free
- * blocks in 16 bytes, on both 32 and 64 bit platforms.  One quantum blocks do
- * not store their size in the block, instead relying on the header information
- * to determine their size.  Blocks of two or more quanta have room to store
- * their size in the block, and store it both after the 'next' pointer, and in
- * the last 2 bytes of the block.
- *
- * 1-quantum block
- * Offset (32-bit mode)	(64-bit mode)
- * 0x0          0x0      : previous
- * 0x4          0x08     : next
- * end          end
- *
- * >1-quantum block
- * Offset (32-bit mode)	(64-bit mode)
- * 0x0          0x0      : previous
- * 0x4          0x08     : next
- * 0x8          0x10     : size (in quantum counts)
- * end - 2      end - 2  : size (in quantum counts)
- * end          end
- *
- * All fields are pointer-sized, except for the size which is an unsigned short.
- *
- */
-
-#define SHIFT_TINY_QUANTUM			4	// Required for AltiVec
-#define	TINY_QUANTUM				(1 << SHIFT_TINY_QUANTUM)
-
-#define FOLLOWING_TINY_PTR(ptr,msize)		(((unsigned char *)(ptr)) + ((msize) << SHIFT_TINY_QUANTUM))
-
-#define NUM_TINY_SLOTS				32	// number of slots for free-lists
-
-#define NUM_TINY_BLOCKS				64520
-#define SHIFT_TINY_CEIL_BLOCKS			16 // ceil(log2(NUM_TINY_BLOCKS))
-#define NUM_TINY_CEIL_BLOCKS			(1 << SHIFT_TINY_CEIL_BLOCKS)
-#define TINY_BLOCKS_ALIGN			(SHIFT_TINY_CEIL_BLOCKS + SHIFT_TINY_QUANTUM)
-
-/*
- * Enough room for the data, followed by the bit arrays (2-bits per block) plus 2 words of padding
- * as our bitmap operators overflow, plus rounding to the nearest page.
- */
-#define TINY_HEADER_SIZE	((NUM_TINY_BLOCKS >> 2) + 8)
-#define TINY_REGION_SIZE	((NUM_TINY_BLOCKS * TINY_QUANTUM + TINY_HEADER_SIZE + vm_page_size - 1) & ~ (vm_page_size - 1))
-
-/*
- * Beginning and end pointers for a region's heap.
- */
-#define TINY_REGION_ADDRESS(region)		((void *)(region))
-#define TINY_REGION_END(region)			(TINY_REGION_ADDRESS(region) + (NUM_TINY_BLOCKS * TINY_QUANTUM))
-
-/*
- * Locate the heap base for a pointer known to be within a tiny region.
- */
-#define TINY_REGION_FOR_PTR(_p)			((void *)((uintptr_t)(_p) & ~((1 << TINY_BLOCKS_ALIGN) - 1)))
-
-/*
- * Convert between byte and msize units.
- */
-#define TINY_BYTES_FOR_MSIZE(_m)		((_m) << SHIFT_TINY_QUANTUM)
-#define TINY_MSIZE_FOR_BYTES(_b)		((_b) >> SHIFT_TINY_QUANTUM)
-
-#ifdef __LP64__
-# define TINY_FREE_SIZE(ptr)			(((msize_t *)(ptr))[8])
-#else
-# define TINY_FREE_SIZE(ptr)			(((msize_t *)(ptr))[4])
-#endif
-#define TINY_PREVIOUS_MSIZE(ptr)		((msize_t *)(ptr))[-1]
-
-/*
- * Locate the block header for a pointer known to be within a tiny region.
- */
-#define TINY_HEADER_START			(NUM_TINY_BLOCKS * TINY_QUANTUM)
-#define TINY_BLOCK_HEADER_FOR_PTR(_p)		((void *)((uintptr_t)TINY_REGION_FOR_PTR(_p) + TINY_HEADER_START))
-
-/*
- * Locate the inuse map for a given block header pointer.
- */  
-#define TINY_INUSE_FOR_HEADER(_h)		((void *)((uintptr_t)(_h) + (NUM_TINY_BLOCKS >> 3) + 4))
-
-/*
- * Compute the bitmap index for a pointer known to be within a tiny region.
- */
-#define TINY_INDEX_FOR_PTR(_p) 			(((uintptr_t)(_p) >> SHIFT_TINY_QUANTUM) & (NUM_TINY_CEIL_BLOCKS - 1))
-
-#define TINY_CACHE	1	// This governs a last-free cache of 1 that bypasses the free-list
-
-#if ! TINY_CACHE
-#warning TINY_CACHE turned off
-#endif
-
-/*********************	DEFINITIONS for small	************************/
-
-/*
- * Memory in the Small range is allocated from regions (heaps) pointed to by the szone's small_regions
- * pointer.
- *
- * Each region is laid out as a heap, followed by the metadata array, all within an 8MB (2^23) block.
- * The array is arranged as an array of shorts, one for each SMALL_QUANTUM in the heap.
- * This means there are 16320 512-blocks and the array is 16320*2 bytes, which totals 8388480, leaving
- * 128 bytes unused.
- *
- * The MSB of each short is set for the first quantum in a free block.  The low 15 bits encode the
- * block size (in SMALL_QUANTUM units), or are zero if the quantum is not the first in a block.
- *
- * The szone maintains an array of 32 freelists, each of which is used to hold free objects
- * of the corresponding quantum size.
- *
- * A free block is laid out as:
- *
- * Offset (32-bit mode)	(64-bit mode)
- * 0x0          0x0      : previous
- * 0x4          0x08     : next
- * 0x8          0x10     : size (in quantum counts)
- * end - 2		end - 2  : size (in quantum counts)
- * end			end
- *
- * All fields are pointer-sized, except for the size which is an unsigned short.
- *
- */
-
-#define SMALL_IS_FREE				(1 << 15)
-
-#define	SHIFT_SMALL_QUANTUM			(SHIFT_TINY_QUANTUM + 5)	// 9
-#define	SMALL_QUANTUM				(1 << SHIFT_SMALL_QUANTUM) // 512 bytes
-
-#define FOLLOWING_SMALL_PTR(ptr,msize)		(((unsigned char *)(ptr)) + ((msize) << SHIFT_SMALL_QUANTUM))
-
-#define NUM_SMALL_SLOTS				32	// number of slots for free-lists
-
-/*
- * We can only represent up to 1<<15 for msize; but we choose to stay even below that to avoid the
- * convention msize=0 => msize = (1<<15)
- */
-#define NUM_SMALL_BLOCKS			16320
-#define SHIFT_SMALL_CEIL_BLOCKS			14 // ceil(log2(NUM_SMALL_BLOCKs))
-#define NUM_SMALL_CEIL_BLOCKS			(1 << SHIFT_SMALL_CEIL_BLOCKS)
-#define SMALL_BLOCKS_ALIGN			(SHIFT_SMALL_CEIL_BLOCKS + SHIFT_SMALL_QUANTUM) // 23
-#define SMALL_ARRAY_SIZE			(NUM_SMALL_BLOCKS * 2)
-#define SMALL_REGION_SIZE			((NUM_SMALL_BLOCKS * SMALL_QUANTUM + SMALL_ARRAY_SIZE + vm_page_size - 1) & ~ (vm_page_size - 1))	// data + meta data
-
-#define SMALL_PREVIOUS_MSIZE(ptr)		((msize_t *)(ptr))[-1]
-
-/*
- * Convert between byte and msize units.
- */
-#define SMALL_BYTES_FOR_MSIZE(_m)		((_m) << SHIFT_SMALL_QUANTUM)
-#define SMALL_MSIZE_FOR_BYTES(_b)		((_b) >> SHIFT_SMALL_QUANTUM)
-
-
-#define SMALL_REGION_ADDRESS(region)		((unsigned char *)region)
-#define SMALL_REGION_END(region)		(SMALL_REGION_ADDRESS(region) + (NUM_SMALL_BLOCKS * SMALL_QUANTUM))
-
-/*
- * Locate the heap base for a pointer known to be within a small region.
- */
-#define SMALL_REGION_FOR_PTR(_p)		((void *)((uintptr_t)(_p) & ~((1 << SMALL_BLOCKS_ALIGN) - 1)))
-
-/*
- * Locate the metadata base for a pointer known to be within a small region.
- */
-#define SMALL_HEADER_START			(NUM_SMALL_BLOCKS * SMALL_QUANTUM)
-#define SMALL_META_HEADER_FOR_PTR(_p)		((msize_t *)((uintptr_t)SMALL_REGION_FOR_PTR(_p) + SMALL_HEADER_START))
-
-/*
- * Compute the metadata index for a pointer known to be within a small region.
- */
-#define SMALL_META_INDEX_FOR_PTR(_p)	       (((uintptr_t)(_p) >> SHIFT_SMALL_QUANTUM) & (NUM_SMALL_CEIL_BLOCKS - 1))
-
-/*
- * Find the metadata word for a pointer known to be within a small region.
- */
-#define SMALL_METADATA_FOR_PTR(_p)		(SMALL_META_HEADER_FOR_PTR(_p) + SMALL_META_INDEX_FOR_PTR(_p))
-
-/*
- * Determine whether a pointer known to be within a small region points to memory which is free.
- */
-#define SMALL_PTR_IS_FREE(_p)			(*SMALL_METADATA_FOR_PTR(_p) & SMALL_IS_FREE)
-
-/*
- * Extract the msize value for a pointer known to be within a small region.
- */
-#define SMALL_PTR_SIZE(_p)			(*SMALL_METADATA_FOR_PTR(_p) & ~SMALL_IS_FREE)
-
-#define PROTECT_SMALL				0	// Should be 0: 1 is too slow for normal use
-
-#define SMALL_CACHE	1
-#if !SMALL_CACHE
-#warning SMALL_CACHE turned off
-#endif
-
-/*********************  DEFINITIONS for large and huge  ***********************/
-
-#define LARGE_THRESHOLD			(15 * 1024) // at or above this use "large"
-
-#if (LARGE_THRESHOLD > NUM_SMALL_SLOTS * SMALL_QUANTUM)
-#error LARGE_THRESHOLD should always be less than NUM_SMALL_SLOTS * SMALL_QUANTUM
-#endif
 
 #define VM_COPY_THRESHOLD		(40 * 1024)
     // When all memory is touched after a copy, vm_copy() is always a lose
     // But if the memory is only read, vm_copy() wins over memmove() at 3 or 4 pages (on a G3/300MHz)
     // This must be larger than LARGE_THRESHOLD
 
-/*
- * Given a large_entry, return the address of the allocated block.
- */
-#define LARGE_ENTRY_ADDRESS(entry)					\
-    (void *)(((entry).address_and_num_pages >> vm_page_shift) << vm_page_shift)
-
-/*
- * Given a large entry, return the number of pages or bytes in the allocated block.
- */
-#define LARGE_ENTRY_NUM_PAGES(entry)					\
-    ((entry).address_and_num_pages & (vm_page_size - 1))
-#define LARGE_ENTRY_SIZE(entry)						\
-    (LARGE_ENTRY_NUM_PAGES(entry) << vm_page_shift)
-
-/*
- * Compare a pointer with a large entry.
- */
-#define LARGE_ENTRY_MATCHES(entry,ptr)					\
-    ((((entry).address_and_num_pages - (uintptr_t)(ptr)) >> vm_page_shift) == 0)
-
-#define LARGE_ENTRY_IS_EMPTY(entry)	(((entry).address_and_num_pages) == 0)
+#define KILL_THRESHOLD			(32 * 1024)
+
+#define LARGE_THRESHOLD			(3 * vm_page_size) // at or above this use "large"
+
+#define	SHIFT_QUANTUM			4	// Required for AltiVec
+#define	QUANTUM				(1 << SHIFT_QUANTUM) // allocation quantum
+#define MIN_BLOCK			1	// minimum size, in QUANTUM multiples
+
+/* The header of a small block in use contains its size (expressed as multiples of QUANTUM, and header included), or 0;
+If 0 then the block is either free (in which case the size is directly at the block itself), or the last block (indicated by either being beyond range, or having 0 in the block itself) */
+
+#define PTR_HEADER_SIZE			(sizeof(msize_t))
+#define FOLLOWING_PTR(ptr,msize)	(((char *)(ptr)) + ((msize) << SHIFT_QUANTUM))
+#define PREVIOUS_MSIZE(ptr)		((msize_t *)(ptr))[-2]
+
+#define THIS_FREE			0x8000	// indicates this block is free
+#define PREV_FREE			0x4000	// indicates previous block is free
+#define MSIZE_FLAGS_FOR_PTR(ptr)	(((msize_t *)(ptr))[-1])
+
+#define REGION_SIZE			(1 << (16 - 2 + SHIFT_QUANTUM)) // since we only have 16 bits for msize_t, and 1 bit is taken by THIS_FREE, and 1 by PREV_FREE
+
+#define INITIAL_NUM_REGIONS		8 // must always be at least 2 to always have 1 slot empty
+
+#define CHECKSUM_MAGIC			0x357B
+
+#define PROTECT_SMALL			0	// Should be 0: 1 is too slow for normal use
+
+#define MAX_RECORDER_BUFFER	256
+
+#define MAX_GRAIN	128
+
+typedef unsigned short msize_t; // a size in multiples of SHIFT_QUANTUM
+
+typedef struct {
+    unsigned	checksum;
+    void	*previous;
+    void	*next;
+} free_list_t;
+
+typedef struct {
+    unsigned 	address_and_num_pages;
+    // this type represents both an address and a number of pages
+    // the low bits are the number of pages
+    // the high bits are the address
+    // note that the exact number of bits used for depends on the page size
+    // also, this cannot represent pointers larger than 1 << (vm_page_shift * 2)
+} compact_range_t;
+
+typedef vm_address_t region_t;
 
 typedef compact_range_t large_entry_t;
+
 typedef vm_range_t huge_entry_t;
 
-/*******************************************************************************
- * Definitions for region hash
- ******************************************************************************/
-
-typedef void * region_t;
-
-#define INITIAL_NUM_REGIONS 63  // Must be odd to hash well
-
-/*********************	zone itself	************************/
+typedef unsigned short	grain_t;
 
 typedef struct {
     malloc_zone_t	basic_zone;
     pthread_lock_t	lock;
     unsigned		debug_flags;
     void		*log_address;
-
-    /* Regions for tiny objects */
-    size_t num_tiny_regions;
-    size_t num_tiny_regions_allocated;
-    region_t *tiny_regions;  // hashed by location
-    region_t *last_tiny_region;
-    void		*last_tiny_free; // low SHIFT_TINY_QUANTUM indicate the msize
-    unsigned		tiny_bitmap; // cache of the 32 free lists
-    free_list_t		*tiny_free_list[NUM_TINY_SLOTS]; // 31 free lists for 1*TINY_QUANTUM to 31*TINY_QUANTUM plus 1 for larger than 32*SMALL_QUANTUM
-    size_t		tiny_bytes_free_at_end; // the last free region in the last block is treated as a big block in use that is not accounted for
-    unsigned		num_tiny_objects;
-    size_t		num_bytes_in_tiny_objects;
-
+    
     /* Regions for small objects */
-    size_t num_small_regions;
-    size_t num_small_regions_allocated;
-    region_t *small_regions;  // hashed by location
-    region_t *last_small_region;
-    void		*last_small_free; // low SHIFT_SMALL_QUANTUM indicate the msize
-    unsigned		small_bitmap; // cache of the free list
-    free_list_t		*small_free_list[NUM_SMALL_SLOTS];
-    size_t		small_bytes_free_at_end; // the last free region in the last block is treated as a big block in use that is not accounted for
+    unsigned		num_regions;
+    region_t		*regions;
+        // this array is always created with 1 extra slot to be able to add a region without taking memory right away
+    unsigned		last_region_hit;
+    free_list_t		*free_list[MAX_GRAIN];
+    unsigned		num_bytes_free_in_last_region; // these bytes are cleared
     unsigned		num_small_objects;
-    size_t		num_bytes_in_small_objects;
-
+    unsigned		num_bytes_in_small_objects;
+    
     /* large objects: vm_page_shift <= log2(size) < 2 *vm_page_shift */
     unsigned		num_large_objects_in_use;
     unsigned		num_large_entries;
-    large_entry_t	*large_entries; // hashed by location; null entries don't count
-    size_t		num_bytes_in_large_objects;
+    unsigned		num_bytes_in_large_objects;
+    large_entry_t	*large_entries;
+        // large_entries are hashed by location
+        // large_entries that are 0 should be discarded
     
     /* huge objects: log2(size) >= 2 *vm_page_shift */
+    unsigned		num_bytes_in_huge_objects;
     unsigned		num_huge_entries;
     huge_entry_t	*huge_entries;
-    size_t		num_bytes_in_huge_objects;
-
-    /* Initial region list */
-    region_t	initial_tiny_regions[INITIAL_NUM_REGIONS];
-    region_t	initial_small_regions[INITIAL_NUM_REGIONS];
 } szone_t;
 
-#define SZONE_PAGED_SIZE	((sizeof(szone_t) + vm_page_size - 1) & ~ (vm_page_size - 1))
-
-#if DEBUG_MALLOC || DEBUG_CLIENT
-static void		szone_sleep(void);
-#endif
-__private_extern__ void malloc_error_break(void);
-
-// msg prints after fmt, ...
-static void		szone_error(szone_t *szone, const char *msg, const void *ptr, const char *fmt, ...) __printflike(4, 5);
-
-static void		protect(void *address, size_t size, unsigned protection, unsigned debug_flags);
-static void		*allocate_pages(szone_t *szone, size_t size, unsigned char align, unsigned debug_flags, int vm_page_label);
-static void		deallocate_pages(szone_t *szone, void *addr, size_t size, unsigned debug_flags);
-static kern_return_t	_szone_default_reader(task_t task, vm_address_t address, vm_size_t size, void **ptr);
-
-static INLINE void	free_list_checksum(szone_t *szone, free_list_t *ptr, const char *msg) ALWAYSINLINE;
-static INLINE void	free_list_set_checksum(szone_t *szone, free_list_t *ptr) ALWAYSINLINE;
-static INLINE uintptr_t free_list_checksum_ptr(void *p) ALWAYSINLINE;
-static INLINE void * free_list_unchecksum_ptr(ptr_union ptr) ALWAYSINLINE;
-static unsigned		free_list_count(const free_list_t *ptr);
-
-static INLINE msize_t	get_tiny_meta_header(const void *ptr, boolean_t *is_free) ALWAYSINLINE;
-static INLINE void	set_tiny_meta_header_in_use(const void *ptr, msize_t msize) ALWAYSINLINE;
-static INLINE void	set_tiny_meta_header_middle(const void *ptr) ALWAYSINLINE;
-static INLINE void	set_tiny_meta_header_free(const void *ptr, msize_t msize) ALWAYSINLINE;
-static INLINE boolean_t	tiny_meta_header_is_free(const void *ptr) ALWAYSINLINE;
-static INLINE void	*tiny_previous_preceding_free(void *ptr, msize_t *prev_msize) ALWAYSINLINE;
-static void	tiny_free_list_add_ptr(szone_t *szone, void *ptr, msize_t msize);
-static void	tiny_free_list_remove_ptr(szone_t *szone, void *ptr, msize_t msize);
-static INLINE region_t *tiny_region_for_ptr_no_lock(szone_t *szone, const void *ptr) ALWAYSINLINE;
-static INLINE void	tiny_free_no_lock(szone_t *szone, region_t *region, void *ptr, msize_t msize) ALWAYSINLINE;
-static void		*tiny_malloc_from_region_no_lock(szone_t *szone, msize_t msize);
-static INLINE boolean_t	try_realloc_tiny_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size) ALWAYSINLINE;
-static boolean_t	tiny_check_region(szone_t *szone, region_t region);
-static kern_return_t	tiny_in_use_enumerator(task_t task, void *context, unsigned type_mask, szone_t *szone, memory_reader_t reader, vm_range_recorder_t recorder);
-static void	*tiny_malloc_from_free_list(szone_t *szone, msize_t msize);
-static INLINE void	*tiny_malloc_should_clear(szone_t *szone, msize_t msize, boolean_t cleared_requested) ALWAYSINLINE;
-static INLINE void	free_tiny(szone_t *szone, void *ptr, region_t *tiny_region) ALWAYSINLINE;
-static void		print_tiny_free_list(szone_t *szone);
-static void		print_tiny_region(boolean_t verbose, region_t region, size_t bytes_at_end);
-static boolean_t	tiny_free_list_check(szone_t *szone, grain_t slot);
-
-static INLINE void	small_meta_header_set_is_free(msize_t *meta_headers, unsigned index, msize_t msize) ALWAYSINLINE;
-static INLINE void	small_meta_header_set_in_use(msize_t *meta_headers, msize_t index, msize_t msize) ALWAYSINLINE;
-static INLINE void	small_meta_header_set_middle(msize_t *meta_headers, msize_t index) ALWAYSINLINE;
-static void		small_free_list_add_ptr(szone_t *szone, void *ptr, msize_t msize);
-static void		small_free_list_remove_ptr(szone_t *szone, void *ptr, msize_t msize);
-static INLINE region_t *small_region_for_ptr_no_lock(szone_t *szone, const void *ptr) ALWAYSINLINE;
-static INLINE void	small_free_no_lock(szone_t *szone, region_t *region, void *ptr, msize_t msize) ALWAYSINLINE;
-static void		*small_malloc_from_region_no_lock(szone_t *szone, msize_t msize);
-static INLINE boolean_t	try_realloc_small_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size) ALWAYSINLINE;
-static boolean_t	szone_check_small_region(szone_t *szone, region_t region);
-static kern_return_t	small_in_use_enumerator(task_t task, void *context, unsigned type_mask, szone_t *szone, memory_reader_t reader, vm_range_recorder_t recorder);
-static void	*small_malloc_from_free_list(szone_t *szone, msize_t msize);
-static INLINE void	*small_malloc_should_clear(szone_t *szone, msize_t msize, boolean_t cleared_requested) ALWAYSINLINE;
-static INLINE void	*small_malloc_cleared_no_lock(szone_t *szone, msize_t msize) ALWAYSINLINE;
-static INLINE void	free_small(szone_t *szone, void *ptr, region_t *small_region) ALWAYSINLINE;
-static void		print_small_free_list(szone_t *szone);
-static void		print_small_region(szone_t *szone, boolean_t verbose, region_t region, size_t bytes_at_end);
-static boolean_t	small_free_list_check(szone_t *szone, grain_t grain);
-
-static region_t * hash_lookup_region_no_lock(region_t *regions, size_t num_entries, region_t r);
-static void hash_region_insert_no_lock(region_t *regions, size_t num_entries, region_t r);
-static region_t * hash_regions_alloc_no_lock(szone_t *szone, size_t num_entries);
-static region_t * hash_regions_grow_no_lock(szone_t *szone, region_t *regions, size_t old_size, size_t *new_size);
-
-#if DEBUG_MALLOC
-static void		large_debug_print(szone_t *szone);
-#endif
-static large_entry_t	*large_entry_for_pointer_no_lock(szone_t *szone, const void *ptr);
-static void		large_entry_insert_no_lock(szone_t *szone, large_entry_t range);
-static INLINE void	large_entries_rehash_after_entry_no_lock(szone_t *szone, large_entry_t *entry) ALWAYSINLINE;
-static INLINE large_entry_t *large_entries_alloc_no_lock(szone_t *szone, unsigned num) ALWAYSINLINE;
-static void		large_entries_free_no_lock(szone_t *szone, large_entry_t *entries, unsigned num, vm_range_t *range_to_deallocate);
-static large_entry_t *	large_entries_grow_no_lock(szone_t *szone, vm_range_t *range_to_deallocate);
-static vm_range_t	large_free_no_lock(szone_t *szone, large_entry_t *entry);
-static kern_return_t	large_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t large_entries_address, unsigned num_entries, memory_reader_t reader, vm_range_recorder_t recorder);
-static huge_entry_t	*huge_entry_for_pointer_no_lock(szone_t *szone, const void *ptr);
-static boolean_t	huge_entry_append(szone_t *szone, huge_entry_t huge);
-static kern_return_t	huge_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t huge_entries_address, unsigned num_entries, memory_reader_t reader, vm_range_recorder_t recorder);
-static void		*large_and_huge_malloc(szone_t *szone, size_t num_pages);
-static INLINE void	free_large_or_huge(szone_t *szone, void *ptr) ALWAYSINLINE;
-static INLINE int	try_realloc_large_or_huge_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size) ALWAYSINLINE;
-
-static void		szone_free(szone_t *szone, void *ptr);
-static INLINE void	*szone_malloc_should_clear(szone_t *szone, size_t size, boolean_t cleared_requested) ALWAYSINLINE;
-static void		*szone_malloc(szone_t *szone, size_t size);
-static void		*szone_calloc(szone_t *szone, size_t num_items, size_t size);
-static void		*szone_valloc(szone_t *szone, size_t size);
-static size_t		szone_size(szone_t *szone, const void *ptr);
-static void		*szone_realloc(szone_t *szone, void *ptr, size_t new_size);
-static unsigned		szone_batch_malloc(szone_t *szone, size_t size, void **results, unsigned count);
-static void		szone_batch_free(szone_t *szone, void **to_be_freed, unsigned count);
-static void		szone_destroy(szone_t *szone);
-static size_t		szone_good_size(szone_t *szone, size_t size);
-
-static boolean_t	szone_check_all(szone_t *szone, const char *function);
-static boolean_t	szone_check(szone_t *szone);
-static kern_return_t	szone_ptr_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t zone_address, memory_reader_t reader, vm_range_recorder_t recorder);
-static void		szone_print(szone_t *szone, boolean_t verbose);
-static void		szone_log(malloc_zone_t *zone, void *log_address);
-static void		szone_force_lock(szone_t *szone);
-static void		szone_force_unlock(szone_t *szone);
-
-static void		szone_statistics(szone_t *szone, malloc_statistics_t *stats);
-
-static void		*frozen_malloc(szone_t *zone, size_t new_size);
-static void		*frozen_calloc(szone_t *zone, size_t num_items, size_t size);
-static void		*frozen_valloc(szone_t *zone, size_t new_size);
-static void		*frozen_realloc(szone_t *zone, void *ptr, size_t new_size);
-static void		frozen_free(szone_t *zone, void *ptr);
-static void		frozen_destroy(szone_t *zone);
-
-#if DEBUG_MALLOC
-# define LOG(szone,ptr)							\
-    (szone->log_address && (((uintptr_t)szone->log_address == -1) || (szone->log_address == (void *)(ptr))))
-#else
-# define LOG(szone,ptr)		0
-#endif
-
-#define SZONE_LOCK(szone)						\
-	do {								\
-	    LOCK(szone->lock);						\
-	} while (0)
-
-#define SZONE_UNLOCK(szone)						\
-	do {								\
-	    UNLOCK(szone->lock);					\
-	} while (0)
-
-#define LOCK_AND_NOTE_LOCKED(szone,locked)				\
-do {									\
-    CHECK(szone, __PRETTY_FUNCTION__);					\
-    locked = 1; SZONE_LOCK(szone);					\
-} while (0)
-
-#if DEBUG_MALLOC || DEBUG_CLIENT
-# define CHECK(szone,fun)						\
-    if ((szone)->debug_flags & CHECK_REGIONS) szone_check_all(szone, fun)
-#else
-# define CHECK(szone,fun)	do {} while (0)
-#endif
-
-/*********************	VERY LOW LEVEL UTILITIES  ************************/
-
-#if DEBUG_MALLOC || DEBUG_CLIENT
-static void
-szone_sleep(void)
-{
-
-    if (getenv("MallocErrorSleep")) {
-	_malloc_printf(ASL_LEVEL_NOTICE, "*** sleeping to help debug\n");
-	sleep(3600); // to help debug
-    }
-}
-#endif
-
-// msg prints after fmt, ...
-static __attribute__((noinline)) void
-szone_error(szone_t *szone, const char *msg, const void *ptr, const char *fmt, ...)
-{
-    va_list ap;
-    _SIMPLE_STRING b = _simple_salloc();
-
+static void *szone_malloc(szone_t *szone, size_t size);
+static void *szone_valloc(szone_t *szone, size_t size);
+static INLINE void *szone_malloc_should_clear(szone_t *szone, size_t size, boolean_t cleared_requested);
+static void szone_free(szone_t *szone, void *ptr);
+static size_t szone_good_size(szone_t *szone, size_t size);
+static boolean_t szone_check_all(szone_t *szone, const char *function);
+static void szone_print(szone_t *szone, boolean_t verbose);
+static INLINE region_t *region_for_ptr_no_lock(szone_t *szone, const void *ptr);
+static vm_range_t large_free_no_lock(szone_t *szone, large_entry_t *entry);
+
+#define LOG(szone,ptr)	(szone->log_address && (szone->num_small_objects > 8) && (((unsigned)szone->log_address == -1) || (szone->log_address == (void *)(ptr))))
+
+/*********************	ACCESSOR MACROS	************************/
+
+#define SZONE_LOCK(szone) 		LOCK(szone->lock)
+#define SZONE_UNLOCK(szone)		UNLOCK(szone->lock)
+
+#define CHECK(szone,fun)		if ((szone)->debug_flags & CHECK_REGIONS) szone_check_all(szone, fun)
+
+#define REGION_ADDRESS(region)		(region)
+#define REGION_END(region)		(region+REGION_SIZE)
+
+#define LARGE_ENTRY_ADDRESS(entry)	(((entry).address_and_num_pages >> vm_page_shift) << vm_page_shift)
+#define LARGE_ENTRY_NUM_PAGES(entry)	((entry).address_and_num_pages & ((1 << vm_page_shift) - 1))
+#define LARGE_ENTRY_SIZE(entry)		(LARGE_ENTRY_NUM_PAGES(entry) << vm_page_shift)
+#define LARGE_ENTRY_MATCHES(entry,ptr)	(!(((entry).address_and_num_pages - (unsigned)(ptr)) >> vm_page_shift))
+#define LARGE_ENTRY_IS_EMPTY(entry)	(!((entry).address_and_num_pages))
+
+/*********************	VERY LOW LEVEL UTILITIES	************************/
+
+static void szone_error(szone_t *szone, const char *msg, const void *ptr) {
     if (szone) SZONE_UNLOCK(szone);
-    if (b) {
-	if (fmt) {
-	    va_start(ap, fmt);
-	    _simple_vsprintf(b, fmt, ap);
-	    va_end(ap);
-	}
-	if (ptr) {
-	    _simple_sprintf(b, "*** error for object %p: %s\n", ptr, msg);
-	} else {
-	    _simple_sprintf(b, "*** error: %s\n", msg);
-	}
-	malloc_printf("%s*** set a breakpoint in malloc_error_break to debug\n", _simple_string(b));
-	_simple_sfree(b);
+    if (ptr) {
+        malloc_printf("*** malloc[%d]: error for object %p: %s\n", getpid(), ptr, msg);
+#if DEBUG_MALLOC
+        szone_print(szone, 1);
+#endif
     } else {
-	/*
-	 * Should only get here if vm_allocate() can't get a single page of
-	 * memory, implying _simple_asl_log() would also fail.  So we just
-	 * print to the file descriptor.
-	 */
-	if (fmt) {
-	    va_start(ap, fmt);
-	    _malloc_vprintf(MALLOC_PRINTF_NOLOG, fmt, ap);
-	    va_end(ap);
-	}
-	if (ptr) {
-	    _malloc_printf(MALLOC_PRINTF_NOLOG, "*** error for object %p: %s\n", ptr, msg);
-	} else {
-	    _malloc_printf(MALLOC_PRINTF_NOLOG, "*** error: %s\n", msg);
-	}
-	_malloc_printf(MALLOC_PRINTF_NOLOG, "*** set a breakpoint in malloc_error_break to debug\n");
-    }
-    malloc_error_break();
-#if DEBUG_MALLOC
-    szone_print(szone, 1);
-    szone_sleep();
-#endif
+        malloc_printf("*** malloc[%d]: error: %s\n", getpid(), msg);
+    }
 #if DEBUG_CLIENT
-    szone_sleep();
-#endif
-    if (szone->debug_flags & SCALABLE_MALLOC_ABORT_ON_ERROR) abort();
-}
-
-static void
-protect(void *address, size_t size, unsigned protection, unsigned debug_flags)
-{
+    malloc_printf("*** Sleeping to help debug\n");
+    sleep(3600); // to help debug
+#endif
+}
+
+static void protect(szone_t *szone, vm_address_t address, vm_size_t size, unsigned protection, unsigned debug_flags) {
     kern_return_t	err;
-
     if (!(debug_flags & SCALABLE_MALLOC_DONT_PROTECT_PRELUDE)) {
-	err = vm_protect(mach_task_self(), (vm_address_t)(uintptr_t)address - vm_page_size, vm_page_size, 0, protection);
-	if (err) {
-	    malloc_printf("*** can't protect(%p) region for prelude guard page at %p\n",
-	      protection,address - (1 << vm_page_shift));
-	}
+        err = vm_protect(mach_task_self(), address - vm_page_size, vm_page_size, 0, protection);
+        if (err) malloc_printf("*** malloc[%d]: Can't protect(%x) region for prelude guard page at 0x%x\n", getpid(), protection, address - vm_page_size);
     }
     if (!(debug_flags & SCALABLE_MALLOC_DONT_PROTECT_POSTLUDE)) {
-	err = vm_protect(mach_task_self(), (vm_address_t)(uintptr_t)address + size, vm_page_size, 0, protection);
-	if (err) {
-	    malloc_printf("*** can't protect(%p) region for postlude guard page at %p\n",
-	      protection, address + size);
-	}
-    }
-}
-
-static void *
-allocate_pages(szone_t *szone, size_t size, unsigned char align, unsigned debug_flags, int vm_page_label)
-{
-    // align specifies a desired alignment (as a log) or 0 if no alignment requested
-    void            *vm_addr;
-    uintptr_t		addr, aligned_address;
+        err = vm_protect(mach_task_self(), (vm_address_t)(address + size), vm_page_size, 0, protection);
+        if (err) malloc_printf("*** malloc[%d]: Can't protect(%x) region for postlude guard page at 0x%x\n", getpid(), protection, address + size);
+    }
+}
+
+static vm_address_t allocate_pages(szone_t *szone, size_t size, unsigned debug_flags, int vm_page_label) {
+    kern_return_t	err;
+    vm_address_t	addr;
     boolean_t		add_guard_pages = debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES;
     size_t		allocation_size = round_page(size);
-    size_t		delta;
-    
-    if (align) add_guard_pages = 0; // too cumbersome to deal with that
-    if (!allocation_size) allocation_size = 1 << vm_page_shift;
-    if (add_guard_pages) allocation_size += 2 * (1 << vm_page_shift);
-    if (align) allocation_size += (size_t)1 << align;
-    vm_addr = mmap(0, allocation_size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, VM_MAKE_TAG(vm_page_label), 0);
-    if ((uintptr_t)vm_addr == -1) {
-    	szone_error(szone, "can't allocate region", NULL, "*** mmap(size=%lld) failed (error code=%d)\n", (long long)allocation_size, errno);
-	    return NULL;
-    }
-    addr = (uintptr_t)vm_addr;
-    if (align) {
-	aligned_address = (addr + ((uintptr_t)1 << align) - 1) & ~ (((uintptr_t)1 << align) - 1);
-	if (aligned_address != addr) {
-	    delta = aligned_address - addr;
-	    if (munmap((void *)addr, delta) == -1)
-		malloc_printf("*** freeing unaligned header failed with %d\n", errno);
-	    addr = aligned_address;
-	    allocation_size -= delta;
-	}
-	if (allocation_size > size) {
-	    if (munmap((void *)(addr + size), allocation_size - size) == -1)
-		malloc_printf("*** freeing unaligned footer failed with %d\n", errno);
-	}
+    if (!allocation_size) allocation_size = vm_page_size;
+    if (add_guard_pages) allocation_size += 2 * vm_page_size;
+    err = vm_allocate(mach_task_self(), &addr, allocation_size, vm_page_label | 1);
+    if (err) {
+	malloc_printf("*** malloc: vm_allocate(size=%d) failed with %d\n", size, err);
+        szone_error(szone, "Can't allocate region", NULL);
+        return NULL;
     }
     if (add_guard_pages) {
-	addr += (uintptr_t)1 << vm_page_shift;
-	protect((void *)addr, size, 0, debug_flags);
-    }
-    return (void *)addr;
-}
-
-static void
-deallocate_pages(szone_t *szone, void *addr, size_t size, unsigned debug_flags)
-{
-    int	err;
-    boolean_t add_guard_pages = debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES;
-
+        addr += vm_page_size;
+        protect(szone, addr, size, 0, debug_flags);
+    }
+    return addr;
+}
+
+static void deallocate_pages(szone_t *szone, vm_address_t addr, size_t size, unsigned debug_flags) {
+    kern_return_t	err;
+    boolean_t		add_guard_pages = debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES;
     if (add_guard_pages) {
-	addr -= 1 << vm_page_shift;
-	size += 2 * (1 << vm_page_shift);
-    }
-    err = munmap(addr, size);
-    if ((err == -1) && szone)
-	szone_error(szone, "Can't deallocate_pages region", addr, NULL);
-}
-
-static kern_return_t
-_szone_default_reader(task_t task, vm_address_t address, vm_size_t size, void **ptr)
-{
+        addr -= vm_page_size;
+        size += 2 * vm_page_size;
+    }
+    err = vm_deallocate(mach_task_self(), addr, size);
+    if (err) {
+        szone_error(szone, "Can't deallocate_pages region", (void *)addr);
+    }
+}
+
+static kern_return_t _szone_default_reader(task_t task, vm_address_t address, vm_size_t size, void **ptr) {
     *ptr = (void *)address;
     return 0;
 }
 
-/*********************	FREE LIST UTILITIES  ************************/
-
-// A free list entry is comprised of a pair of pointers, previous and next.
-// Because the free list entries are previously freed objects, there is a
-// non-zero chance that a misbehaved program will write to an allocated object
-// after it has called free() on the pointer.  This write would then potentially
-// corrupt the previous and next pointers, leading to a crash.  In order to
-// detect this case, we take advantage of the fact that pointers are known to
-// be at least 16 byte aligned, and thus have at least 4 trailing zero bits.
-// When an entry is added to the free list, the previous and next pointers are
-// shifted right by 2 bits, and then have the high and low 2 bits set, to act
-// as guard bits.  Before accessing a free list object, we verify that these
-// bits are still set, and log an error if they are not.
-
-static INLINE void
-free_list_checksum(szone_t *szone, free_list_t *ptr, const char *msg)
-{
-    uintptr_t ptrs = ptr->previous.u & ptr->next.u;
-
-#ifdef __LP64__
-    ptrs = (ptrs << 2) | (ptrs >> (64-2));
-#else
-    ptrs = (ptrs << 2) | (ptrs >> (32-2));
-#endif
-    
-    if ((ptrs & 15) != 15)
-    	szone_error(szone, "incorrect checksum for freed object "
-	                "- object was probably modified after being freed.", ptr, NULL);
-}
-
-static INLINE uintptr_t
-free_list_checksum_ptr(void *p) 
-{
-    ptr_union ptr;
-    ptr.p = p;
-    
-#ifdef __LP64__
-    return (ptr.u >> 2) | 0xC000000000000003ULL;
-#else
-    return (ptr.u >> 2) | 0xC0000003U;
-#endif
-}
-
-static INLINE void *
-free_list_unchecksum_ptr(ptr_union ptr)
-{
-    uintptr_t u = (ptr.u >> 2) << 4;
-    return (void *)u;
-}
-
-static INLINE void
-free_list_set_checksum(szone_t *szone, free_list_t *ptr)
-{
-    ptr->previous.u = free_list_checksum_ptr(ptr->previous.p);
-    ptr->next.u = free_list_checksum_ptr(ptr->next.p);
-}
-
-static unsigned
-free_list_count(const free_list_t *ptr)
-{
+/*********************	FREE LIST UTILITIES	************************/
+
+static INLINE grain_t grain_for_msize(szone_t *szone, msize_t msize) {
+    // assumes msize >= MIN_BLOCK
+#if DEBUG_MALLOC
+    if (msize < MIN_BLOCK) {
+        szone_error(szone, "grain_for_msize: msize too small", NULL);
+    }
+#endif
+    return (msize < MAX_GRAIN + MIN_BLOCK) ? msize - MIN_BLOCK : MAX_GRAIN - 1;
+}
+
+static INLINE msize_t msize_for_grain(szone_t *szone, grain_t grain) {
+    // 0 if multiple sizes
+    return (grain < MAX_GRAIN - 1) ? grain + MIN_BLOCK : 0;
+}
+
+static INLINE void free_list_checksum(szone_t *szone, free_list_t *ptr) {
+    // We always checksum, as testing whether to do it (based on szone->debug_flags) is as fast as doing it
+    if (ptr->checksum != (((unsigned)ptr->previous) ^ ((unsigned)ptr->next) ^ CHECKSUM_MAGIC)) {
+	szone_error(szone, "Incorrect checksum for freed object - object was probably modified after being freed; break at szone_error", ptr);
+    }
+}
+
+static INLINE void free_list_set_checksum(szone_t *szone, free_list_t *ptr) {
+    // We always set checksum, as testing whether to do it (based on szone->debug_flags) is slower than just doing it
+    ptr->checksum = ((unsigned)ptr->previous) ^ ((unsigned)ptr->next) ^ CHECKSUM_MAGIC;
+}
+
+static void free_list_add_ptr(szone_t *szone, void *ptr, msize_t msize) {
+    // Adds an item to the proper free list
+    // Also marks the header of the block properly
+    grain_t	grain = grain_for_msize(szone, msize);
+    free_list_t	*free_ptr = ptr;
+    free_list_t	*free_head = szone->free_list[grain];
+    msize_t	*follower = (msize_t *)FOLLOWING_PTR(ptr, msize);
+#if DEBUG_MALLOC
+    if (LOG(szone,ptr)) malloc_printf("In free_list_add_ptr(), ptr=%p, msize=%d\n", ptr, msize);
+    if (((unsigned)ptr) & (QUANTUM - 1)) {
+        szone_error(szone, "free_list_add_ptr: Unaligned ptr", ptr);
+    }
+#endif
+    MSIZE_FLAGS_FOR_PTR(ptr) = msize | THIS_FREE;
+    if (free_head) {
+        free_list_checksum(szone, free_head);
+#if DEBUG_MALLOC
+        if (free_head->previous) {
+            malloc_printf("ptr=%p grain=%d free_head=%p previous=%p\n", ptr, grain, free_head, free_head->previous);
+            szone_error(szone, "free_list_add_ptr: Internal invariant broken (free_head->previous)", ptr);
+        }
+        if (!(MSIZE_FLAGS_FOR_PTR(free_head) & THIS_FREE)) {
+            malloc_printf("ptr=%p grain=%d free_head=%p\n", ptr, grain, free_head);
+            szone_error(szone, "free_list_add_ptr: Internal invariant broken (free_head is not a free pointer)", ptr);
+        }
+        if ((grain != MAX_GRAIN-1) && (MSIZE_FLAGS_FOR_PTR(free_head) != (THIS_FREE | msize))) {
+            malloc_printf("ptr=%p grain=%d free_head=%p previous_msize=%d\n", ptr, grain, free_head, MSIZE_FLAGS_FOR_PTR(free_head));
+            szone_error(szone, "free_list_add_ptr: Internal invariant broken (incorrect msize)", ptr);
+        }
+#endif
+        free_head->previous = free_ptr;
+        free_list_set_checksum(szone, free_head);
+    }
+    free_ptr->previous = NULL;
+    free_ptr->next = free_head;
+    free_list_set_checksum(szone, free_ptr);
+    szone->free_list[grain] = free_ptr;
+    // mark the end of this block
+    PREVIOUS_MSIZE(follower) = msize;
+    MSIZE_FLAGS_FOR_PTR(follower) |= PREV_FREE;
+}
+
+static void free_list_remove_ptr(szone_t *szone, void *ptr, msize_t msize) {
+    // Removes item in the proper free list
+    // msize could be read, but all callers have it so we pass it in
+    grain_t	grain = grain_for_msize(szone, msize);
+    free_list_t	*free_ptr = ptr;
+    free_list_t	*next = free_ptr->next;
+    free_list_t	*previous = free_ptr->previous;
+#if DEBUG_MALLOC
+    if (LOG(szone,ptr)) malloc_printf("In free_list_remove_ptr(), ptr=%p, msize=%d\n", ptr, msize);
+#endif
+    free_list_checksum(szone, free_ptr);
+    if (!previous) {
+#if DEBUG_MALLOC
+        if (szone->free_list[grain] != ptr) {
+            malloc_printf("ptr=%p grain=%d msize=%d szone->free_list[grain]=%p\n", ptr, grain, msize, szone->free_list[grain]);
+            szone_error(szone, "free_list_remove_ptr: Internal invariant broken (szone->free_list[grain])", ptr);
+            return;
+        }
+#endif
+        szone->free_list[grain] = next;
+    } else {
+        previous->next = next;
+        free_list_set_checksum(szone, previous);
+    }
+    if (next) {
+        next->previous = previous;
+        free_list_set_checksum(szone, next);
+    }
+    MSIZE_FLAGS_FOR_PTR(FOLLOWING_PTR(ptr, msize)) &= ~ PREV_FREE;
+}
+
+static boolean_t free_list_check(szone_t *szone, grain_t grain) {
     unsigned	count = 0;
-
+    free_list_t	*ptr = szone->free_list[grain];
+    free_list_t	*previous = NULL;
     while (ptr) {
+        msize_t	msize_and_free = MSIZE_FLAGS_FOR_PTR(ptr);
         count++;
-        ptr = free_list_unchecksum_ptr(ptr->next);
-    }
-    return count;
-}
-
-/* XXX inconsistent use of BITMAP32 and BITARRAY operations could be cleaned up */
-
-#define BITMAP32_SET(bitmap,bit) 	(bitmap |= 1 << (bit))
-#define BITMAP32_CLR(bitmap,bit)	(bitmap &= ~ (1 << (bit)))
-#define BITMAP32_BIT(bitmap,bit) 	((bitmap >> (bit)) & 1)
-
-/* returns bit # of least-significant one bit, starting at 0 (undefined if !bitmap) */
-#define BITMAP32_CTZ(bitmap) (__builtin_ctz(bitmap))
-
-/*********************	TINY FREE LIST UTILITIES	************************/
-
-// We encode the meta-headers as follows:
-// Each quantum has an associated set of 2 bits:
-// block_header when 1 says this block is the beginning of a block
-// in_use when 1 says this block is in use
-// so a block in use of size 3 is 1-1 0-X 0-X
-// for a free block TINY_FREE_SIZE(ptr) carries the size and the bits are 1-0 X-X X-X
-// for a block middle the bits are 0-0
-
-// Attention double evaluation for these
-#define BITARRAY_SET(bits,index)	(bits[index>>3] |= (1 << (index & 7)))
-#define BITARRAY_CLR(bits,index)	(bits[index>>3] &= ~(1 << (index & 7)))
-#define BITARRAY_BIT(bits,index)	(((bits[index>>3]) >> (index & 7)) & 1)
-
-// Following is for start<8 and end<=start+32
-static void ALWAYSINLINE
-bitarray_mclr(void *bits, unsigned start, unsigned end) {
-    unsigned word = OSReadLittleInt32(bits, 0);
-    unsigned mask = (0xFFFFFFFFU >> (31 - start)) >> 1;
-
-    if (end > 31) {
-        unsigned char *bytes = (unsigned char *)bits;
-        bytes[4] &= ~((1 << (end - 32)) - 1);
+        if (!(msize_and_free & THIS_FREE)) {
+            malloc_printf("*** malloc[%d]: In-use ptr in free list grain=%d count=%d ptr=%p\n", getpid(), grain, count, ptr);
+            return 0;
+        }
+        if (((unsigned)ptr) & (QUANTUM - 1)) {
+            malloc_printf("*** malloc[%d]: Unaligned ptr in free list grain=%d  count=%d ptr=%p\n", getpid(), grain, count, ptr);
+            return 0;
+        }
+        if (!region_for_ptr_no_lock(szone, ptr)) {
+            malloc_printf("*** malloc[%d]: Ptr not in szone grain=%d  count=%d ptr=%p\n", getpid(), grain, count, ptr);
+            return 0;
+        }
+        free_list_checksum(szone, ptr);
+        if (ptr->previous != previous) {
+            malloc_printf("*** malloc[%d]: Previous incorrectly set grain=%d  count=%d ptr=%p\n", getpid(), grain, count, ptr);
+            return 0;
+        }
+        if ((grain != MAX_GRAIN-1) && (msize_and_free != (msize_for_grain(szone, grain) | THIS_FREE))) {
+            malloc_printf("*** malloc[%d]: Incorrect msize for grain=%d  count=%d ptr=%p msize=%d\n", getpid(), grain, count, ptr, msize_and_free);
+            return 0;
+        }
+        previous = ptr;
+        ptr = ptr->next;
+    }
+    return 1;
+}
+
+/*********************	SMALL BLOCKS MANAGEMENT	************************/
+
+static INLINE region_t *region_for_ptr_no_lock(szone_t *szone, const void *ptr) {
+    region_t		*first_region = szone->regions;
+    region_t		*region = first_region + szone->last_region_hit;
+    region_t		this = *region;
+    if ((unsigned)ptr - (unsigned)REGION_ADDRESS(this) < (unsigned)REGION_SIZE) {
+        return region;
     } else {
-        mask |= (0xFFFFFFFF << end);
-    }
-    OSWriteLittleInt32(bits, 0, word & mask);
-}
-
-/*
- * Obtain the size of a free tiny block (in msize_t units).
- */
-static msize_t
-get_tiny_free_size(const void *ptr)
-{
-    void *next_block = (void *)((uintptr_t)ptr + TINY_QUANTUM);
-    void *region_end = TINY_REGION_END(TINY_REGION_FOR_PTR(ptr));
-
-    // check whether the next block is outside the tiny region or a block header
-    // if so, then the size of this block is one, and there is no stored size.
-    if (next_block < region_end)
-    {
-        unsigned char *next_header = TINY_BLOCK_HEADER_FOR_PTR(next_block);
-        msize_t        next_index  = TINY_INDEX_FOR_PTR(next_block);
-        
-        if (!BITARRAY_BIT(next_header, next_index))
-            return TINY_FREE_SIZE(ptr);
-    }
-    return 1;
-}
-
-/*
- * Get the size of the previous free block, which is stored in the last two
- * bytes of the block.  If the previous block is not free, then the result is
- * undefined.
- */
-static msize_t
-get_tiny_previous_free_msize(const void *ptr)
-{
-    // check whether the previous block is in the tiny region and a block header
-    // if so, then the size of the previous block is one, and there is no stored
-    // size.
-    if (ptr != TINY_REGION_FOR_PTR(ptr))
-    {
-        void          *prev_block = (void *)((uintptr_t)ptr - TINY_QUANTUM);
-        unsigned char *prev_header = TINY_BLOCK_HEADER_FOR_PTR(prev_block);
-        msize_t        prev_index  = TINY_INDEX_FOR_PTR(prev_block);
-        if (BITARRAY_BIT(prev_header, prev_index))
-            return 1;
-        return TINY_PREVIOUS_MSIZE(ptr);
-    }
-    // don't read possibly unmapped memory before the beginning of the region
-    return 0;
-}
-
-static INLINE msize_t
-get_tiny_meta_header(const void *ptr, boolean_t *is_free)
-{
-    // returns msize and is_free
-    // may return 0 for the msize component (meaning 65536)
-    unsigned char	*block_header;
-    unsigned char	*in_use;
-    msize_t		index;
-    unsigned		byte_index;
-
-    block_header = TINY_BLOCK_HEADER_FOR_PTR(ptr);
-    index = TINY_INDEX_FOR_PTR(ptr);
-    byte_index = index >> 3;
-      
-    block_header += byte_index;
-    index &= 7;
-    *is_free = 0;
-    if (!BITMAP32_BIT(*block_header, index))
-        return 0;
-    in_use = TINY_INUSE_FOR_HEADER(block_header);
-    if (!BITMAP32_BIT(*in_use, index)) {
-        *is_free = 1;
-        return get_tiny_free_size(ptr);
-    }
-    uint32_t	*addr = (uint32_t *)((uintptr_t)block_header & ~3);
-    uint32_t	word0 = OSReadLittleInt32(addr, 0) >> index;
-    uint32_t	word1 = OSReadLittleInt32(addr, 4) << (8 - index);
-    uint32_t	bits = (((uintptr_t)block_header & 3) * 8);	// precision loss on LP64 OK here
-    uint32_t	word = (word0 >> bits) | (word1 << (24 - bits));
-    uint32_t	result = ffs(word >> 1);
-    return result;
-}
-
-static INLINE void
-set_tiny_meta_header_in_use(const void *ptr, msize_t msize)
-{
-    unsigned char	*block_header;
-    unsigned char	*in_use;
-    msize_t		index;
-    unsigned		byte_index;
-    msize_t		clr_msize;
-    unsigned		end_bit;
-	
-    block_header = TINY_BLOCK_HEADER_FOR_PTR(ptr);
-    index = TINY_INDEX_FOR_PTR(ptr);
-    byte_index = index >> 3;
-    
-#if DEBUG_MALLOC
-    if (msize >= 32)
-	malloc_printf("set_tiny_meta_header_in_use() invariant broken %p %d\n", ptr, msize);
-    if ((unsigned)index + (unsigned)msize > 0x10000)
-	malloc_printf("set_tiny_meta_header_in_use() invariant broken (2) %p %d\n", ptr, msize);
-#endif
-    block_header += byte_index;
-    index &= 7;
-    BITMAP32_SET(*block_header, index); 
-    in_use = TINY_INUSE_FOR_HEADER(block_header);
-    BITMAP32_SET(*in_use, index); 
-    index++; 
-    clr_msize = msize-1;
-    if (clr_msize) {
-	byte_index = index >> 3;
-	block_header += byte_index; in_use += byte_index;
-	index &= 7;
-	end_bit = index + clr_msize;
-	bitarray_mclr(block_header, index, end_bit);
-	bitarray_mclr(in_use, index, end_bit);
-    }
-    BITARRAY_SET(block_header, index+clr_msize); // we set the block_header bit for the following block to reaffirm next block is a block
-#if DEBUG_MALLOC
-    {
-	boolean_t ff;
-	msize_t	mf;
-	
-	mf = get_tiny_meta_header(ptr, &ff);
-	if (msize != mf) {
-	    malloc_printf("setting header for tiny in_use %p : %d\n", ptr, msize);
-	    malloc_printf("reading header for tiny %p : %d %d\n", ptr, mf, ff);
-	}
-    }
-#endif
-}
-
-static INLINE void
-set_tiny_meta_header_middle(const void *ptr)
-{
-    // indicates this block is in the middle of an in use block
-    unsigned char	*block_header;
-    unsigned char	*in_use;
-    msize_t		index;
-
-    block_header = TINY_BLOCK_HEADER_FOR_PTR(ptr);
-    in_use = TINY_INUSE_FOR_HEADER(block_header);
-    index = TINY_INDEX_FOR_PTR(ptr);
-
-    BITARRAY_CLR(block_header, index);
-    BITARRAY_CLR(in_use, index); 
-}
-
-static INLINE void
-set_tiny_meta_header_free(const void *ptr, msize_t msize)
-{
-    // !msize is acceptable and means 65536
-    unsigned char	*block_header;
-    unsigned char	*in_use;
-    msize_t		index;
-
-    block_header = TINY_BLOCK_HEADER_FOR_PTR(ptr);
-    in_use = TINY_INUSE_FOR_HEADER(block_header);
-    index = TINY_INDEX_FOR_PTR(ptr);
-
-#if DEBUG_MALLOC
-    if ((unsigned)index + (unsigned)msize > 0x10000) {
-	malloc_printf("setting header for tiny free %p msize too large: %d\n", ptr, msize);
-    }
-#endif
-    BITARRAY_SET(block_header, index);
-    BITARRAY_CLR(in_use, index); 
-    // mark the end of this block if msize is > 1.  For msize == 0, the whole
-    // region is free, so there is no following block. For msize == 1, there is
-    // no space to write the size on 64 bit systems.  The size for 1 quantum
-    // blocks is computed from the metadata bitmaps.
-    if (msize > 1) {
-        void	*follower = FOLLOWING_TINY_PTR(ptr, msize);
-        TINY_PREVIOUS_MSIZE(follower) = msize;
-        TINY_FREE_SIZE(ptr) = msize;
-    }
-    if (msize == 0) {
-        TINY_FREE_SIZE(ptr) = msize;
-    }
-#if DEBUG_MALLOC
-    boolean_t	ff;
-    msize_t	mf = get_tiny_meta_header(ptr, &ff);
-    if ((msize != mf) || !ff) {
-	malloc_printf("setting header for tiny free %p : %u\n", ptr, msize);
-	malloc_printf("reading header for tiny %p : %u %u\n", ptr, mf, ff);
-    }
-#endif
-}
-
-static INLINE boolean_t
-tiny_meta_header_is_free(const void *ptr)
-{
-    unsigned char	*block_header;
-    unsigned char	*in_use;
-    msize_t		index;
-
-    block_header = TINY_BLOCK_HEADER_FOR_PTR(ptr);
-    in_use = TINY_INUSE_FOR_HEADER(block_header);
-    index = TINY_INDEX_FOR_PTR(ptr);
-    if (!BITARRAY_BIT(block_header, index))
-	return 0;
-    return !BITARRAY_BIT(in_use, index);
-}
-
-static INLINE void *
-tiny_previous_preceding_free(void *ptr, msize_t *prev_msize)
-{
-    // returns the previous block, assuming and verifying it's free
-    unsigned char	*block_header;
-    unsigned char	*in_use;
-    msize_t		index;
-    msize_t		previous_msize;
-    msize_t		previous_index;
-    void		*previous_ptr;
-
-    block_header = TINY_BLOCK_HEADER_FOR_PTR(ptr);
-    in_use = TINY_INUSE_FOR_HEADER(block_header);
-    index = TINY_INDEX_FOR_PTR(ptr);
-    
-    if (!index)
+        // We iterate in reverse order becase last regions are more likely
+        region = first_region + szone->num_regions;
+        while (region != first_region) {
+            this = *(--region);
+            if ((unsigned)ptr - (unsigned)REGION_ADDRESS(this) < (unsigned)REGION_SIZE) {
+                szone->last_region_hit = region - first_region;
+                return region;
+            }
+        }
         return NULL;
-    if ((previous_msize = get_tiny_previous_free_msize(ptr)) > index)
-        return NULL;
-    
-    previous_index = index - previous_msize;
-    previous_ptr = (void *)(TINY_REGION_FOR_PTR(ptr) + TINY_BYTES_FOR_MSIZE(previous_index));
-    if (!BITARRAY_BIT(block_header, previous_index))
-        return NULL;
-    if (BITARRAY_BIT(in_use, previous_index))
-        return NULL;
-    if (get_tiny_free_size(previous_ptr) != previous_msize)
-        return NULL;
-    
-    // conservative check did match true check
-    *prev_msize = previous_msize;
-    return previous_ptr;
-}
-
-/* 
- * Adds an item to the proper free list, and also marks the meta-header of the 
- * block properly.
- * Assumes szone has been locked
- */ 
-static void
-tiny_free_list_add_ptr(szone_t *szone, void *ptr, msize_t msize)
-{
-    grain_t	slot = (!msize || (msize >= NUM_TINY_SLOTS)) ? NUM_TINY_SLOTS - 1 : msize - 1;
-    free_list_t	*free_ptr = ptr;
-    free_list_t	*free_head = szone->tiny_free_list[slot];
-
-#if DEBUG_MALLOC
-    if (LOG(szone,ptr)) {
-        malloc_printf("in %s, ptr=%p, msize=%d\n", __FUNCTION__, ptr, msize);
-    }
-    if (((uintptr_t)ptr) & (TINY_QUANTUM - 1)) {
-        szone_error(szone, "tiny_free_list_add_ptr: Unaligned ptr", ptr, NULL);
-    }
-#endif
-    set_tiny_meta_header_free(ptr, msize);
-    if (free_head) {
-        free_list_checksum(szone, free_head, __PRETTY_FUNCTION__);
-#if DEBUG_MALLOC
-        if (free_list_unchecksum_ptr(free_head->previous)) {
-            szone_error(szone, "tiny_free_list_add_ptr: Internal invariant broken (free_head->previous)", ptr,
-		"ptr=%p slot=%d free_head=%p previous=%p\n", ptr, slot, free_head, free_head->previous.p);
-        }
-        if (! tiny_meta_header_is_free(free_head)) {
-            szone_error(szone, "tiny_free_list_add_ptr: Internal invariant broken (free_head is not a free pointer)", ptr,
-		"ptr=%p slot=%d free_head=%p\n", ptr, slot, free_head);
-        }
-#endif
-        free_head->previous.u = free_list_checksum_ptr(free_ptr);
-    } else {
-        BITMAP32_SET(szone->tiny_bitmap, slot);
-    }
-    free_ptr->previous.p = NULL;
-    free_ptr->next.p = free_head;
-    free_list_set_checksum(szone, free_ptr);
-    szone->tiny_free_list[slot] = free_ptr;
-}
-
-/* 
- * Removes the item pointed to by ptr in the proper free list. 
- * Assumes szone has been locked
- */ 
-static INLINE void
-tiny_free_list_remove_ptr(szone_t *szone, void *ptr, msize_t msize)
-{
-    grain_t	slot = (!msize || (msize >= NUM_TINY_SLOTS)) ? NUM_TINY_SLOTS - 1 : msize - 1;
-    free_list_t	*free_ptr = ptr, *next, *previous;
-    free_list_checksum(szone, free_ptr, __PRETTY_FUNCTION__);
-
-    next = free_list_unchecksum_ptr(free_ptr->next);
-    previous = free_list_unchecksum_ptr(free_ptr->previous);
-
-#if DEBUG_MALLOC
-    if (LOG(szone,ptr)) {
-        malloc_printf("In %s, ptr=%p, msize=%d\n", __FUNCTION__, ptr, msize);
-    }
-#endif
-    if (!previous) { 
-    // The block to remove is the head of the free list
-#if DEBUG_MALLOC
-        if (szone->tiny_free_list[slot] != ptr) {
-            szone_error(szone, "tiny_free_list_remove_ptr: Internal invariant broken (szone->tiny_free_list[slot])", ptr,
-              "ptr=%p slot=%d msize=%d szone->tiny_free_list[slot]=%p\n",
-              ptr, slot, msize, szone->tiny_free_list[slot]);
-            return;
-        }
-#endif
-        szone->tiny_free_list[slot] = next;
-        if (!next) BITMAP32_CLR(szone->tiny_bitmap, slot);
-    } else {
-        // We know free_ptr is already checksummed, so we don't need to do it
-        // again.
-        previous->next = free_ptr->next;
-    }
-    if (next) {
-        // We know free_ptr is already checksummed, so we don't need to do it
-        // again.
-        next->previous = free_ptr->previous;
-    }
-}
-
-/*
- * tiny_region_for_ptr_no_lock - Returns the tiny region containing the pointer,
- * or NULL if not found.
- */
-static INLINE region_t *
-tiny_region_for_ptr_no_lock(szone_t *szone, const void *ptr)
-{
-  return hash_lookup_region_no_lock(szone->tiny_regions,
-                                    szone->num_tiny_regions_allocated,
-                                    TINY_REGION_FOR_PTR(ptr));
-}
-
-static INLINE void
-tiny_free_no_lock(szone_t *szone, region_t *region, void *ptr, msize_t msize)
-{
-    size_t	original_size = TINY_BYTES_FOR_MSIZE(msize);
+    }
+}
+
+static INLINE void small_free_no_lock(szone_t *szone, region_t *region, void *ptr, msize_t msize_and_free) {
+    msize_t	msize = msize_and_free & ~ PREV_FREE;
+    size_t	original_size = msize << SHIFT_QUANTUM;
     void	*next_block = ((char *)ptr + original_size);
-    msize_t	previous_msize;
-    void	*previous;
-    msize_t	next_msize;
-    free_list_t	*big_free_block;
-    free_list_t	*after_next_block;
-    free_list_t	*before_next_block;
-
-#if DEBUG_MALLOC
-    if (LOG(szone,ptr)) {
-	malloc_printf("in tiny_free_no_lock(), ptr=%p, msize=%d\n", ptr, msize);
-    }
-    if (! msize) {
-	szone_error(szone, "trying to free tiny block that is too small", ptr,
-	    "in tiny_free_no_lock(), ptr=%p, msize=%d\n", ptr, msize);
-    }
-#endif
-    // We try to coalesce this block with the preceeding one
-    previous = tiny_previous_preceding_free(ptr, &previous_msize);
-    if (previous) {
-#if DEBUG_MALLOC
-        if (LOG(szone, ptr) || LOG(szone,previous)) { 
-            malloc_printf("in tiny_free_no_lock(), coalesced backwards for %p previous=%p\n", ptr, previous);
-        }
-#endif
-        // clear the meta_header since this is no longer the start of a block
-        set_tiny_meta_header_middle(ptr);
-        tiny_free_list_remove_ptr(szone, previous, previous_msize);
+    msize_t	next_msize_and_free;
+#if DEBUG_MALLOC
+    if (LOG(szone,ptr)) malloc_printf("In small_free_no_lock(), ptr=%p, msize=%d\n", ptr, msize);
+    if (msize < MIN_BLOCK) {
+        malloc_printf("In small_free_no_lock(), ptr=%p, msize=%d\n", ptr, msize);
+        szone_error(szone, "Trying to free small block that is too small", ptr);
+    }
+#endif
+    if (((vm_address_t)next_block < REGION_END(*region)) && ((next_msize_and_free = MSIZE_FLAGS_FOR_PTR(next_block)) & THIS_FREE)) {
+        // If the next block is free, we coalesce
+        msize_t	next_msize = next_msize_and_free & ~THIS_FREE;
+        if (LOG(szone,ptr)) malloc_printf("In small_free_no_lock(), for ptr=%p, msize=%d coalesced next block=%p next_msize=%d\n", ptr, msize, next_block, next_msize);
+        free_list_remove_ptr(szone, next_block, next_msize);
+        msize += next_msize;
+    }
+    // Let's try to coalesce backwards now
+    if (msize_and_free & PREV_FREE) {
+        msize_t	previous_msize = PREVIOUS_MSIZE(ptr);
+        void	*previous = ptr - (previous_msize << SHIFT_QUANTUM);
+#if DEBUG_MALLOC
+        if (LOG(szone,previous)) malloc_printf("In small_free_no_lock(), coalesced backwards for %p previous=%p, msize=%d\n", ptr, previous, previous_msize);
+        if (!previous_msize || (previous_msize >= (((vm_address_t)ptr - REGION_ADDRESS(*region)) >> SHIFT_QUANTUM))) {
+            szone_error(szone, "Invariant 1 broken when coalescing backwards", ptr);
+        }
+        if (MSIZE_FLAGS_FOR_PTR(previous) != (previous_msize | THIS_FREE)) {
+            malloc_printf("previous=%p its_msize_and_free=0x%x previous_msize=%d\n", previous, MSIZE_FLAGS_FOR_PTR(previous), previous_msize);
+            szone_error(szone, "Invariant 3 broken when coalescing backwards", ptr);
+        }
+#endif
+        free_list_remove_ptr(szone, previous, previous_msize);
         ptr = previous;
         msize += previous_msize;
-    }
-    // We try to coalesce with the next block
-    if ((next_block < TINY_REGION_END(*region)) && tiny_meta_header_is_free(next_block)) {
-        next_msize = get_tiny_free_size(next_block);
-#if DEBUG_MALLOC
-        if (LOG(szone, ptr) || LOG(szone, next_block)) {
-            malloc_printf("in tiny_free_no_lock(), for ptr=%p, msize=%d coalesced forward=%p next_msize=%d\n",
-              ptr, msize, next_block, next_msize);
-        }
-#endif
-        // If we are coalescing with the next block, and the next block is in
-        // the last slot of the free list, then we optimize this case here to 
-        // avoid removing next_block from the slot 31 and then adding ptr back
-        // to slot 31.
-        if (next_msize >= NUM_TINY_SLOTS) {
-            msize += next_msize;
-            big_free_block = (free_list_t *)next_block;
-            free_list_checksum(szone, big_free_block, __PRETTY_FUNCTION__);
-            after_next_block = free_list_unchecksum_ptr(big_free_block->next);
-            before_next_block = free_list_unchecksum_ptr(big_free_block->previous);
-            if (!before_next_block) {
-                szone->tiny_free_list[NUM_TINY_SLOTS-1] = ptr;
+#if DEBUG_MALLOC
+	if (msize & PREV_FREE) {
+	    malloc_printf("In small_free_no_lock(), after coalescing with previous ptr=%p, msize=%d previous_msize=%d\n", ptr, msize, previous_msize);
+	    szone_error(szone, "Incorrect coalescing", ptr);
+	}
+#endif 
+    }
+    if (szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) {
+	if (!msize) {
+	    szone_error(szone, "Incorrect size information - block header was damaged", ptr);
+	} else {
+	    memset(ptr, 0x55, (msize << SHIFT_QUANTUM) - PTR_HEADER_SIZE);
+	}
+    }
+    free_list_add_ptr(szone, ptr, msize);
+    CHECK(szone, "small_free_no_lock: added to free list");
+    szone->num_small_objects--;
+    szone->num_bytes_in_small_objects -= original_size; // we use original_size and not msize to avoid double counting the coalesced blocks
+}
+
+static void *small_malloc_from_region_no_lock(szone_t *szone, msize_t msize) {
+    // Allocates from the last region or a freshly allocated region
+    region_t		*last_region = szone->regions + szone->num_regions - 1;
+    vm_address_t	new_address;
+    void		*ptr;
+    msize_t		msize_and_free;
+    unsigned		region_capacity;
+    ptr = (void *)(REGION_END(*last_region) - szone->num_bytes_free_in_last_region + PTR_HEADER_SIZE);
+#if DEBUG_MALLOC
+    if (((vm_address_t)ptr) & (QUANTUM - 1)) {
+        szone_error(szone, "Invariant broken while using end of region", ptr);
+    }
+#endif
+    msize_and_free = MSIZE_FLAGS_FOR_PTR(ptr);
+#if DEBUG_MALLOC
+    if (msize_and_free != PREV_FREE && msize_and_free != 0) {
+        malloc_printf("*** malloc[%d]: msize_and_free = %d\n", getpid(), msize_and_free);
+        szone_error(szone, "Invariant broken when allocating at end of zone", ptr);
+    }
+#endif
+    // In order to make sure we don't have 2 free pointers following themselves, if the last item is a free item, we combine it and clear it
+    if (msize_and_free == PREV_FREE) {
+        msize_t		previous_msize = PREVIOUS_MSIZE(ptr);
+        void		*previous = ptr - (previous_msize << SHIFT_QUANTUM);
+#if DEBUG_MALLOC
+        if (LOG(szone, ptr)) malloc_printf("Combining last with free space at %p\n", ptr);
+        if (!previous_msize || (previous_msize >= (((vm_address_t)ptr - REGION_ADDRESS(*last_region)) >> SHIFT_QUANTUM)) || (MSIZE_FLAGS_FOR_PTR(previous) != (previous_msize | THIS_FREE))) {
+            szone_error(szone, "Invariant broken when coalescing backwards at end of zone", ptr);
+        }
+#endif
+        free_list_remove_ptr(szone, previous, previous_msize);
+        szone->num_bytes_free_in_last_region += previous_msize << SHIFT_QUANTUM;
+        memset(previous, 0, previous_msize << SHIFT_QUANTUM);
+        MSIZE_FLAGS_FOR_PTR(previous) = 0;
+        ptr = previous;
+    }
+    // first try at the end of the last region
+    CHECK(szone, __PRETTY_FUNCTION__);
+    if (szone->num_bytes_free_in_last_region >= (msize << SHIFT_QUANTUM)) {
+        szone->num_bytes_free_in_last_region -= (msize << SHIFT_QUANTUM);
+        szone->num_small_objects++;
+        szone->num_bytes_in_small_objects += msize << SHIFT_QUANTUM;
+        MSIZE_FLAGS_FOR_PTR(ptr) = msize;
+        return ptr;
+    }
+    // time to create a new region
+    new_address = allocate_pages(szone, REGION_SIZE, 0, VM_MAKE_TAG(VM_MEMORY_MALLOC_SMALL));
+    if (!new_address) {
+        // out of memory!
+        return NULL;
+    }
+    // let's prepare to free the remnants of last_region
+    if (szone->num_bytes_free_in_last_region >= QUANTUM) {
+        msize_t	this_msize = szone->num_bytes_free_in_last_region >> SHIFT_QUANTUM;
+        // malloc_printf("Entering last block %p size=%d\n", ptr, this_msize << SHIFT_QUANTUM);
+        if (this_msize >= MIN_BLOCK) {
+            free_list_add_ptr(szone, ptr, this_msize);
+        } else {
+            // malloc_printf("Leaking last block at %p\n", ptr);
+        }
+        szone->num_bytes_free_in_last_region -= this_msize << SHIFT_QUANTUM; // to avoid coming back here
+    }
+    last_region[1] = new_address;
+    szone->num_regions++;
+    szone->num_bytes_free_in_last_region = REGION_SIZE - QUANTUM + PTR_HEADER_SIZE - (msize << SHIFT_QUANTUM);
+    ptr = (void *)(new_address + QUANTUM); // waste the first bytes
+    region_capacity = (MSIZE_FLAGS_FOR_PTR(szone->regions) * QUANTUM - PTR_HEADER_SIZE) / sizeof(region_t);
+    if (szone->num_regions >= region_capacity) {
+        unsigned	new_capacity = region_capacity * 2 + 1;
+        msize_t		new_msize = (new_capacity * sizeof(region_t) + PTR_HEADER_SIZE + QUANTUM - 1) / QUANTUM;
+        region_t	*new_regions = ptr;
+        // malloc_printf("Now %d regions growing regions %p to %d\n", szone->num_regions, szone->regions, new_capacity);
+        MSIZE_FLAGS_FOR_PTR(new_regions) = new_msize;
+        szone->num_small_objects++;
+        szone->num_bytes_in_small_objects += new_msize << SHIFT_QUANTUM;
+        memcpy(new_regions, szone->regions, szone->num_regions * sizeof(region_t));
+        // We intentionally leak the previous regions pointer to avoid multi-threading crashes if another thread was reading it (unlocked) while we are changing it
+        // Given that in practise the number of regions is typically a handful, this should not be a big deal
+        szone->regions = new_regions;
+        ptr += (new_msize << SHIFT_QUANTUM);
+        szone->num_bytes_free_in_last_region -= (new_msize << SHIFT_QUANTUM);
+        // malloc_printf("Regions is now %p next ptr is %p\n", szone->regions, ptr);
+    }
+    szone->num_small_objects++;
+    szone->num_bytes_in_small_objects += msize << SHIFT_QUANTUM;
+    MSIZE_FLAGS_FOR_PTR(ptr) = msize;
+    return ptr;
+}
+
+static boolean_t szone_check_region(szone_t *szone, region_t *region) {
+    void		*ptr = (void *)REGION_ADDRESS(*region) + QUANTUM;
+    vm_address_t	region_end = REGION_END(*region);
+    int			is_last_region = region == szone->regions + szone->num_regions - 1;
+    msize_t		prev_free = 0;
+    while ((vm_address_t)ptr < region_end) {
+        msize_t		msize_and_free = MSIZE_FLAGS_FOR_PTR(ptr);
+        if (!(msize_and_free & THIS_FREE)) {
+            msize_t	msize = msize_and_free & ~PREV_FREE;
+            if ((msize_and_free & PREV_FREE) != prev_free) {
+                malloc_printf("*** malloc[%d]: invariant broken for %p (prev_free=%d) this msize=%d\n", getpid(), ptr, prev_free, msize_and_free);
+                return 0;
+            }
+            if (!msize) {
+                int	extra = (is_last_region) ? szone->num_bytes_free_in_last_region : QUANTUM;
+                if (((unsigned)(ptr + extra)) < region_end) {
+                    malloc_printf("*** malloc[%d]: invariant broken at region end: ptr=%p extra=%d index=%d num_regions=%d end=%p\n", getpid(), ptr, extra, region - szone->regions, szone->num_regions, (void *)region_end);
+                    return 0;
+                }
+                break; // last encountered
+            }
+	    if (msize > (LARGE_THRESHOLD / QUANTUM)) {
+                malloc_printf("*** malloc[%d]: invariant broken for %p this msize=%d - size is too large\n", getpid(), ptr, msize_and_free);
+                return 0;
+            }
+            if ((msize < MIN_BLOCK) && ((unsigned)ptr != region_end - QUANTUM)) {
+                malloc_printf("*** malloc[%d]: invariant broken for %p this msize=%d - size is too small\n", getpid(), ptr, msize_and_free);
+                return 0;
+            }
+            ptr += msize << SHIFT_QUANTUM;
+            prev_free = 0;
+	    if (is_last_region && ((vm_address_t)ptr - PTR_HEADER_SIZE > region_end - szone->num_bytes_free_in_last_region)) {
+                malloc_printf("*** malloc[%d]: invariant broken for %p this msize=%d - block extends beyond allocated region\n", getpid(), ptr, msize_and_free);
+	    }
+        } else {
+            // free pointer
+            msize_t	msize = msize_and_free & ~THIS_FREE;
+            free_list_t	*free_head = ptr;
+            msize_t	*follower = (void *)FOLLOWING_PTR(ptr, msize);
+	    if ((msize_and_free & PREV_FREE) && !prev_free) {
+                malloc_printf("*** malloc[%d]: invariant broken for free block %p this msize=%d: PREV_FREE set while previous block is in use\n", getpid(), ptr, msize);
+                return 0;
+	    }
+            if (msize < MIN_BLOCK) {
+                malloc_printf("*** malloc[%d]: invariant broken for free block %p this msize=%d\n", getpid(), ptr, msize);
+                return 0;
+            }
+            if (prev_free) {
+                malloc_printf("*** malloc[%d]: invariant broken for %p (2 free in a row)\n", getpid(), ptr);
+                return 0;
+            }
+            free_list_checksum(szone, free_head);
+            if (free_head->previous && !(MSIZE_FLAGS_FOR_PTR(free_head->previous) & THIS_FREE)) {
+                malloc_printf("*** malloc[%d]: invariant broken for %p (previous %p is not a free pointer)\n", getpid(), ptr, free_head->previous);
+                return 0;
+            }
+            if (free_head->next && !(MSIZE_FLAGS_FOR_PTR(free_head->next) & THIS_FREE)) {
+                malloc_printf("*** malloc[%d]: invariant broken for %p (next is not a free pointer)\n", getpid(), ptr);
+                return 0;
+            }
+            if (PREVIOUS_MSIZE(follower) != msize) {
+                malloc_printf("*** malloc[%d]: invariant broken for free %p followed by %p in region [%x-%x] (end marker incorrect) should be %d; in fact %d\n", getpid(), ptr, follower, REGION_ADDRESS(*region), region_end, msize, PREVIOUS_MSIZE(follower));
+                return 0;
+            }
+            ptr = follower;
+            prev_free = PREV_FREE;
+        }
+    }
+    return 1;
+}
+
+static kern_return_t small_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t region_address, unsigned num_regions, memory_reader_t reader, vm_range_recorder_t recorder) {
+    region_t		*regions;
+    unsigned		index = 0;
+    vm_range_t		buffer[MAX_RECORDER_BUFFER];
+    unsigned		count = 0;
+    kern_return_t	err;
+    err = reader(task, region_address, sizeof(region_t) * num_regions, (void **)&regions);
+    if (err) return err;
+    while (index < num_regions) {
+        region_t	region = regions[index++];
+        vm_range_t	range = {REGION_ADDRESS(region), REGION_SIZE};
+        vm_address_t	start = range.address + QUANTUM;
+        // malloc_printf("Enumerating small ptrs for Region starting at 0x%x\n", start);
+        if (type_mask & MALLOC_PTR_REGION_RANGE_TYPE) recorder(task, context, MALLOC_PTR_REGION_RANGE_TYPE, &range, 1);
+        if (type_mask & MALLOC_PTR_IN_USE_RANGE_TYPE) while (start < range.address + range.size) {
+            void	*previous;
+            msize_t	msize_and_free;
+            err = reader(task, start - PTR_HEADER_SIZE, QUANTUM, (void **)&previous);
+            if (err) return err;
+            previous += PTR_HEADER_SIZE;
+            msize_and_free = MSIZE_FLAGS_FOR_PTR(previous);
+            if (!(msize_and_free & THIS_FREE)) {
+                // Block in use
+                msize_t		msize = msize_and_free & ~PREV_FREE;
+                if (!msize) break; // last encountered
+                buffer[count].address = start;
+                buffer[count].size = (msize << SHIFT_QUANTUM) - PTR_HEADER_SIZE;
+                count++;
+                if (count >= MAX_RECORDER_BUFFER) {
+                    recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count);
+                    count = 0;
+                }
+                start += msize << SHIFT_QUANTUM;
             } else {
-                before_next_block->next.u = free_list_checksum_ptr(ptr);
+                // free pointer
+                msize_t	msize = msize_and_free & ~THIS_FREE;
+                start += msize << SHIFT_QUANTUM;
             }
-            if (after_next_block) {
-                after_next_block->previous.u = free_list_checksum_ptr(ptr);
-            }
-            // we don't need to checksum these since they are already checksummed
-            ((free_list_t *)ptr)->previous = big_free_block->previous;
-            ((free_list_t *)ptr)->next = big_free_block->next;
-            
-            // clear the meta_header to enable coalescing backwards
-            set_tiny_meta_header_middle(big_free_block);
-            set_tiny_meta_header_free(ptr, msize);
-            goto tiny_free_ending;
-        }
-        tiny_free_list_remove_ptr(szone, next_block, next_msize);
-        set_tiny_meta_header_middle(next_block); // clear the meta_header to enable coalescing backwards
-        msize += next_msize;
-    }
-#if !TINY_CACHE
-    // The tiny cache already scribbles free blocks as they go through the
-    // cache, so we do not need to do it here.
-    if ((szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) && msize) {
-	memset(ptr, 0x55, TINY_BYTES_FOR_MSIZE(msize));
-    }
-#endif
-    tiny_free_list_add_ptr(szone, ptr, msize);
-  tiny_free_ending:
-    // When in proper debug mode we write on the memory to help debug memory smashers
-    szone->num_tiny_objects--;
-    szone->num_bytes_in_tiny_objects -= original_size; // we use original_size and not msize to avoid double counting the coalesced blocks
-}
-
-// Allocates from the last region or a freshly allocated region
-static void *
-tiny_malloc_from_region_no_lock(szone_t *szone, msize_t msize)
-{
-    void            *last_block, *ptr, *aligned_address;
-    unsigned char   *last_header;
-    msize_t         last_msize, last_index;
-
-    // Before anything we transform any remaining tiny_bytes_free_at_end into a
-    // regular free block.  We take special care here to update the bitfield
-    // information, since we are bypassing the normal free codepath.  If there
-    // is more than one quanta worth of memory in tiny_bytes_free_at_end, then
-    // there will be two block headers:
-    // 1) header for the free space at end, msize = 1
-    // 2) header inserted by set_tiny_meta_header_in_use after block
-    // We must clear the second one so that when the free block's size is
-    // queried, we do not think the block is only 1 quantum in size because
-    // of the second set header bit.
-    if (szone->tiny_bytes_free_at_end) {
-        last_block = TINY_REGION_END(szone->last_tiny_region) - szone->tiny_bytes_free_at_end;
-        last_msize = TINY_MSIZE_FOR_BYTES(szone->tiny_bytes_free_at_end);
-        last_header = TINY_BLOCK_HEADER_FOR_PTR(last_block);
-        last_index  = TINY_INDEX_FOR_PTR(last_block);
-        
-        if (last_index != (NUM_TINY_BLOCKS - 1))
-            BITARRAY_CLR(last_header, last_index + 1);
-        
-        tiny_free_list_add_ptr(szone, last_block, last_msize);
-        szone->tiny_bytes_free_at_end = 0;
-    }
-    // time to create a new region
-    aligned_address = allocate_pages(szone, TINY_REGION_SIZE, TINY_BLOCKS_ALIGN, 0, VM_MEMORY_MALLOC_TINY);
-    if (!aligned_address) // out of memory!
-        return NULL;
-    // We set the padding after block_header to be all 1
-    ((uint32_t *)(aligned_address + TINY_HEADER_START + (NUM_TINY_BLOCKS >> 3)))[0] = ~0;
-
-    // Check to see if the hash ring of tiny regions needs to grow.  Try to
-    // avoid the hash ring becoming too dense.
-    if (szone->num_tiny_regions_allocated < (2 * szone->num_tiny_regions)) {
-      region_t *new_regions;
-      size_t new_size;
-      new_regions = hash_regions_grow_no_lock(szone, szone->tiny_regions,
-                                              szone->num_tiny_regions_allocated,
-                                              &new_size);
-      // Do not deallocate the current tiny_regions allocation since someone may
-	  // be iterating it.  Instead, just leak it.
-      szone->tiny_regions = new_regions;
-      szone->num_tiny_regions_allocated = new_size;
-    }
-    // Insert the new region into the hash ring, and update malloc statistics
-    hash_region_insert_no_lock(szone->tiny_regions, 
-                               szone->num_tiny_regions_allocated, 
-                               aligned_address);
-    szone->last_tiny_region = aligned_address;
-
-    szone->num_tiny_regions++;
-    ptr = aligned_address; 
-    set_tiny_meta_header_in_use(ptr, msize);
-    szone->num_tiny_objects++;
-    szone->num_bytes_in_tiny_objects += TINY_BYTES_FOR_MSIZE(msize);
- 
-    // We put a header on the last block so that it appears in use (for coalescing, etc...)
-    set_tiny_meta_header_in_use(ptr + TINY_BYTES_FOR_MSIZE(msize), 1);
-    szone->tiny_bytes_free_at_end = TINY_BYTES_FOR_MSIZE(NUM_TINY_BLOCKS - msize);
-#if DEBUG_MALLOC
-    if (LOG(szone,ptr)) {
-	malloc_printf("in tiny_malloc_from_region_no_lock(), ptr=%p, msize=%d\n", ptr, msize);
-    }
-#endif
-    return ptr;
-}
-
-static INLINE boolean_t
-try_realloc_tiny_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size)
-{
-    // returns 1 on success
-    msize_t	index;
-    msize_t	old_msize;
-    unsigned	next_index;
-    void	*next_block;
-    boolean_t	is_free;
-    msize_t	next_msize, coalesced_msize, leftover_msize;
-    void	*leftover;
-
-    index = TINY_INDEX_FOR_PTR(ptr);
-    old_msize = TINY_MSIZE_FOR_BYTES(old_size);
-    next_index = index + old_msize;
-    
-    if (next_index >= NUM_TINY_BLOCKS) {
-	return 0;
-    }
-    next_block = (char *)ptr + old_size;
-    SZONE_LOCK(szone);
-    is_free = tiny_meta_header_is_free(next_block);
-    if (!is_free) {
-	SZONE_UNLOCK(szone);
-	return 0; // next_block is in use;
-    }
-    next_msize = get_tiny_free_size(next_block);
-    if (old_size + TINY_MSIZE_FOR_BYTES(next_msize) < new_size) {
-	SZONE_UNLOCK(szone);
-	return 0; // even with next block, not enough
-    }
-    tiny_free_list_remove_ptr(szone, next_block, next_msize);
-    set_tiny_meta_header_middle(next_block); // clear the meta_header to enable coalescing backwards
-    coalesced_msize = TINY_MSIZE_FOR_BYTES(new_size - old_size + TINY_QUANTUM - 1);
-    leftover_msize = next_msize - coalesced_msize;
-    if (leftover_msize) {
-	leftover = next_block + TINY_BYTES_FOR_MSIZE(coalesced_msize);
-	tiny_free_list_add_ptr(szone, leftover, leftover_msize);
-    }
-    set_tiny_meta_header_in_use(ptr, old_msize + coalesced_msize);
-#if DEBUG_MALLOC
-    if (LOG(szone,ptr)) {
-	malloc_printf("in try_realloc_tiny_in_place(), ptr=%p, msize=%d\n", ptr, old_msize + coalesced_msize);
-    }
-#endif
-    szone->num_bytes_in_tiny_objects += TINY_BYTES_FOR_MSIZE(coalesced_msize);
-    SZONE_UNLOCK(szone);
-    CHECK(szone, __PRETTY_FUNCTION__);
-    return 1;
-}
-
-static boolean_t
-tiny_check_region(szone_t *szone, region_t region)
-{
-    uintptr_t   start, ptr, region_end;
-    boolean_t   prev_free = 0;
-    boolean_t   is_free;
-    msize_t     msize;
-    free_list_t *free_head;
-    void        *follower, *previous, *next;
-
-    /* establish region limits */
-    start = (uintptr_t)TINY_REGION_ADDRESS(region);
-    ptr = start;
-    region_end = (uintptr_t)TINY_REGION_END(region);
-
-    /*
-     * The last region may have a trailing chunk which has not been converted into inuse/freelist
-     * blocks yet.
-     */
-    if (region == szone->last_tiny_region)
-	    region_end -= szone->tiny_bytes_free_at_end;
-
-
-    /*
-     * Scan blocks within the region.
-     */
-    while (ptr < region_end) {
-	/*
-	 * If the first block is free, and its size is 65536 (msize = 0) then the entire region is
-	 * free.
-	 */
-	msize = get_tiny_meta_header((void *)ptr, &is_free);
-	if (is_free && !msize && (ptr == start)) {
-	    return 1;
-	}
-
-	/*
-	 * If the block's size is 65536 (msize = 0) then since we're not the first entry the size is
-	 * corrupt.
-	 */
-	if (!msize) {
-	    malloc_printf("*** invariant broken for tiny block %p this msize=%d - size is too small\n",
-	      ptr, msize);
-	    return 0;
-	}
-
-	if (!is_free) {
-	    /*
-	     * In use blocks cannot be more than 31 quanta large.
-	     */
-	    prev_free = 0;
-	    if (msize > 31 * TINY_QUANTUM) {
-		malloc_printf("*** invariant broken for %p this tiny msize=%d[%p] - size is too large\n",
-		  ptr, msize, msize);
-		return 0;
-	    }
-	    /* move to next block */
-	    ptr += TINY_BYTES_FOR_MSIZE(msize);
-	} else {
-	    /*
-	     * Free blocks must have been coalesced, we cannot have a free block following another
-	     * free block.
-	     */
-	    if (prev_free) {
-		malloc_printf("*** invariant broken for free block %p this tiny msize=%d: two free blocks in a row\n",
-		  ptr, msize);
-		return 0;
-	    }
-	    prev_free = 1;
-	    /*
-	     * Check the integrity of this block's entry in its freelist.
-	     */
-	    free_head = (free_list_t *)ptr;
-	    free_list_checksum(szone, free_head, __PRETTY_FUNCTION__);
-	    previous = free_list_unchecksum_ptr(free_head->previous);
-	    next = free_list_unchecksum_ptr(free_head->next);
-	    if (previous && !tiny_meta_header_is_free(previous)) {
-		malloc_printf("*** invariant broken for %p (previous %p is not a free pointer)\n",
-		  ptr, previous);
-		return 0;
-	    }
-	    if (next && !tiny_meta_header_is_free(next)) {
-		malloc_printf("*** invariant broken for %p (next in free list %p is not a free pointer)\n",
-		  ptr, next);
-		return 0;
-	    }
-	    /*
-	     * Check the free block's trailing size value.
-	     */
-	    follower = FOLLOWING_TINY_PTR(ptr, msize);
-	    if (((uintptr_t)follower != region_end) && (get_tiny_previous_free_msize(follower) != msize)) {
-		malloc_printf("*** invariant broken for tiny free %p followed by %p in region [%p-%p] "
-		  "(end marker incorrect) should be %d; in fact %d\n",
-		  ptr, follower, TINY_REGION_ADDRESS(region), region_end, msize, get_tiny_previous_free_msize(follower));
-		return 0;
-	    }
-	    /* move to next block */
-	    ptr = (uintptr_t)follower;
-	}
-    }
-    /*
-     * Ensure that we scanned the entire region
-     */
-    if (ptr != region_end) {
-	malloc_printf("*** invariant broken for region end %p - %p\n", ptr, region_end);
-	return 0;
-    }
-    /*
-     * Check the trailing block's integrity.
-     */
-    if (region == szone->last_tiny_region) {
-	if (szone->tiny_bytes_free_at_end) {
-	    msize = get_tiny_meta_header((void *)ptr, &is_free);
-	    if (is_free || (msize != 1)) {
-		malloc_printf("*** invariant broken for blocker block %p - %d %d\n", ptr, msize, is_free);
-	    }
-	}
-    }
-    return 1;
-}
-
-static kern_return_t
-tiny_in_use_enumerator(task_t task, void *context, unsigned type_mask, szone_t *szone, memory_reader_t reader, vm_range_recorder_t recorder)
-{
-  size_t num_regions = szone->num_tiny_regions_allocated;
-  void *last_tiny_free = szone->last_tiny_free;
-  size_t index;
-  region_t	*regions;
-  vm_range_t		buffer[MAX_RECORDER_BUFFER];
-  unsigned		count = 0;
-  kern_return_t	err;
-  region_t	region;
-  vm_range_t		range;
-  vm_range_t		admin_range;
-  vm_range_t		ptr_range;
-  unsigned char	*mapped_region;
-  unsigned char	*block_header;
-  unsigned char	*in_use;
-  unsigned		block_index;
-  unsigned		block_limit;
-  boolean_t		is_free;
-  msize_t		msize;
-  void		*mapped_ptr;
-  unsigned 		bit;
-  vm_address_t last_tiny_free_ptr = 0;
-  msize_t last_tiny_free_msize = 0;
-  
-  if (last_tiny_free) {
-    last_tiny_free_ptr = (uintptr_t) last_tiny_free & ~(TINY_QUANTUM - 1);
-    last_tiny_free_msize = (uintptr_t) last_tiny_free & (TINY_QUANTUM - 1);
-  }
-  
-  err = reader(task, (vm_address_t)szone->tiny_regions, sizeof(region_t) * num_regions, (void **)&regions);
-  if (err) return err;
-  for (index = 0; index < num_regions; ++index) {
-    region = regions[index];
-    if (region) {
-      range.address = (vm_address_t)TINY_REGION_ADDRESS(region);
-      range.size = (vm_size_t)TINY_REGION_SIZE;
-      if (type_mask & MALLOC_ADMIN_REGION_RANGE_TYPE) {
-        admin_range.address = range.address + TINY_HEADER_START;
-        admin_range.size = TINY_HEADER_SIZE;
-        recorder(task, context, MALLOC_ADMIN_REGION_RANGE_TYPE, &admin_range, 1);
-      }
-      if (type_mask & (MALLOC_PTR_REGION_RANGE_TYPE | MALLOC_ADMIN_REGION_RANGE_TYPE)) {
-        ptr_range.address = range.address;
-        ptr_range.size = NUM_TINY_BLOCKS * TINY_QUANTUM;
-        recorder(task, context, MALLOC_PTR_REGION_RANGE_TYPE, &ptr_range, 1);
-      }
-      if (type_mask & MALLOC_PTR_IN_USE_RANGE_TYPE) {
-        err = reader(task, range.address, range.size, (void **)&mapped_region);
-        if (err)
-          return err;
-        
-        block_header = (unsigned char *)(mapped_region + TINY_HEADER_START);
-        in_use = TINY_INUSE_FOR_HEADER(block_header);
-        block_index = 0;
-        block_limit = NUM_TINY_BLOCKS;
-        if (region == szone->last_tiny_region)
-          block_limit -= TINY_MSIZE_FOR_BYTES(szone->tiny_bytes_free_at_end);
-        
-        while (block_index < block_limit) {
-          vm_size_t block_offset = TINY_BYTES_FOR_MSIZE(block_index);
-          is_free = !BITARRAY_BIT(in_use, block_index);
-          if (is_free) {
-            mapped_ptr = mapped_region + block_offset;
-            
-            // mapped_region, the address at which 'range' in 'task' has been
-            // mapped into our process, is not necessarily aligned to 
-            // TINY_BLOCKS_ALIGN.
-            //
-            // Since the code in get_tiny_free_size() assumes the pointer came
-            // from a properly aligned tiny region, and mapped_region is not
-            // necessarily aligned, then do the size calculation directly.
-            // If the next bit is set in the header bitmap, then the size is one
-            // quantum.  Otherwise, read the size field.
-            if (!BITARRAY_BIT(block_header, block_index+1))
-              msize = TINY_FREE_SIZE(mapped_ptr);
-            else
-              msize = 1;
-            
-            if (!msize)
-              break;
-          } else if (range.address + block_offset != last_tiny_free_ptr) {
-            msize = 1; 
-            bit = block_index + 1;
-            while (! BITARRAY_BIT(block_header, bit)) {
-              bit++;
-              msize ++;
-            }
-            buffer[count].address = range.address + block_offset;
-            buffer[count].size = TINY_BYTES_FOR_MSIZE(msize);
-            count++;
-            if (count >= MAX_RECORDER_BUFFER) {
-              recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count);
-              count = 0;
-            }
-          } else {
-            // Block is not free but it matches last_tiny_free_ptr so even
-            // though it is not marked free in the bitmap, we treat it as if
-            // it is and move on
-            msize = last_tiny_free_msize;
-          }
-          block_index += msize;
-        }
-      }
-    }
-  }
-  if (count) {
-    recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count);
-  }
-  return 0;
-}
-
-static void *
-tiny_malloc_from_free_list(szone_t *szone, msize_t msize)
-{
-    // Assumes we've locked the region
-    free_list_t		*ptr;
-    msize_t         this_msize;
-    grain_t         slot = msize - 1;
-    free_list_t		**free_list = szone->tiny_free_list;
-    free_list_t		**the_slot = free_list + slot;
-    free_list_t		*next;
-    free_list_t		**limit;
-    unsigned		bitmap;
-    msize_t         leftover_msize;
-    free_list_t		*leftover_ptr;
-
-    // Assumes locked
-    CHECK_LOCKED(szone, __PRETTY_FUNCTION__);
-    
-    // Look for an exact match by checking the freelist for this msize.
-    // 
-    ptr = *the_slot;
-    if (ptr) {
-        next = free_list_unchecksum_ptr(ptr->next);
-        if (next) {
-            next->previous = ptr->previous;
-        } else {
-            BITMAP32_CLR(szone->tiny_bitmap, slot);
-        }
-        *the_slot = next;
-        this_msize = msize;
-#if DEBUG_MALLOC
-        if (LOG(szone, ptr)) {
-            malloc_printf("in tiny_malloc_from_free_list(), exact match ptr=%p, this_msize=%d\n", ptr, this_msize);
-        }
-#endif
-        goto return_tiny_alloc;
-    }
-
-    // Mask off the bits representing slots holding free blocks smaller than the
-    // size we need.  If there are no larger free blocks, try allocating from
-    // the free space at the end of the tiny region.
-    bitmap = szone->tiny_bitmap & ~ ((1 << slot) - 1);
-    if (!bitmap)
-        goto try_tiny_malloc_from_end;
-        
-    slot = BITMAP32_CTZ(bitmap);
-    limit = free_list + NUM_TINY_SLOTS - 1;
-    free_list += slot;
-    
-    // Iterate over freelists looking for free blocks, starting at first list
-    // which is not empty, and contains blocks which are large enough to satisfy
-    // our request.
+        }
+        // malloc_printf("End region - count=%d\n", count);
+    }
+    if (count) recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count);
+    return 0;
+}
+
+static INLINE void *small_malloc_from_free_list(szone_t *szone, msize_t msize, boolean_t *locked) {
+    void	*ptr;
+    msize_t	this_msize;
+    free_list_t	**free_list;
+    free_list_t	**limit = szone->free_list + MAX_GRAIN - 1;
+    // first try the small grains
+    free_list = szone->free_list + grain_for_msize(szone, msize);
     while (free_list < limit) {
+        // try bigger grains
         ptr = *free_list;
         if (ptr) {
-            next = free_list_unchecksum_ptr(ptr->next);
-            *free_list = next;
-            this_msize = get_tiny_free_size(ptr);
-            if (next) {
-                next->previous = ptr->previous;
-            } else {
-                BITMAP32_CLR(szone->tiny_bitmap, this_msize - 1);
+            if (!*locked) { *locked = 1; SZONE_LOCK(szone); CHECK(szone, __PRETTY_FUNCTION__); }
+            ptr = *free_list;
+            if (ptr) {
+                // optimistic test worked
+                free_list_t	*next;
+                next = ((free_list_t *)ptr)->next;
+                if (next) {
+                    next->previous = NULL;
+                    free_list_set_checksum(szone, next);
+                }
+                *free_list = next;
+                this_msize = MSIZE_FLAGS_FOR_PTR(ptr) & ~THIS_FREE;
+                MSIZE_FLAGS_FOR_PTR(FOLLOWING_PTR(ptr, this_msize)) &= ~ PREV_FREE;
+                goto add_leftover_and_proceed;
             }
+        }
+        free_list++;
+    }
+    // We now check the large grains for one that is big enough
+    if (!*locked) { *locked = 1; SZONE_LOCK(szone); CHECK(szone, __PRETTY_FUNCTION__); }
+    ptr = *free_list;
+    while (ptr) {
+        this_msize = MSIZE_FLAGS_FOR_PTR(ptr) & ~THIS_FREE;
+        if (this_msize >= msize) {
+            free_list_remove_ptr(szone, ptr, this_msize);
             goto add_leftover_and_proceed;
         }
-        free_list++;
-    }
-
-    // We are now looking at the last slot, which contains blocks equal to, or
-    // due to coalescing of free blocks, larger than 31 * tiny quantum size.
-    // If the last freelist is not empty, and the head contains a block that is
-    // larger than our request, then the remainder is put back on the free list.
-    ptr = *limit;
-    if (ptr) {
-        free_list_checksum(szone, ptr, __PRETTY_FUNCTION__);
-        this_msize = get_tiny_free_size(ptr);
-        next = free_list_unchecksum_ptr(ptr->next);
-        if (this_msize - msize >= NUM_TINY_SLOTS) {
-            // the leftover will go back to the free list, so we optimize by
-            // modifying the free list rather than a pop and push of the head
-            leftover_msize = this_msize - msize;
-            leftover_ptr = (free_list_t *)((unsigned char *)ptr + TINY_BYTES_FOR_MSIZE(msize));
-            *limit = leftover_ptr;
-            if (next) {
-                next->previous.u = free_list_checksum_ptr(leftover_ptr);
-            }
-            leftover_ptr->previous = ptr->previous;
-            leftover_ptr->next = ptr->next;
-            set_tiny_meta_header_free(leftover_ptr, leftover_msize);
-#if DEBUG_MALLOC
-            if (LOG(szone,ptr)) {
-            malloc_printf("in tiny_malloc_from_free_list(), last slot ptr=%p, msize=%d this_msize=%d\n", ptr, msize, this_msize);
-            }
-#endif
-            this_msize = msize;
-            goto return_tiny_alloc;
-        }
-        if (next) {
-            next->previous = ptr->previous;
-        }
-        *limit = next;
-        goto add_leftover_and_proceed;
-    }
-
-try_tiny_malloc_from_end:
-    // Let's see if we can use szone->tiny_bytes_free_at_end
-    if (szone->tiny_bytes_free_at_end >= TINY_BYTES_FOR_MSIZE(msize)) {
-        ptr = (free_list_t *)(TINY_REGION_END(szone->last_tiny_region) - szone->tiny_bytes_free_at_end);
-        szone->tiny_bytes_free_at_end -= TINY_BYTES_FOR_MSIZE(msize);
-        if (szone->tiny_bytes_free_at_end) {
-            // let's add an in use block after ptr to serve as boundary
-            set_tiny_meta_header_in_use((unsigned char *)ptr + TINY_BYTES_FOR_MSIZE(msize), 1);
-        }
+        ptr = ((free_list_t *)ptr)->next;
+    }
+    return NULL;
+add_leftover_and_proceed:
+    if (this_msize >= msize + MIN_BLOCK) {
+        if (LOG(szone,ptr)) malloc_printf("In small_malloc_should_clear(), adding leftover ptr=%p, this_msize=%d\n", ptr, this_msize);
+        free_list_add_ptr(szone, ptr + (msize << SHIFT_QUANTUM), this_msize - msize);
         this_msize = msize;
-#if DEBUG_MALLOC
-        if (LOG(szone, ptr)) {
-            malloc_printf("in tiny_malloc_from_free_list(), from end ptr=%p, msize=%d\n", ptr, msize);
-        }
-#endif
-        goto return_tiny_alloc;
-    }
-    return NULL;
-
-add_leftover_and_proceed:
-    if (!this_msize || (this_msize > msize)) {
-        leftover_msize = this_msize - msize;
-        leftover_ptr = (free_list_t *)((unsigned char *)ptr + TINY_BYTES_FOR_MSIZE(msize));
-#if DEBUG_MALLOC
-        if (LOG(szone,ptr)) {
-            malloc_printf("in tiny_malloc_from_free_list(), adding leftover ptr=%p, this_msize=%d\n", ptr, this_msize);
-        }
-#endif
-        tiny_free_list_add_ptr(szone, leftover_ptr, leftover_msize);
-        this_msize = msize;
-    }
-    
-return_tiny_alloc:
-    szone->num_tiny_objects++;
-    szone->num_bytes_in_tiny_objects += TINY_BYTES_FOR_MSIZE(this_msize);
-#if DEBUG_MALLOC
-    if (LOG(szone,ptr)) {
-	malloc_printf("in tiny_malloc_from_free_list(), ptr=%p, this_msize=%d, msize=%d\n", ptr, this_msize, msize);
-    }
-#endif
-    set_tiny_meta_header_in_use(ptr, this_msize);
+    }
+    szone->num_small_objects++;
+    szone->num_bytes_in_small_objects += this_msize << SHIFT_QUANTUM;
+#if DEBUG_MALLOC
+    if (LOG(szone,ptr)) malloc_printf("In small_malloc_should_clear(), ptr=%p, this_msize=%d, msize=%d\n", ptr, this_msize, msize);
+#endif
+    MSIZE_FLAGS_FOR_PTR(ptr) = this_msize;
     return ptr;
 }
 
-static INLINE void *
-tiny_malloc_should_clear(szone_t *szone, msize_t msize, boolean_t cleared_requested)
-{
+static INLINE void *small_malloc_should_clear(szone_t *szone, msize_t msize, boolean_t cleared_requested) {
     boolean_t	locked = 0;
     void	*ptr;
-
-#if DEBUG_MALLOC
-    if (!msize) {
-	szone_error(szone, "invariant broken (!msize) in allocation (region)", NULL, NULL);
-	return(NULL);
-    }
-#endif
-#if TINY_CACHE
-    ptr = szone->last_tiny_free;
-    if ((((uintptr_t)ptr) & (TINY_QUANTUM - 1)) == msize) {
-	// we have a candidate - let's lock to make sure
-	LOCK_AND_NOTE_LOCKED(szone, locked);
-	if (ptr == szone->last_tiny_free) {
-	    szone->last_tiny_free = NULL;
-	    SZONE_UNLOCK(szone);
-	    CHECK(szone, __PRETTY_FUNCTION__);
-	    ptr = (void *)((uintptr_t)ptr & ~ (TINY_QUANTUM - 1));
-	    if (cleared_requested) {
-		memset(ptr, 0, TINY_BYTES_FOR_MSIZE(msize));
-	    }
-#if DEBUG_MALLOC
-	    if (LOG(szone,ptr)) {
-		malloc_printf("in tiny_malloc_should_clear(), tiny cache ptr=%p, msize=%d\n", ptr, msize);
-	    }
-#endif
-	    return ptr;
-	}
-    }
-#endif
-    // Except in rare occasions where we need to add a new region, we are going to end up locking, so we might as well lock right away to avoid doing unnecessary optimistic probes
-    if (!locked) LOCK_AND_NOTE_LOCKED(szone, locked);
-    ptr = tiny_malloc_from_free_list(szone, msize);
+#if DEBUG_MALLOC
+    if (! (msize & 0xffff)) {
+        szone_error(szone, "Invariant broken (!msize) in allocation (region)", NULL);
+    }
+    if (msize < MIN_BLOCK) {
+        szone_error(szone, "Invariant broken (msize too small) in allocation (region)", NULL);
+    }
+#endif
+    ptr = small_malloc_from_free_list(szone, msize, &locked);
     if (ptr) {
-	SZONE_UNLOCK(szone);
-	CHECK(szone, __PRETTY_FUNCTION__);
-	if (cleared_requested) {
-	    memset(ptr, 0, TINY_BYTES_FOR_MSIZE(msize));
-	}
-	return ptr;
-    }
-    ptr = tiny_malloc_from_region_no_lock(szone, msize);
-    // we don't clear because this freshly allocated space is pristine
-    SZONE_UNLOCK(szone);
-    CHECK(szone, __PRETTY_FUNCTION__);
+        CHECK(szone, __PRETTY_FUNCTION__);
+        SZONE_UNLOCK(szone);
+        if (cleared_requested) memset(ptr, 0, (msize << SHIFT_QUANTUM) - PTR_HEADER_SIZE);
+        return ptr;
+    } else {
+        if (!locked) SZONE_LOCK(szone);
+        CHECK(szone, __PRETTY_FUNCTION__);
+        ptr = small_malloc_from_region_no_lock(szone, msize);
+        // we don't clear because this freshly allocated space is pristine
+        CHECK(szone, __PRETTY_FUNCTION__);
+        SZONE_UNLOCK(szone);
+    }
     return ptr;
 }
 
-static INLINE void
-free_tiny(szone_t *szone, void *ptr, region_t *tiny_region)
-{
-    msize_t	msize;
-    boolean_t	is_free;
-#if TINY_CACHE
-    void *ptr2;
-#endif
-
-    // ptr is known to be in tiny_region
-    SZONE_LOCK(szone);
-#if TINY_CACHE
-    ptr2 = szone->last_tiny_free;
-    /* check that we don't already have this pointer in the cache */
-    if (ptr == (void *)((uintptr_t)ptr2 & ~ (TINY_QUANTUM - 1))) {
-	szone_error(szone, "double free", ptr, NULL);
-	return;
-    }
-#endif /* TINY_CACHE */
-    msize = get_tiny_meta_header(ptr, &is_free);
-    if (is_free) {
-	szone_error(szone, "double free", ptr, NULL);
-	return;
-    }
-#if DEBUG_MALLOC
-    if (!msize) {
-	malloc_printf("*** szone_free() block in use is too large: %p\n", ptr);
-	return;
-    }
-#endif
-#if TINY_CACHE
-    if (msize < TINY_QUANTUM) {	// to see if the bits fit in the last 4 bits
-    if ((szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) && msize)
-        memset(ptr, 0x55, TINY_BYTES_FOR_MSIZE(msize));
-    szone->last_tiny_free = (void *)(((uintptr_t)ptr) | msize);
-	if (!ptr2) {
-	    SZONE_UNLOCK(szone);
-	    CHECK(szone, __PRETTY_FUNCTION__);
-	    return;
-	}
-	msize = (uintptr_t)ptr2 & (TINY_QUANTUM - 1);
-	ptr = (void *)(((uintptr_t)ptr2) & ~(TINY_QUANTUM - 1));
-	tiny_region = tiny_region_for_ptr_no_lock(szone, ptr);
-	if (!tiny_region) {
-	    szone_error(szone, "double free (tiny cache)", ptr, NULL);
-	}
-    }
-#endif
-    tiny_free_no_lock(szone, tiny_region, ptr, msize);
-    SZONE_UNLOCK(szone);
-    CHECK(szone, __PRETTY_FUNCTION__);
-}
-
-static void
-print_tiny_free_list(szone_t *szone)
-{
-    grain_t	slot = 0;
-    free_list_t	*ptr;
-    _SIMPLE_STRING b = _simple_salloc();
-
-    if (b) {
-	_simple_sappend(b, "tiny free sizes: ");
-	while (slot < NUM_TINY_SLOTS) {
-	    ptr = szone->tiny_free_list[slot];
-	    if (ptr) {
-		_simple_sprintf(b, "%s%y[%d]; ", (slot == NUM_TINY_SLOTS-1) ? ">=" : "", (slot+1)*TINY_QUANTUM, free_list_count(ptr));
-	    }
-	    slot++;
-	}
-	_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b));
-	_simple_sfree(b);
-    }
-}
-
-static void
-print_tiny_region(boolean_t verbose, region_t region, size_t bytes_at_end)
-{
-    unsigned	counts[1024];
-    unsigned	in_use = 0;
-    uintptr_t	start = (uintptr_t)TINY_REGION_ADDRESS(region);
-    uintptr_t	current = start;
-    uintptr_t	limit =  (uintptr_t)TINY_REGION_END(region) - bytes_at_end;
-    boolean_t	is_free;
-    msize_t	msize;
-    unsigned	ci;
-    _SIMPLE_STRING b;
-    
-    memset(counts, 0, 1024 * sizeof(unsigned));
-    while (current < limit) {
-	msize = get_tiny_meta_header((void *)current, &is_free);
-	if (is_free & !msize && (current == start)) {
-	    // first block is all free
-	    break;
-	}
-	if (!msize) {
-	    malloc_printf("*** error with %p: msize=%d\n", (void *)current, (unsigned)msize);
-	    break;
-	}
-	if (!is_free) {
-	    // block in use
-	    if (msize > 32)
-		malloc_printf("*** error at %p msize for in_use is %d\n", (void *)current, msize);
-	    if (msize < 1024)
-		counts[msize]++;
-	    in_use++;
-	}
-	current += TINY_BYTES_FOR_MSIZE(msize);
-    }
-    if ((b = _simple_salloc()) != NULL) {
-	_simple_sprintf(b, "Tiny region [%p-%p, %y]\t", (void *)start, TINY_REGION_END(region), (int)TINY_REGION_SIZE);
-	_simple_sprintf(b, "In_use=%d ", in_use);
-	if (bytes_at_end) _simple_sprintf(b, "untouched=%ly", bytes_at_end);
-	if (verbose && in_use) {
-	    _simple_sappend(b, "\n\tSizes in use: ");
-	    for (ci = 0; ci < 1024; ci++)
-		if (counts[ci])
-		    _simple_sprintf(b, "%d[%d]", TINY_BYTES_FOR_MSIZE(ci), counts[ci]);
-	}
-	_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b));
-	_simple_sfree(b);
-    }
-}
-
-static boolean_t
-tiny_free_list_check(szone_t *szone, grain_t slot)
-{
-    unsigned	count = 0;
-    free_list_t	*ptr = szone->tiny_free_list[slot];
-    free_list_t	*previous = NULL;
-    boolean_t	is_free;
-
-    CHECK_LOCKED(szone, __PRETTY_FUNCTION__);
-    while (ptr) {
-        free_list_checksum(szone, ptr, __PRETTY_FUNCTION__);
-        is_free = tiny_meta_header_is_free(ptr);
-        if (! is_free) {
-            malloc_printf("*** in-use ptr in free list slot=%d count=%d ptr=%p\n", slot, count, ptr);
-            return 0;
-        }
-        if (((uintptr_t)ptr) & (TINY_QUANTUM - 1)) {
-            malloc_printf("*** unaligned ptr in free list slot=%d  count=%d ptr=%p\n", slot, count, ptr);
-            return 0;
-        }
-        if (!tiny_region_for_ptr_no_lock(szone, ptr)) {
-            malloc_printf("*** ptr not in szone slot=%d  count=%d ptr=%p\n", slot, count, ptr);
-            return 0;
-        }
-        if (free_list_unchecksum_ptr(ptr->previous) != previous) {
-            malloc_printf("*** previous incorrectly set slot=%d  count=%d ptr=%p\n", slot, count, ptr);
-            return 0;
-        }
-        previous = ptr;
-        ptr = free_list_unchecksum_ptr(ptr->next);
-        count++;
-    }
-    return 1;
-}
-
-/*********************	SMALL FREE LIST UTILITIES	************************/
-
-/*
- * Mark a block as free.  Only the first quantum of a block is marked thusly,
- * the remainder are marked "middle".
- */
-static INLINE void
-small_meta_header_set_is_free(msize_t *meta_headers, unsigned index, msize_t msize)
-{
-    meta_headers[index] = msize | SMALL_IS_FREE;
-}
-
-/*
- * Mark a block as in use.  Only the first quantum of a block is marked thusly,
- * the remainder are marked "middle".
- */
-static INLINE void
-small_meta_header_set_in_use(msize_t *meta_headers, msize_t index, msize_t msize)
-{
-    meta_headers[index] = msize;
-}
-
-/*
- * Mark a quantum as being the second or later in a block.
- */
-static INLINE void
-small_meta_header_set_middle(msize_t *meta_headers, msize_t index)
-{
-    meta_headers[index] = 0;
-}
-
-// Adds an item to the proper free list
-// Also marks the header of the block properly
-// Assumes szone has been locked    
-static void
-small_free_list_add_ptr(szone_t *szone, void *ptr, msize_t msize)
-{
-    grain_t	slot = (msize <= NUM_SMALL_SLOTS) ? msize - 1 : NUM_SMALL_SLOTS - 1;
-    free_list_t	*free_ptr = ptr;
-    free_list_t	*free_head = szone->small_free_list[slot];
-    void	*follower;
-
-#if DEBUG_MALLOC
-    if (LOG(szone,ptr)) {
-        malloc_printf("in %s, ptr=%p, msize=%d\n", __FUNCTION__, ptr, msize);
-    }
-    if (((uintptr_t)ptr) & (SMALL_QUANTUM - 1)) {
-        szone_error(szone, "small_free_list_add_ptr: Unaligned ptr", ptr, NULL);
-    }
-#endif
-    if (free_head) {
-        free_list_checksum(szone, free_head, __PRETTY_FUNCTION__);
-#if DEBUG_MALLOC
-        if (free_list_unchecksum_ptr(free_head->previous)) {
-            szone_error(szone, "small_free_list_add_ptr: Internal invariant broken (free_head->previous)", ptr,
-		"ptr=%p slot=%d free_head=%p previous=%p\n", ptr, slot, free_head, free_head->previous.p);
-        }
-        if (!SMALL_PTR_IS_FREE(free_head)) {
-            szone_error(szone, "small_free_list_add_ptr: Internal invariant broken (free_head is not a free pointer)", ptr,
-		"ptr=%p slot=%d free_head=%p\n", ptr, slot, free_head);
-        }
-#endif
-        free_head->previous.u = free_list_checksum_ptr(free_ptr);
+static INLINE void *small_malloc_cleared_no_lock(szone_t *szone, msize_t msize) {
+    // tries to allocate a small, cleared block
+    boolean_t	locked = 1;
+    void	*ptr;
+    ptr = small_malloc_from_free_list(szone, msize, &locked);
+    if (ptr) {
+        memset(ptr, 0, (msize << SHIFT_QUANTUM) - PTR_HEADER_SIZE);
+        return ptr;
     } else {
-        BITMAP32_SET(szone->small_bitmap, slot);
-    }
-    free_ptr->previous.p = NULL;
-    free_ptr->next.p = free_head;
-    free_list_set_checksum(szone, free_ptr);
-    szone->small_free_list[slot] = free_ptr;
-    follower = ptr + SMALL_BYTES_FOR_MSIZE(msize);
-    SMALL_PREVIOUS_MSIZE(follower) = msize;
-}
-
-// Removes item in the proper free list
-// msize could be read, but all callers have it so we pass it in
-// Assumes szone has been locked
-static void
-small_free_list_remove_ptr(szone_t *szone, void *ptr, msize_t msize)
-{
-    grain_t	slot = (msize <= NUM_SMALL_SLOTS) ? msize - 1 : NUM_SMALL_SLOTS - 1;
-    free_list_t	*free_ptr = ptr, *next, *previous;
-    free_list_checksum(szone, free_ptr, __PRETTY_FUNCTION__);
-
-    next = free_list_unchecksum_ptr(free_ptr->next);
-    previous = free_list_unchecksum_ptr(free_ptr->previous);
-
-#if DEBUG_MALLOC
-    if (LOG(szone,ptr)) {
-        malloc_printf("In %s, ptr=%p, msize=%d\n", __FUNCTION__, ptr, msize);
-    }
-#endif
-    if (!previous) { 
-    // The block to remove is the head of the free list
-#if DEBUG_MALLOC
-        if (szone->small_free_list[slot] != ptr) {
-            szone_error(szone, "small_free_list_remove_ptr: Internal invariant broken (szone->small_free_list[grain])", ptr,
-              "ptr=%p slot=%d msize=%d szone->small_free_list[slot]=%p\n",
-              ptr, slot, msize, szone->small_free_list[slot]);
-            return;
-        }
-#endif
-        szone->small_free_list[slot] = next;
-        if (!next) BITMAP32_CLR(szone->small_bitmap, slot);
-    } else {
-        // We know free_ptr is already checksummed, so we don't need to do it
-        // again.
-        previous->next = free_ptr->next;
-    }
-    if (next) {
-        // We know free_ptr is already checksummed, so we don't need to do it
-        // again.
-        next->previous = free_ptr->previous;
-    }
-}
-
-static INLINE region_t *
-small_region_for_ptr_no_lock(szone_t *szone, const void *ptr)
-{
-  return hash_lookup_region_no_lock(szone->small_regions,
-                                    szone->num_small_regions_allocated,
-                                    SMALL_REGION_FOR_PTR(ptr));
-}
-
-static INLINE void
-small_free_no_lock(szone_t *szone, region_t *region, void *ptr, msize_t msize)
-{
-    msize_t		*meta_headers = SMALL_META_HEADER_FOR_PTR(ptr);
-    unsigned		index = SMALL_META_INDEX_FOR_PTR(ptr);
-    size_t		original_size = SMALL_BYTES_FOR_MSIZE(msize);
-    unsigned char	*next_block = ((unsigned char *)ptr + original_size);
-    msize_t		next_index = index + msize;
-    msize_t		previous_msize, next_msize;
-    void		*previous;
-
-    // Assumes locked
-    CHECK_LOCKED(szone, __PRETTY_FUNCTION__);
-#if DEBUG_MALLOC
-    if (LOG(szone,ptr)) {
-	malloc_printf("in small_free_no_lock(), ptr=%p, msize=%d\n", ptr, msize);
-    }
-    if (! msize) {
-	szone_error(szone, "trying to free small block that is too small", ptr,
-	    "in small_free_no_lock(), ptr=%p, msize=%d\n", ptr, msize);
-    }
-#endif
-    // We try to coalesce this block with the preceeding one
-    if (index && (SMALL_PREVIOUS_MSIZE(ptr) <= index)) {
-	previous_msize = SMALL_PREVIOUS_MSIZE(ptr);
-	if (meta_headers[index - previous_msize] == (previous_msize | SMALL_IS_FREE)) {
-	    previous = ptr - SMALL_BYTES_FOR_MSIZE(previous_msize);
-	    // previous is really to be coalesced
-#if DEBUG_MALLOC
-	    if (LOG(szone, ptr) || LOG(szone,previous)) { 
-		malloc_printf("in small_free_no_lock(), coalesced backwards for %p previous=%p\n", ptr, previous);
-	    }
-#endif
-	    small_free_list_remove_ptr(szone, previous, previous_msize);
-	    small_meta_header_set_middle(meta_headers, index);
-	    ptr = previous;
-	    msize += previous_msize;
-	    index -= previous_msize;
-	}
-    }
-    // We try to coalesce with the next block
-    if ((next_block < SMALL_REGION_END(*region)) && (meta_headers[next_index] & SMALL_IS_FREE)) {
-	// next block is free, we coalesce
-	next_msize = meta_headers[next_index] & ~ SMALL_IS_FREE;
-#if DEBUG_MALLOC
-	if (LOG(szone,ptr)) malloc_printf("In small_free_no_lock(), for ptr=%p, msize=%d coalesced next block=%p next_msize=%d\n", ptr, msize, next_block, next_msize);
-#endif
-	small_free_list_remove_ptr(szone, next_block, next_msize);
-	small_meta_header_set_middle(meta_headers, next_index);
-	msize += next_msize;
-    }
-    if (szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) {
-	if (!msize) {
-	    szone_error(szone, "incorrect size information - block header was damaged", ptr, NULL);
-	} else {
-	    memset(ptr, 0x55, SMALL_BYTES_FOR_MSIZE(msize));
-	}
-    }
-    small_free_list_add_ptr(szone, ptr, msize);
-    small_meta_header_set_is_free(meta_headers, index, msize);
-    szone->num_small_objects--;
-    szone->num_bytes_in_small_objects -= original_size; // we use original_size and not msize to avoid double counting the coalesced blocks
-}
-
-static void *
-small_malloc_from_region_no_lock(szone_t *szone, msize_t msize)
-{
-    void		*last_block;
-    void		*ptr;
-    void 		*new_address;
-    msize_t		*meta_headers;
-    msize_t		index ;
-    msize_t		msize_left;
-
-    // Allocates from the last region or a freshly allocated region
-    CHECK_LOCKED(szone, __PRETTY_FUNCTION__);
-    // Before anything we transform the small_bytes_free_at_end - if any - to a regular free block
-    if (szone->small_bytes_free_at_end) {
-      last_block = (void *)(SMALL_REGION_END(szone->last_small_region) - szone->small_bytes_free_at_end);
-      small_free_list_add_ptr(szone, last_block, SMALL_MSIZE_FOR_BYTES(szone->small_bytes_free_at_end));
-      *SMALL_METADATA_FOR_PTR(last_block) = SMALL_MSIZE_FOR_BYTES(szone->small_bytes_free_at_end) | SMALL_IS_FREE;
-      szone->small_bytes_free_at_end = 0;
-    }
-    // time to create a new region
-    new_address = allocate_pages(szone, SMALL_REGION_SIZE, SMALL_BLOCKS_ALIGN,
-                                 0, VM_MEMORY_MALLOC_SMALL);
-    if (!new_address)
-      return NULL;
-
-    ptr = new_address;
-    meta_headers = SMALL_META_HEADER_FOR_PTR(ptr);
-    index = 0;
-    
-    // Check to see if the hash ring of small regions needs to grow.  Try to
-    // avoid the hash ring becoming too dense.
-    if (szone->num_small_regions_allocated < (2 * szone->num_small_regions)) {
-      region_t *new_regions;
-      size_t new_size;
-      new_regions = hash_regions_grow_no_lock(szone, szone->small_regions,
-                                              szone->num_small_regions_allocated,
-                                              &new_size);
-      // Do not deallocate the current small_regions allocation since someone
-	  // may be iterating it.  Instead, just leak it.
-      szone->small_regions = new_regions;
-      szone->num_small_regions_allocated = new_size;
-    }
-    // Insert the new region into the hash ring, and update malloc statistics
-    hash_region_insert_no_lock(szone->small_regions, 
-                               szone->num_small_regions_allocated,
-                               new_address);
-    szone->last_small_region = new_address;
-                               
-    // we bump the number of regions AFTER we have changes the regions pointer 
-    // to enable finding a small region without taking the lock
-    //
-    // FIXME: naive assumption assumes memory ordering coherence between this
-    // and other CPUs.  This also applies to the near-identical code in
-    // tiny_malloc_from_region_no_lock.
-    szone->num_small_regions++;
-    small_meta_header_set_in_use(meta_headers, index, msize);
-    msize_left = NUM_SMALL_BLOCKS - index;
-    szone->num_small_objects++;
-    szone->num_bytes_in_small_objects += SMALL_BYTES_FOR_MSIZE(msize);
-    // add a big free block
-    index += msize; msize_left -= msize;
-    meta_headers[index] = msize_left;
-    szone->small_bytes_free_at_end = SMALL_BYTES_FOR_MSIZE(msize_left);
+        ptr = small_malloc_from_region_no_lock(szone, msize);
+        // we don't clear because this freshly allocated space is pristine
+    }
     return ptr;
 }
 
-static INLINE boolean_t
-try_realloc_small_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size)
-{
-    // returns 1 on success
-    msize_t	*meta_headers = SMALL_META_HEADER_FOR_PTR(ptr);
-    unsigned	index = SMALL_META_INDEX_FOR_PTR(ptr);
-    msize_t	old_msize = SMALL_MSIZE_FOR_BYTES(old_size);
-    msize_t	new_msize = SMALL_MSIZE_FOR_BYTES(new_size + SMALL_QUANTUM - 1);
-    void	*next_block = (char *)ptr + old_size;
-    unsigned	next_index = index + old_msize;
-    msize_t	next_msize_and_free;
-    msize_t	next_msize;
-    msize_t	leftover_msize;
-    void	*leftover;
-    unsigned	leftover_index;
-
-    if (next_index >= NUM_SMALL_BLOCKS) {
-	return 0;
-    }
-#if DEBUG_MALLOC
-    if ((uintptr_t)next_block & (SMALL_QUANTUM - 1)) {
-	szone_error(szone, "internal invariant broken in realloc(next_block)", next_block, NULL);
-    }
-    if (meta_headers[index] != old_msize)
-	malloc_printf("*** try_realloc_small_in_place incorrect old %d %d\n",
-	  meta_headers[index], old_msize);
-#endif
-    SZONE_LOCK(szone);
-    /*
-     * Look for a free block immediately afterwards.  If it's large enough, we can consume (part of)
-     * it.
-     */
-    next_msize_and_free = meta_headers[next_index];
-    next_msize = next_msize_and_free & ~ SMALL_IS_FREE;
-    if (!(next_msize_and_free & SMALL_IS_FREE) || (old_msize + next_msize < new_msize)) {
-	SZONE_UNLOCK(szone);
-	return 0;
-    }
-    /*
-     * The following block is big enough; pull it from its freelist and chop off enough to satisfy
-     * our needs.
-     */
-    small_free_list_remove_ptr(szone, next_block, next_msize);
-    small_meta_header_set_middle(meta_headers, next_index);
-    leftover_msize = old_msize + next_msize - new_msize;
-    if (leftover_msize) {
-	/* there's some left, so put the remainder back */
-	leftover = (unsigned char *)ptr + SMALL_BYTES_FOR_MSIZE(new_msize);
-	small_free_list_add_ptr(szone, leftover, leftover_msize);
-	leftover_index = index + new_msize;
-	small_meta_header_set_is_free(meta_headers, leftover_index, leftover_msize);
-    }
-#if DEBUG_MALLOC
-    if (SMALL_BYTES_FOR_MSIZE(new_msize) >= LARGE_THRESHOLD) {
-	malloc_printf("*** realloc in place for %p exceeded msize=%d\n", new_msize);
-    }
-#endif
-    small_meta_header_set_in_use(meta_headers, index, new_msize);
-#if DEBUG_MALLOC
-    if (LOG(szone,ptr)) {
-	malloc_printf("in szone_realloc(), ptr=%p, msize=%d\n", ptr, *SMALL_METADATA_FOR_PTR(ptr));
-    }
-#endif
-    szone->num_bytes_in_small_objects += SMALL_BYTES_FOR_MSIZE(new_msize - old_msize);
-    SZONE_UNLOCK(szone);
-    CHECK(szone, __PRETTY_FUNCTION__);
-    return 1;
-}
-
-static boolean_t
-szone_check_small_region(szone_t *szone, region_t region)
-{
-    unsigned char	*ptr = SMALL_REGION_ADDRESS(region);
-    msize_t		*meta_headers = SMALL_META_HEADER_FOR_PTR(ptr);
-    unsigned char	*region_end = SMALL_REGION_END(region);
-    msize_t		prev_free = 0;
-    unsigned		index;
-    msize_t		msize_and_free;
-    msize_t		msize;
-    free_list_t		*free_head;
-    void            *previous, *next;
-    msize_t		*follower;
-
-    CHECK_LOCKED(szone, __PRETTY_FUNCTION__);
-    if (region == szone->last_small_region) region_end -= szone->small_bytes_free_at_end;
-    while (ptr < region_end) {
-	index = SMALL_META_INDEX_FOR_PTR(ptr);
-	msize_and_free = meta_headers[index];
-	if (!(msize_and_free & SMALL_IS_FREE)) {
-	    // block is in use
-	    msize = msize_and_free;
-	    if (!msize) {
-		malloc_printf("*** invariant broken: null msize ptr=%p num_small_regions=%d end=%p\n",
-		  ptr, szone->num_small_regions, region_end);
-		return 0;
-	    }
-	    if (msize > (LARGE_THRESHOLD / SMALL_QUANTUM)) {
-		malloc_printf("*** invariant broken for %p this small msize=%d - size is too large\n",
-		  ptr, msize_and_free);
-		return 0;
-	    }
-	    ptr += SMALL_BYTES_FOR_MSIZE(msize);
-	    prev_free = 0;
-	} else {
-	    // free pointer
-	    msize = msize_and_free & ~ SMALL_IS_FREE;
-	    free_head = (free_list_t *)ptr;
-	    follower = (msize_t *)FOLLOWING_SMALL_PTR(ptr, msize);
-	    if (!msize) {
-		malloc_printf("*** invariant broken for free block %p this msize=%d\n", ptr, msize);
-		return 0;
-	    }
-	    if (prev_free) {
-		malloc_printf("*** invariant broken for %p (2 free in a row)\n", ptr);
-		return 0;
-	    }
-	    free_list_checksum(szone, free_head, __PRETTY_FUNCTION__);
-	    previous = free_list_unchecksum_ptr(free_head->previous);
-	    next = free_list_unchecksum_ptr(free_head->next);
-	    if (previous && !SMALL_PTR_IS_FREE(previous)) {
-		malloc_printf("*** invariant broken for %p (previous %p is not a free pointer)\n",
-		  ptr, free_head->previous);
-		return 0;
-	    }
-	    if (next && !SMALL_PTR_IS_FREE(next)) {
-		malloc_printf("*** invariant broken for %p (next is not a free pointer)\n", ptr);
-		return 0;
-	    }
-	    if (SMALL_PREVIOUS_MSIZE(follower) != msize) {
-		malloc_printf("*** invariant broken for small free %p followed by %p in region [%p-%p] "
-		  "(end marker incorrect) should be %d; in fact %d\n",
-		  ptr, follower, SMALL_REGION_ADDRESS(region), region_end, msize, SMALL_PREVIOUS_MSIZE(follower));
-		return 0;
-	    }
-	    ptr = (unsigned char *)follower;
-	    prev_free = SMALL_IS_FREE;
-	}
-    }
-    return 1;
-}
-
-static kern_return_t
-small_in_use_enumerator(task_t task, void *context, unsigned type_mask, szone_t *szone, memory_reader_t reader, vm_range_recorder_t recorder)
-{
-  size_t num_regions = szone->num_small_regions_allocated;
-  void *last_small_free = szone->last_small_free; 
-  size_t	index;
-  region_t	*regions;
-  vm_range_t		buffer[MAX_RECORDER_BUFFER];
-  unsigned		count = 0;
-  kern_return_t	err;
-  region_t	region;
-  vm_range_t		range;
-  vm_range_t		admin_range;
-  vm_range_t		ptr_range;
-  unsigned char	*mapped_region;
-  msize_t		*block_header;
-  unsigned		block_index;
-  unsigned		block_limit;
-  msize_t		msize_and_free;
-  msize_t		msize;
-  vm_address_t last_small_free_ptr = 0;
-  msize_t last_small_free_msize = 0;
-  
-  if (last_small_free) {
-    last_small_free_ptr = (uintptr_t)last_small_free & ~(SMALL_QUANTUM - 1);
-    last_small_free_msize = (uintptr_t)last_small_free & (SMALL_QUANTUM - 1);
-  }
-  
-  err = reader(task, (vm_address_t)szone->small_regions, sizeof(region_t) * num_regions, (void **)&regions);
-  if (err) return err;
-  for (index = 0; index < num_regions; ++index) {
-    region = regions[index];
-    if (region) {
-      range.address = (vm_address_t)SMALL_REGION_ADDRESS(region);
-      range.size = SMALL_REGION_SIZE;
-      if (type_mask & MALLOC_ADMIN_REGION_RANGE_TYPE) {
-        admin_range.address = range.address + SMALL_HEADER_START;
-        admin_range.size = SMALL_ARRAY_SIZE;
-        recorder(task, context, MALLOC_ADMIN_REGION_RANGE_TYPE, &admin_range, 1);
-      }
-      if (type_mask & (MALLOC_PTR_REGION_RANGE_TYPE | MALLOC_ADMIN_REGION_RANGE_TYPE)) {
-        ptr_range.address = range.address;
-        ptr_range.size = NUM_SMALL_BLOCKS * SMALL_QUANTUM;
-        recorder(task, context, MALLOC_PTR_REGION_RANGE_TYPE, &ptr_range, 1);
-      }
-      if (type_mask & MALLOC_PTR_IN_USE_RANGE_TYPE) {
-        err = reader(task, range.address, range.size, (void **)&mapped_region);
-        if (err) return err;
-        block_header = (msize_t *)(mapped_region + SMALL_HEADER_START);
-        block_index = 0;
-        block_limit = NUM_SMALL_BLOCKS;
-        if (region == szone->last_small_region)
-          block_limit -= SMALL_MSIZE_FOR_BYTES(szone->small_bytes_free_at_end);
-        while (block_index < block_limit) {
-          msize_and_free = block_header[block_index];
-          msize = msize_and_free & ~ SMALL_IS_FREE;
-          if (! (msize_and_free & SMALL_IS_FREE) &&
-              range.address + SMALL_BYTES_FOR_MSIZE(block_index) != last_small_free_ptr) {
-            // Block in use
-            buffer[count].address = range.address + SMALL_BYTES_FOR_MSIZE(block_index);
-            buffer[count].size = SMALL_BYTES_FOR_MSIZE(msize);
-            count++;
-            if (count >= MAX_RECORDER_BUFFER) {
-              recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count);
-              count = 0;
-            }
-          }
-          block_index += msize;
-        }
-      }
-    }
-  }
-  if (count) {
-    recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count);
-  }
-  return 0;
-}
-
-static void *
-small_malloc_from_free_list(szone_t *szone, msize_t msize)
-{
-    free_list_t		*ptr;
-    msize_t         this_msize;
-    grain_t         slot = (msize <= NUM_SMALL_SLOTS) ? msize - 1 : NUM_SMALL_SLOTS - 1;
-    free_list_t		**free_list = szone->small_free_list;
-    free_list_t     *next;
-    free_list_t     **limit;
-    unsigned        bitmap = szone->small_bitmap & ~ ((1 << slot) - 1);
-    msize_t         leftover_msize;
-    free_list_t     *leftover_ptr;
-    msize_t         *meta_headers;
-    unsigned        leftover_index;
-
-    // Assumes locked
-    CHECK_LOCKED(szone, __PRETTY_FUNCTION__);
-    
-    // Mask off the bits representing slots holding free blocks smaller than the
-    // size we need.  If there are no larger free blocks, try allocating from
-    // the free space at the end of the tiny region.
-    if (!bitmap) 
-        goto try_small_from_end;
-        
-    slot = BITMAP32_CTZ(bitmap);
-    limit = free_list + NUM_SMALL_SLOTS - 1;
-    free_list += slot;
-
-    // Iterate over freelists looking for free blocks, starting at first list
-    // which is not empty, and contains blocks which are large enough to satisfy
-    // our request.
-    while (free_list < limit) {
-        ptr = *free_list;
-        if (ptr) {
-            next = free_list_unchecksum_ptr(ptr->next);
-            *free_list = next;
-            this_msize = SMALL_PTR_SIZE(ptr);
-            if (next) {
-                next->previous = ptr->previous;
-            } else {
-                BITMAP32_CLR(szone->small_bitmap, this_msize - 1);
-            }
-            goto add_leftover_and_proceed;
-        }
-        free_list++;
-    }
-
-    // We are now looking at the last slot, which contains blocks equal to, or
-    // due to coalescing of free blocks, larger than 31 * small quantum size.
-    // If the last freelist is not empty, and the head contains a block that is
-    // larger than our request, then the remainder is put back on the free list.
-    //
-    // FIXME: This code doesn't have the optimization from the 'tiny' codepath 
-    //        that optimizes for the this_msize >= 2 * num slots
-    // FIXME: this code also seems somewhat bogus.  There's a check for
-    //        this_msize >= msize, but by definition we can't ask for a small
-    //        block larger than 31 small quanta, and every free block in this
-    //        slot has to be at least that large.
-    ptr = *limit;
-    while (ptr) {
-        free_list_checksum(szone, ptr, __PRETTY_FUNCTION__);
-        next = free_list_unchecksum_ptr(ptr->next);
-        this_msize = SMALL_PTR_SIZE(ptr);
-        if (this_msize >= msize) {
-            small_free_list_remove_ptr(szone, ptr, this_msize);
-            goto add_leftover_and_proceed;
-        }
-        ptr = next;
-    }
-    
-try_small_from_end:
-    // Let's see if we can use szone->small_bytes_free_at_end
-    if (szone->small_bytes_free_at_end >= SMALL_BYTES_FOR_MSIZE(msize)) {
-        ptr = (free_list_t *)(SMALL_REGION_END(szone->last_small_region) - szone->small_bytes_free_at_end);
-        szone->small_bytes_free_at_end -= SMALL_BYTES_FOR_MSIZE(msize);
-        if (szone->small_bytes_free_at_end) {
-            // let's mark this block as in use to serve as boundary
-            *SMALL_METADATA_FOR_PTR((unsigned char *)ptr + SMALL_BYTES_FOR_MSIZE(msize)) = SMALL_MSIZE_FOR_BYTES(szone->small_bytes_free_at_end);
-        }
-        this_msize = msize;
-        goto return_small_alloc;
-    }
-    return NULL;
-    
-add_leftover_and_proceed:
-    if (this_msize > msize) {
-        leftover_msize = this_msize - msize;
-        leftover_ptr = (free_list_t *)((unsigned char *)ptr + SMALL_BYTES_FOR_MSIZE(msize));
-#if DEBUG_MALLOC
-        if (LOG(szone,ptr)) {
-            malloc_printf("in small_malloc_from_free_list(), adding leftover ptr=%p, this_msize=%d\n", ptr, this_msize);
-        }
-#endif
-        small_free_list_add_ptr(szone, leftover_ptr, leftover_msize);
-        meta_headers = SMALL_META_HEADER_FOR_PTR(leftover_ptr);
-        leftover_index = SMALL_META_INDEX_FOR_PTR(leftover_ptr);
-        small_meta_header_set_is_free(meta_headers, leftover_index, leftover_msize);
-        this_msize = msize;
-    }
-    
-return_small_alloc:
-    szone->num_small_objects++;
-    szone->num_bytes_in_small_objects += SMALL_BYTES_FOR_MSIZE(this_msize);
-#if DEBUG_MALLOC
-    if (LOG(szone,ptr)) {
-	malloc_printf("in small_malloc_from_free_list(), ptr=%p, this_msize=%d, msize=%d\n", ptr, this_msize, msize);
-    }
-#endif
-    *SMALL_METADATA_FOR_PTR(ptr) = this_msize;
-    return ptr;
-}
-
-static INLINE void *
-small_malloc_should_clear(szone_t *szone, msize_t msize, boolean_t cleared_requested)
-{
-    boolean_t	locked = 0;
-#if SMALL_CACHE
-    void	*ptr;
-#endif
-    
-#if SMALL_CACHE
-    ptr = (void *)szone->last_small_free;
-    if ((((uintptr_t)ptr) & (SMALL_QUANTUM - 1)) == msize) {
-	// we have a candidate - let's lock to make sure
-	LOCK_AND_NOTE_LOCKED(szone, locked);
-	if (ptr == (void *)szone->last_small_free) {
-	    szone->last_small_free = NULL;
-	    SZONE_UNLOCK(szone);
-	    CHECK(szone, __PRETTY_FUNCTION__);
-	    ptr = (void *)((uintptr_t)ptr & ~ (SMALL_QUANTUM - 1));
-	    if (cleared_requested) {
-		memset(ptr, 0, SMALL_BYTES_FOR_MSIZE(msize));
-	    }
-	    return ptr;
-	}
-    }
-#endif
-    // Except in rare occasions where we need to add a new region, we are going to end up locking,
-    // so we might as well lock right away to avoid doing unnecessary optimistic probes
-    if (!locked) LOCK_AND_NOTE_LOCKED(szone, locked);
-    ptr = small_malloc_from_free_list(szone, msize);
-    if (ptr) {
-	SZONE_UNLOCK(szone);
-	CHECK(szone, __PRETTY_FUNCTION__);
-	if (cleared_requested) {
-	    memset(ptr, 0, SMALL_BYTES_FOR_MSIZE(msize));
-	}
-	return ptr;
-    }
-    ptr = small_malloc_from_region_no_lock(szone, msize);
-    // we don't clear because this freshly allocated space is pristine
-    SZONE_UNLOCK(szone);
-    CHECK(szone, __PRETTY_FUNCTION__);
-    return ptr;
-}
-
-// tries to allocate a small, cleared block
-static INLINE void *
-small_malloc_cleared_no_lock(szone_t *szone, msize_t msize)
-{
-    void	*ptr;
-
-    // Assumes already locked
-    CHECK_LOCKED(szone, __PRETTY_FUNCTION__);
-    ptr = small_malloc_from_free_list(szone, msize);
-    if (ptr) {
-	memset(ptr, 0, SMALL_BYTES_FOR_MSIZE(msize));
-	return ptr;
-    } else {
-	ptr = small_malloc_from_region_no_lock(szone, msize);
-	// we don't clear because this freshly allocated space is pristine
-    }
-    return ptr;
-}
-
-static INLINE void
-free_small(szone_t *szone, void *ptr, region_t *small_region)
-{
-    msize_t		msize = SMALL_PTR_SIZE(ptr);
-#if SMALL_CACHE
-    void		*ptr2;
-#endif
-    
-    // ptr is known to be in small_region
-    SZONE_LOCK(szone);
-#if SMALL_CACHE
-    ptr2 = szone->last_small_free;
-    /* check that we don't already have this pointer in the cache */
-    if (ptr == (void *)((uintptr_t)ptr2 & ~ (SMALL_QUANTUM - 1))) {
-	szone_error(szone, "double free", ptr, NULL);
-	return;
-    }
-#endif
-    if (SMALL_PTR_IS_FREE(ptr)) {
-	szone_error(szone, "double free", ptr, NULL);
-	return;
-    }
-#if SMALL_CACHE
-    szone->last_small_free = (void *)(((uintptr_t)ptr) | msize);
-    if (!ptr2) {
-	SZONE_UNLOCK(szone);
-	CHECK(szone, __PRETTY_FUNCTION__);
-	return;
-    }
-    msize = (uintptr_t)ptr2 & (SMALL_QUANTUM - 1);
-    ptr = (void *)(((uintptr_t)ptr2) & ~ (SMALL_QUANTUM - 1));
-    small_region = small_region_for_ptr_no_lock(szone, ptr);
-    if (!small_region) {
-	szone_error(szone, "double free (small cache)", ptr, NULL);
-	return;
-    }
-#endif
-    small_free_no_lock(szone, small_region, ptr, msize);
-    SZONE_UNLOCK(szone);
-    CHECK(szone, __PRETTY_FUNCTION__);
-}
-
-static void
-print_small_free_list(szone_t *szone)
-{
-    grain_t		grain = 0;
-    free_list_t		*ptr;
-    _SIMPLE_STRING	b = _simple_salloc();
-    
-    if (b) {
-	_simple_sappend(b, "small free sizes: ");
-	while (grain < NUM_SMALL_SLOTS) {
-	    ptr = szone->small_free_list[grain];
-	    if (ptr) {
-		_simple_sprintf(b, "%s%y[%d]; ", (grain == NUM_SMALL_SLOTS-1) ? ">=" : "", (grain + 1) * SMALL_QUANTUM, free_list_count(ptr));
-	    }
-	    grain++;
-	}
-	_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b));
-	_simple_sfree(b);
-    }
-}
-
-static void
-print_small_region(szone_t *szone, boolean_t verbose, region_t region, size_t bytes_at_end)
-{
-    unsigned		counts[1024];
-    unsigned		in_use = 0;
-    void		*start = SMALL_REGION_ADDRESS(region);
-    void		*limit = SMALL_REGION_END(region) - bytes_at_end;
-    msize_t		msize_and_free;
-    msize_t		msize;
-    unsigned		ci;
-    _SIMPLE_STRING	b;
-
-    memset(counts, 0, 1024 * sizeof(unsigned));
-    while (start < limit) {
-	msize_and_free = *SMALL_METADATA_FOR_PTR(start);
-	msize = msize_and_free & ~ SMALL_IS_FREE;
-	if (!(msize_and_free & SMALL_IS_FREE)) {
-	    // block in use
-	    if (msize < 1024)
-		counts[msize]++;
-	    in_use++;
-	}
-	start += SMALL_BYTES_FOR_MSIZE(msize);
-    }
-    if ((b = _simple_salloc()) != NULL) {
-	_simple_sprintf(b, "Small region [%p-%p, %y]\tIn_use=%d ",
-	  SMALL_REGION_ADDRESS(region), SMALL_REGION_END(region), (int)SMALL_REGION_SIZE, in_use);
-	if (bytes_at_end)
-	    _simple_sprintf(b, "Untouched=%ly", bytes_at_end);
-	if (verbose && in_use) {
-	    _simple_sappend(b, "\n\tSizes in use: "); 
-	    for (ci = 0; ci < 1024; ci++)
-		if (counts[ci])
-		    _simple_sprintf(b, "%d[%d] ", SMALL_BYTES_FOR_MSIZE(ci), counts[ci]);
-	}
-	_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b));
-	_simple_sfree(b);
-    }
-}
-
-static boolean_t
-small_free_list_check(szone_t *szone, grain_t grain)
-{
-    unsigned	count = 0;
-    free_list_t	*ptr = szone->small_free_list[grain];
-    free_list_t	*previous = NULL;
-    msize_t	msize_and_free;
-
-    CHECK_LOCKED(szone, __PRETTY_FUNCTION__);
-    while (ptr) {
-        msize_and_free = *SMALL_METADATA_FOR_PTR(ptr);
-        count++;
-        if (!(msize_and_free & SMALL_IS_FREE)) {
-            malloc_printf("*** in-use ptr in free list grain=%d count=%d ptr=%p\n", grain, count, ptr);
-            return 0;
-        }
-        if (((uintptr_t)ptr) & (SMALL_QUANTUM - 1)) {
-            malloc_printf("*** unaligned ptr in free list grain=%d  count=%d ptr=%p\n", grain, count, ptr);
-            return 0;
-        }
-        if (!small_region_for_ptr_no_lock(szone, ptr)) {
-            malloc_printf("*** ptr not in szone grain=%d  count=%d ptr=%p\n", grain, count, ptr);
-            return 0;
-        }
-        free_list_checksum(szone, ptr, __PRETTY_FUNCTION__);
-        if (free_list_unchecksum_ptr(ptr->previous) != previous) {
-            malloc_printf("*** previous incorrectly set grain=%d  count=%d ptr=%p\n", grain, count, ptr);
-            return 0;
-        }
-        previous = ptr;
-        ptr = free_list_unchecksum_ptr(ptr->next);
-    }
-    return 1;
-}
-
-/*******************************************************************************
- * Region hash implementation
- *
- * This is essentially a duplicate of the existing Large allocator hash, minus
- * the ability to remove entries.  The two should be combined eventually.
- ******************************************************************************/
-#pragma mark region hash
-
-/*
- * hash_lookup_region_no_lock - Scan a hash ring looking for an entry for a 
- * given region.
- *
- * FIXME: If consecutive queries of the same region are likely, a one-entry
- * cache would likely be a significant performance win here.
- */
-static region_t *
-hash_lookup_region_no_lock(region_t *regions, size_t num_entries, region_t r) {
-  size_t index, hash_index;
-  region_t *entry;
-
-  if (!num_entries)
-    return 0;
-  
-  index = hash_index = ((uintptr_t)r >> 20) % num_entries;
-  do {
-    entry = regions + index;
-    if (*entry == 0)
-      return 0;
-    if (*entry == r)
-      return entry;
-    if (++index == num_entries)
-      index = 0;
-  } while (index != hash_index);
-  return 0;
-}
-
-/*
- * hash_region_insert_no_lock - Insert a region into the hash ring.
- */
-static void
-hash_region_insert_no_lock(region_t *regions, size_t num_entries, region_t r) {
-  size_t index, hash_index;
-  region_t *entry;
-  
-  index = hash_index = ((uintptr_t)r >> 20) % num_entries;
-  do {
-    entry = regions + index;
-    if (*entry == 0) {
-	    *entry = r;
-      return;
-    }
-    if (++index == num_entries)
-	    index = 0;
-  } while (index != hash_index);
-}
-
-/*
- * hash_regions_alloc_no_lock - Allocate space for a number of entries.  This
- * must be a VM allocation as to avoid recursing between allocating a new small
- * region, and asking the small region to allocate space for the new list of
- * regions.
- */
-static region_t *
-hash_regions_alloc_no_lock(szone_t *szone, size_t num_entries)
-{
-  size_t size = num_entries * sizeof(region_t);
-	return allocate_pages(szone, round_page(size), 0, 0, VM_MEMORY_MALLOC);
-}
-
-/*
- * hash_regions_grow_no_lock - Grow the hash ring, and rehash the entries.
- * Return the new region and new size to update the szone.  Do not deallocate
- * the old entries since someone may still be allocating them.
- */
-static region_t *
-hash_regions_grow_no_lock(szone_t *szone, region_t *regions, size_t old_size,
-                          size_t *new_size)
-{
-  // double in size and allocate memory for the regions
-  *new_size = old_size * 2 + 1;
-  region_t *new_regions = hash_regions_alloc_no_lock(szone, *new_size);
-
-  // rehash the entries into the new list
-  size_t index;
-  for (index = 0; index < old_size; ++index) {
-    region_t r = regions[index];
-    if (r != 0)
-      hash_region_insert_no_lock(new_regions, *new_size, r);
-  }
-  return new_regions;
-}
-
-/*******************************************************************************
- * Large allocator implementation
- ******************************************************************************/
-#pragma mark large allocator
-
-#if DEBUG_MALLOC
-
-static void
-large_debug_print(szone_t *szone)
-{
-    unsigned		num_large_entries = szone->num_large_entries;
-    unsigned		index = num_large_entries;
-    large_entry_t	*range;
-    _SIMPLE_STRING	b = _simple_salloc();
-
-    if (b) {
-	for (index = 0, range = szone->large_entries; index < szone->num_large_entries; index++, range++)
-	    if (!LARGE_ENTRY_IS_EMPTY(*range))
-		_simple_sprintf(b, "%d: %p(%y);  ", index, LARGE_ENTRY_ADDRESS(*range), LARGE_ENTRY_SIZE(*range));
-
-	_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b));
-	_simple_sfree(b);
-    }
-}
-#endif
-
-/*
- * Scan the hash ring looking for an entry for the given pointer.
- */
-static large_entry_t *
-large_entry_for_pointer_no_lock(szone_t *szone, const void *ptr)
-{
-    // result only valid with lock held
-    unsigned		num_large_entries = szone->num_large_entries;
-    unsigned		hash_index;
-    unsigned		index;
-    large_entry_t	*range;
-
-    if (!num_large_entries)
-	return NULL;
-    hash_index = ((uintptr_t)ptr >> vm_page_shift) % num_large_entries;
+/*********************	LARGE ENTRY UTILITIES	************************/
+
+#if DEBUG_MALLOC
+
+static void large_debug_print(szone_t *szone) {
+    unsigned	num_large_entries = szone->num_large_entries;
+    unsigned	index = num_large_entries;
+    while (index--) {
+        large_entry_t	*range = szone->large_entries + index;
+        large_entry_t	entry = *range;
+        if (!LARGE_ENTRY_IS_EMPTY(entry)) malloc_printf("%d: 0x%x(%dKB);  ", index, LARGE_ENTRY_ADDRESS(entry), LARGE_ENTRY_SIZE(entry)/1024);
+    }
+    malloc_printf("\n");
+}
+#endif
+
+static large_entry_t *large_entry_for_pointer_no_lock(szone_t *szone, const void *ptr) {
+    // result only valid during a lock
+    unsigned	num_large_entries = szone->num_large_entries;
+    unsigned	hash_index;
+    unsigned	index;
+    if (!num_large_entries) return NULL;
+    hash_index = ((unsigned)ptr >> vm_page_shift) % num_large_entries;
     index = hash_index;
     do {
-	range = szone->large_entries + index;
-	if (LARGE_ENTRY_MATCHES(*range, ptr))
-	    return range;
-	if (LARGE_ENTRY_IS_EMPTY(*range))
-	    return NULL; // end of chain
-	index++;
-	if (index == num_large_entries)
-	    index = 0;
+        large_entry_t	*range = szone->large_entries + index;
+        large_entry_t	entry = *range;
+        if (LARGE_ENTRY_MATCHES(entry, ptr)) return range;
+        if (LARGE_ENTRY_IS_EMPTY(entry)) return NULL; // end of chain
+        index++; if (index == num_large_entries) index = 0;
     } while (index != hash_index);
     return NULL;
 }
 
-static void
-large_entry_insert_no_lock(szone_t *szone, large_entry_t range)
-{
-    unsigned		num_large_entries = szone->num_large_entries;
-    unsigned		hash_index = (range.address_and_num_pages >> vm_page_shift) % num_large_entries;
-    unsigned		index = hash_index;
-    large_entry_t	*entry;
-
+static void large_entry_insert_no_lock(szone_t *szone, large_entry_t range) {
+    unsigned	num_large_entries = szone->num_large_entries;
+    unsigned	hash_index = (range.address_and_num_pages >> vm_page_shift) % num_large_entries;
+    unsigned	index = hash_index;
+    // malloc_printf("Before insertion of 0x%x\n", LARGE_ENTRY_ADDRESS(range));
     do {
-	entry = szone->large_entries + index;
-	if (LARGE_ENTRY_IS_EMPTY(*entry)) {
-	    *entry = range;
-	    return; // end of chain
-	}
-	index++;
-	if (index == num_large_entries)
-	    index = 0;
+        large_entry_t	*entry = szone->large_entries + index;
+        if (LARGE_ENTRY_IS_EMPTY(*entry)) {
+            *entry = range;
+            return; // end of chain
+        }
+        index++; if (index == num_large_entries) index = 0;
     } while (index != hash_index);
 }
 
-static INLINE void
-large_entries_rehash_after_entry_no_lock(szone_t *szone, large_entry_t *entry)
-{
-    unsigned		num_large_entries = szone->num_large_entries;
-    unsigned		hash_index = entry - szone->large_entries;
-    unsigned		index = hash_index;
-    large_entry_t	range;
-
+static INLINE void large_entries_rehash_after_entry_no_lock(szone_t *szone, large_entry_t *entry) {
+    unsigned	num_large_entries = szone->num_large_entries;
+    unsigned	hash_index = entry - szone->large_entries;
+    unsigned	index = hash_index;
     do {
-	index++;
-	if (index == num_large_entries)
-	    index = 0;
-	range = szone->large_entries[index];
-	if (LARGE_ENTRY_IS_EMPTY(range))
-	    return;
-	szone->large_entries[index].address_and_num_pages = 0;
-	large_entry_insert_no_lock(szone, range); // this will reinsert in the
-						  // proper place
+        large_entry_t	range;
+        index++; if (index == num_large_entries) index = 0;
+        range = szone->large_entries[index];
+        if (LARGE_ENTRY_IS_EMPTY(range)) return;
+        szone->large_entries[index].address_and_num_pages = 0;
+        large_entry_insert_no_lock(szone, range); // this will reinsert in the proper place
     } while (index != hash_index);
 }
 
-static INLINE large_entry_t *
-large_entries_alloc_no_lock(szone_t *szone, unsigned num)
-{
+static INLINE large_entry_t *large_entries_alloc_no_lock(szone_t *szone, unsigned num) {
     size_t	size = num * sizeof(large_entry_t);
     boolean_t	is_vm_allocation = size >= LARGE_THRESHOLD;
-
     if (is_vm_allocation) {
-	// Note that we allocate memory (via a system call) under a spin lock
-	// That is certainly evil, however it's very rare in the lifetime of a process
-	// The alternative would slow down the normal case
-	return allocate_pages(szone, round_page(size), 0, 0, VM_MEMORY_MALLOC_LARGE);
+        return (void *)allocate_pages(szone, round_page(size), 0, VM_MAKE_TAG(VM_MEMORY_MALLOC_LARGE));
     } else {
-	return small_malloc_cleared_no_lock(szone, SMALL_MSIZE_FOR_BYTES(size + SMALL_QUANTUM - 1));
-    }
-}
-
-static void
-large_entries_free_no_lock(szone_t *szone, large_entry_t *entries, unsigned num, vm_range_t *range_to_deallocate)
-{
-    // returns range to deallocate
-    size_t		size = num * sizeof(large_entry_t);
-    boolean_t		is_vm_allocation = size >= LARGE_THRESHOLD;
-    region_t	*region;
-    msize_t		msize_and_free;
-    
+        return small_malloc_cleared_no_lock(szone, (size + PTR_HEADER_SIZE + QUANTUM - 1) >> SHIFT_QUANTUM);
+    }
+}
+
+static void large_entries_free_no_lock(szone_t *szone, large_entry_t *entries, unsigned num) {
+    size_t	size = num * sizeof(large_entry_t);
+    boolean_t	is_vm_allocation = size >= LARGE_THRESHOLD;
     if (is_vm_allocation) {
-	range_to_deallocate->address = (vm_address_t)entries;
-	range_to_deallocate->size = round_page(size);
+        deallocate_pages(szone, (vm_address_t)entries, round_page(size), 0);
     } else {
-	range_to_deallocate->size = 0;
-	region = small_region_for_ptr_no_lock(szone, entries);
-	msize_and_free = *SMALL_METADATA_FOR_PTR(entries);
-	if (msize_and_free & SMALL_IS_FREE) {
-	    szone_error(szone, "object already freed being freed", entries, NULL);
-	    return;
-	}
-	small_free_no_lock(szone, region, entries, msize_and_free);
-    }
-}
-
-static large_entry_t *
-large_entries_grow_no_lock(szone_t *szone, vm_range_t *range_to_deallocate)
-{
-    // sets range_to_deallocate
+        region_t	*region = region_for_ptr_no_lock(szone, entries);
+        msize_t		msize_and_free = MSIZE_FLAGS_FOR_PTR(entries);
+        if (msize_and_free & THIS_FREE) {
+            szone_error(szone, "Object already freed being freed", entries);
+            return;
+        }
+        small_free_no_lock(szone, region, entries, msize_and_free);
+    }
+}
+
+static void large_entries_grow_no_lock(szone_t *szone) {
     unsigned		old_num_entries = szone->num_large_entries;
     large_entry_t	*old_entries = szone->large_entries;
-    unsigned		new_num_entries = (old_num_entries) ? old_num_entries * 2 + 1 : 63; // always an odd number for good hashing
+    unsigned		new_num_entries = (old_num_entries) ? old_num_entries * 2 + 1 : 15; // always an odd number for good hashing
     large_entry_t	*new_entries = large_entries_alloc_no_lock(szone, new_num_entries);
     unsigned		index = old_num_entries;
-    large_entry_t	oldRange;
-
-    // if the allocation of new entries failed, bail
-	if (new_entries == NULL)
-		return NULL;
-
     szone->num_large_entries = new_num_entries;
     szone->large_entries = new_entries;
-
-    /* rehash entries into the new list */
+    // malloc_printf("_grow_large_entries old_num_entries=%d new_num_entries=%d\n", old_num_entries, new_num_entries);
     while (index--) {
-	oldRange = old_entries[index];
-	if (!LARGE_ENTRY_IS_EMPTY(oldRange)) {
-	    large_entry_insert_no_lock(szone, oldRange);
-	}
-    }
-    if (old_entries) {
-	large_entries_free_no_lock(szone, old_entries, old_num_entries, range_to_deallocate);
-    } else {
-	range_to_deallocate->size = 0;
-    }
-    return new_entries;
-}
-
-// frees the specific entry in the size table
-// returns a range to truly deallocate
-static vm_range_t
-large_free_no_lock(szone_t *szone, large_entry_t *entry)
-{
+        large_entry_t	oldRange = old_entries[index];
+        if (!LARGE_ENTRY_IS_EMPTY(oldRange)) large_entry_insert_no_lock(szone, oldRange);
+    }
+    if (old_entries) large_entries_free_no_lock(szone, old_entries, old_num_entries);
+}
+
+static vm_range_t large_free_no_lock(szone_t *szone, large_entry_t *entry) {
+    // frees the specific entry in the size table
+    // returns a range to truly deallocate
     vm_range_t		range;
-    
-    range.address = (vm_address_t)LARGE_ENTRY_ADDRESS(*entry);
-    range.size = (vm_size_t)LARGE_ENTRY_SIZE(*entry);
-    szone->num_large_objects_in_use--;
+    range.address = LARGE_ENTRY_ADDRESS(*entry);
+    range.size = LARGE_ENTRY_SIZE(*entry);
+    szone->num_large_objects_in_use --;
     szone->num_bytes_in_large_objects -= range.size;
     if (szone->debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES) {
-	protect((void *)range.address, range.size, VM_PROT_READ | VM_PROT_WRITE, szone->debug_flags);
-	range.address -= vm_page_size;
-	range.size += 2 * vm_page_size;
-    }
+        protect(szone, range.address, range.size, VM_PROT_READ | VM_PROT_WRITE, szone->debug_flags);
+        range.address -= vm_page_size;
+        range.size += 2 * vm_page_size;
+    }
+    // printf("Entry is 0x%x=%d; cache is 0x%x ; found=0x%x\n", entry, entry-szone->large_entries, szone->large_entries, large_entry_for_pointer_no_lock(szone, (void *)range.address));
     entry->address_and_num_pages = 0;
     large_entries_rehash_after_entry_no_lock(szone, entry);
 #if DEBUG_MALLOC
     if (large_entry_for_pointer_no_lock(szone, (void *)range.address)) {
-	malloc_printf("*** freed entry %p still in use; num_large_entries=%d\n",
-	  range.address, szone->num_large_entries);
-	large_debug_print(szone);
-	szone_sleep();
+        malloc_printf("*** malloc[%d]: Freed entry 0x%x still in use; num_large_entries=%d\n", getpid(), range.address, szone->num_large_entries);
+        large_debug_print(szone);
+        sleep(3600);
     }
 #endif
     return range;
 }
 
-static kern_return_t
-large_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t large_entries_address, unsigned num_entries, memory_reader_t reader, vm_range_recorder_t recorder)
-{
+static kern_return_t large_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t large_entries_address, unsigned num_entries, memory_reader_t reader, vm_range_recorder_t recorder) {
     unsigned		index = 0;
     vm_range_t		buffer[MAX_RECORDER_BUFFER];
     unsigned		count = 0;
     large_entry_t	*entries;
     kern_return_t	err;
-    vm_range_t		range;
-    large_entry_t	entry;
-
     err = reader(task, large_entries_address, sizeof(large_entry_t) * num_entries, (void **)&entries);
-    if (err)
-	return err;
+    if (err) return err;
     index = num_entries;
-    if ((type_mask & MALLOC_ADMIN_REGION_RANGE_TYPE) &&
-      (num_entries * sizeof(large_entry_t) >= LARGE_THRESHOLD)) {
-	range.address = large_entries_address;
-	range.size = round_page(num_entries * sizeof(large_entry_t));
-	recorder(task, context, MALLOC_ADMIN_REGION_RANGE_TYPE, &range, 1);
-    }
-    if (type_mask & (MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE))
-	while (index--) {
-	    entry = entries[index];
-	    if (!LARGE_ENTRY_IS_EMPTY(entry)) {
-		range.address = (vm_address_t)LARGE_ENTRY_ADDRESS(entry);
-		range.size = (vm_size_t)LARGE_ENTRY_SIZE(entry);
-		buffer[count++] = range;
-		if (count >= MAX_RECORDER_BUFFER) {
-		    recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE, buffer, count);
-		    count = 0;
-		}
-	    }
-	}
-    if (count) {
-	recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE
-	  | MALLOC_PTR_REGION_RANGE_TYPE, buffer, count);
-    }
+    if ((type_mask & MALLOC_ADMIN_REGION_RANGE_TYPE) && (num_entries * sizeof(large_entry_t) >= LARGE_THRESHOLD)) {
+        vm_range_t	range;
+        range.address = large_entries_address;
+        range.size = round_page(num_entries * sizeof(large_entry_t));
+        recorder(task, context, MALLOC_ADMIN_REGION_RANGE_TYPE, &range, 1);
+    }
+    if (type_mask & (MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE)) while (index--) {
+        large_entry_t	entry = entries[index];
+        if (!LARGE_ENTRY_IS_EMPTY(entry)) {
+            vm_range_t	range;
+            range.address = LARGE_ENTRY_ADDRESS(entry);
+            range.size = LARGE_ENTRY_SIZE(entry);
+            buffer[count++] = range;
+            if (count >= MAX_RECORDER_BUFFER) {
+                recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE, buffer, count);
+                count = 0;
+            }
+        }
+    }
+    if (count) recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE, buffer, count);
     return 0;
 }
 
 /*********************	HUGE ENTRY UTILITIES	************************/
 
-static huge_entry_t *
-huge_entry_for_pointer_no_lock(szone_t *szone, const void *ptr)
-{
-    unsigned		index;
-    huge_entry_t	*huge;
-
-    for (index = szone->num_huge_entries, huge = szone->huge_entries;
-	 index > 0;
-	 index--, huge++) {
-    
-	if ((void *)huge->address == ptr)
-	    return huge;
+static huge_entry_t *huge_entry_for_pointer_no_lock(szone_t *szone, const void *ptr) {
+    unsigned	index = szone->num_huge_entries;
+    while (index--) {
+        huge_entry_t	*huge = szone->huge_entries + index;
+        if (huge->address == (vm_address_t)ptr) return huge;
     }
     return NULL;
 }
 
-static boolean_t
-huge_entry_append(szone_t *szone, huge_entry_t huge)
-{
-    huge_entry_t	*new_huge_entries = NULL, *old_huge_entries;
-    unsigned		num_huge_entries;
-    
-    // We do a little dance with locking because doing allocation (even in the
-    // default szone) may cause something to get freed in this szone, with a
-    // deadlock
-    // Returns 1 on success
+static boolean_t huge_entry_append(szone_t *szone, huge_entry_t huge) {
+    // We do a little dance with locking because doing allocation (even in the default szone) may cause something to get freed in this szone, with a deadlock
+    huge_entry_t	*new_huge_entries = NULL;
     SZONE_LOCK(szone);
-    for (;;) {
-	num_huge_entries = szone->num_huge_entries;
-	SZONE_UNLOCK(szone);
-	/* check for counter wrap */
-	if ((num_huge_entries + 1) < num_huge_entries)
-		return 0;
-	/* stale allocation from last time around the loop? */
-	if (new_huge_entries)
-	    szone_free(szone, new_huge_entries);
-	new_huge_entries = szone_malloc(szone, (num_huge_entries + 1) * sizeof(huge_entry_t));
+    while (1) {
+        unsigned	num_huge_entries;
+        num_huge_entries = szone->num_huge_entries;
+        SZONE_UNLOCK(szone);
+        // malloc_printf("In huge_entry_append currentEntries=%d\n", num_huge_entries);
+        if (new_huge_entries) szone_free(szone, new_huge_entries);
+        new_huge_entries = szone_malloc(szone, (num_huge_entries + 1) * sizeof(huge_entry_t));
 	if (new_huge_entries == NULL)
-	    return 0;
-	SZONE_LOCK(szone);
-	if (num_huge_entries == szone->num_huge_entries) {
-	    // No change - our malloc still applies
-	    old_huge_entries = szone->huge_entries;
-	    if (num_huge_entries) {
-		memcpy(new_huge_entries, old_huge_entries, num_huge_entries * sizeof(huge_entry_t));
-	    }
-	    new_huge_entries[szone->num_huge_entries++] = huge;
-	    szone->huge_entries = new_huge_entries;
-	    SZONE_UNLOCK(szone);
-	    szone_free(szone, old_huge_entries);
 	    return 1;
-	}
-	// try again!
-    }
-}
-
-static kern_return_t
-huge_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t huge_entries_address, unsigned num_entries, memory_reader_t reader, vm_range_recorder_t recorder)
-{
+        SZONE_LOCK(szone);
+        if (num_huge_entries == szone->num_huge_entries) {
+            // No change - our malloc still applies
+            huge_entry_t	*old_huge_entries = szone->huge_entries;
+            if (num_huge_entries) memcpy(new_huge_entries, old_huge_entries, num_huge_entries * sizeof(huge_entry_t));
+            new_huge_entries[szone->num_huge_entries++] = huge;
+            szone->huge_entries = new_huge_entries;
+            SZONE_UNLOCK(szone);
+            szone_free(szone, old_huge_entries);
+            // malloc_printf("Done huge_entry_append now=%d\n", szone->num_huge_entries);
+            return 0;
+        }
+        // try again!
+    }
+}
+
+static kern_return_t huge_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t huge_entries_address, unsigned num_entries, memory_reader_t reader, vm_range_recorder_t recorder) {
     huge_entry_t	*entries;
     kern_return_t	err;
-    
     err = reader(task, huge_entries_address, sizeof(huge_entry_t) * num_entries, (void **)&entries);
-    if (err)
-	return err;
-    if (num_entries)
-	recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE, entries, num_entries);
-
+    if (err) return err;
+    if (num_entries) recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE, entries, num_entries);
     return 0;
 }
 
-static void *
-large_and_huge_malloc(szone_t *szone, size_t num_pages)
-{
-    void		*addr;
-    vm_range_t		range_to_deallocate;
-    huge_entry_t	huge_entry;
-    size_t		size;
-    large_entry_t	large_entry;
-    
-    if (!num_pages)
-	    num_pages = 1; // minimal allocation size for this szone
-    size = (size_t)num_pages << vm_page_shift;
-    range_to_deallocate.size = 0;
+static void *large_and_huge_malloc(szone_t *szone, unsigned num_pages) {
+    vm_address_t	addr = 0;
+    if (!num_pages) num_pages = 1; // minimal allocation size for this szone
+    // malloc_printf("In large_and_huge_malloc for %dKB\n", num_pages * vm_page_size / 1024);
     if (num_pages >= (1 << vm_page_shift)) {
-	addr = allocate_pages(szone, size, 0, szone->debug_flags, VM_MEMORY_MALLOC_HUGE);
-	if (addr == NULL)
+        huge_entry_t	huge;
+        huge.size = num_pages << vm_page_shift;
+        addr = allocate_pages(szone, huge.size, szone->debug_flags, VM_MAKE_TAG(VM_MEMORY_MALLOC_HUGE));
+        if (!addr) return NULL;
+        huge.address = addr;
+        if (huge_entry_append(szone, huge))
 	    return NULL;
-	huge_entry.size = size;
-	huge_entry.address = (vm_address_t)addr;
-	if (!huge_entry_append(szone, huge_entry))
-	    return NULL;	// we are leaking the allocation here
+        SZONE_LOCK(szone);
+        szone->num_bytes_in_huge_objects += huge.size;
+    } else {
+        vm_size_t		size = num_pages << vm_page_shift;
+        large_entry_t		entry;
+	addr = allocate_pages(szone, size, szone->debug_flags, VM_MAKE_TAG(VM_MEMORY_MALLOC_LARGE));
+	if (LOG(szone, addr)) malloc_printf("In szone_malloc true large allocation at %p for %dKB\n", (void *)addr, size / 1024);
 	SZONE_LOCK(szone);
-	szone->num_bytes_in_huge_objects += size;
-    } else {
-
-	addr = allocate_pages(szone, size, 0, szone->debug_flags, VM_MEMORY_MALLOC_LARGE);
-#if DEBUG_MALLOC
-	if (LOG(szone, addr))
-	    malloc_printf("in szone_malloc true large allocation at %p for %ly\n", (void *)addr, size);
-#endif
-	SZONE_LOCK(szone);
-	if (addr == NULL) {
+	if (!addr) {
 	    SZONE_UNLOCK(szone);
 	    return NULL;
 	}
 #if DEBUG_MALLOC
-	if (large_entry_for_pointer_no_lock(szone, addr)) {
-	    malloc_printf("freshly allocated is already in use: %p\n", addr);
+	if (large_entry_for_pointer_no_lock(szone, (void *)addr)) {
+	    malloc_printf("Freshly allocated is already in use: 0x%x\n", addr);
 	    large_debug_print(szone);
-	    szone_sleep();
+	    sleep(3600);
 	}
 #endif
-	if ((szone->num_large_objects_in_use + 1) * 4 > szone->num_large_entries) {
-	    // density of hash table too high; grow table
-	    // we do that under lock to avoid a race
-	    large_entry_t *entries = large_entries_grow_no_lock(szone, &range_to_deallocate);
-	    if (entries == NULL) {
-	    	SZONE_UNLOCK(szone);
-	    	return NULL;
-	    }
-	}
-	large_entry.address_and_num_pages = (uintptr_t)addr | num_pages;
-#if DEBUG_MALLOC
-	if (large_entry_for_pointer_no_lock(szone, addr)) {
-	    malloc_printf("entry about to be added already in use: %p\n", addr);
-	    large_debug_print(szone);
-	    szone_sleep();
-	}
-#endif
-	large_entry_insert_no_lock(szone, large_entry);
-#if DEBUG_MALLOC
-	if (!large_entry_for_pointer_no_lock(szone, (void *)addr)) {
-	    malloc_printf("can't find entry just added\n");
-	    large_debug_print(szone);
-	    szone_sleep();
-	}
-#endif
-	szone->num_large_objects_in_use ++;
-	szone->num_bytes_in_large_objects += size;
+        if ((szone->num_large_objects_in_use + 1) * 4 > szone->num_large_entries) {
+            // density of hash table too high; grow table
+            // we do that under lock to avoid a race
+            // malloc_printf("In szone_malloc growing hash table current=%d\n", szone->num_large_entries);
+            large_entries_grow_no_lock(szone);
+        }
+        // malloc_printf("Inserting large entry (0x%x, %dKB)\n", addr, num_pages * vm_page_size / 1024);
+        entry.address_and_num_pages = addr | num_pages;
+#if DEBUG_MALLOC
+        if (large_entry_for_pointer_no_lock(szone, (void *)addr)) {
+            malloc_printf("Entry about to be added already in use: 0x%x\n", addr);
+            large_debug_print(szone);
+            sleep(3600);
+        }
+#endif
+        large_entry_insert_no_lock(szone, entry);
+#if DEBUG_MALLOC
+        if (!large_entry_for_pointer_no_lock(szone, (void *)addr)) {
+            malloc_printf("Can't find entry just added\n");
+            large_debug_print(szone);
+            sleep(3600);
+        }
+#endif
+        // malloc_printf("Inserted large entry (0x%x, %d pages)\n", addr, num_pages);
+        szone->num_large_objects_in_use ++;
+        szone->num_bytes_in_large_objects += size;
     }
     SZONE_UNLOCK(szone);
-    if (range_to_deallocate.size) {
-	deallocate_pages(szone, (void *)range_to_deallocate.address, range_to_deallocate.size, 0); // we deallocate outside the lock
-    }
     return (void *)addr;
 }
 
-static INLINE void
-free_large_or_huge(szone_t *szone, void *ptr)
-{
-    // We have established ptr is page-aligned and not tiny nor small
+/*********************	Zone call backs	************************/
+
+static void szone_free(szone_t *szone, void *ptr) {
+    region_t		*region;
     large_entry_t	*entry;
     vm_range_t		vm_range_to_deallocate;
     huge_entry_t	*huge;
-    
+    if (LOG(szone, ptr)) malloc_printf("In szone_free with %p\n", ptr);
+    if (!ptr) return;
+    if ((vm_address_t)ptr & (QUANTUM - 1)) {
+        szone_error(szone, "Non-aligned pointer being freed", ptr);
+        return;
+    }
+    // try a small pointer
+    region = region_for_ptr_no_lock(szone, ptr);
+    if (region) {
+        // this is indeed a valid pointer
+        msize_t		msize_and_free;
+        SZONE_LOCK(szone);
+        msize_and_free = MSIZE_FLAGS_FOR_PTR(ptr);
+        if (msize_and_free & THIS_FREE) {
+            szone_error(szone, "Object already freed being freed", ptr);
+            return;
+        }
+        CHECK(szone, __PRETTY_FUNCTION__);
+        small_free_no_lock(szone, region, ptr, msize_and_free);
+        CHECK(szone, __PRETTY_FUNCTION__);
+        SZONE_UNLOCK(szone);
+        return;
+    }
+    if (((unsigned)ptr) & (vm_page_size - 1)) {
+        szone_error(szone, "Non-page-aligned, non-allocated pointer being freed", ptr);
+        return;
+    }
     SZONE_LOCK(szone);
     entry = large_entry_for_pointer_no_lock(szone, ptr);
     if (entry) {
-	vm_range_to_deallocate = large_free_no_lock(szone, entry);
-#if DEBUG_MALLOC
-	if (large_entry_for_pointer_no_lock(szone, ptr)) {
-	    malloc_printf("*** just after freeing %p still in use num_large_entries=%d\n", ptr, szone->num_large_entries);
-	    large_debug_print(szone);
-	    szone_sleep();
+        // malloc_printf("Ready for deallocation [0x%x-%dKB]\n", LARGE_ENTRY_ADDRESS(*entry), LARGE_ENTRY_SIZE(*entry)/1024);
+	if (KILL_THRESHOLD && (LARGE_ENTRY_SIZE(*entry) > KILL_THRESHOLD)) {
+	    // We indicate to the VM system that these pages contain garbage and therefore don't need to be swapped out
+	    vm_msync(mach_task_self(), LARGE_ENTRY_ADDRESS(*entry), LARGE_ENTRY_SIZE(*entry), VM_SYNC_KILLPAGES);
 	}
+        vm_range_to_deallocate = large_free_no_lock(szone, entry);
+#if DEBUG_MALLOC
+        if (large_entry_for_pointer_no_lock(szone, ptr)) {
+            malloc_printf("*** malloc[%d]: Just after freeing 0x%x still in use num_large_entries=%d\n", getpid(), ptr, szone->num_large_entries);
+            large_debug_print(szone);
+            sleep(3600);
+        }
 #endif
     } else if ((huge = huge_entry_for_pointer_no_lock(szone, ptr))) {
-	vm_range_to_deallocate = *huge;
-	*huge = szone->huge_entries[--szone->num_huge_entries]; // last entry fills that spot
-	szone->num_bytes_in_huge_objects -= (size_t)vm_range_to_deallocate.size;
+        vm_range_to_deallocate = *huge;
+        *huge = szone->huge_entries[--szone->num_huge_entries]; // last entry fills that spot
+        szone->num_bytes_in_huge_objects -= vm_range_to_deallocate.size;
     } else {
 #if DEBUG_MALLOC
-	large_debug_print(szone);
-#endif
-	szone_error(szone, "pointer being freed was not allocated", ptr, NULL);
-	return;
-    }
+        large_debug_print(szone);
+#endif
+        szone_error(szone, "Pointer being freed was not allocated", ptr);
+        return;
+    }
+    CHECK(szone, __PRETTY_FUNCTION__);
     SZONE_UNLOCK(szone); // we release the lock asap
-    CHECK(szone, __PRETTY_FUNCTION__);
     // we deallocate_pages, including guard pages
     if (vm_range_to_deallocate.address) {
-#if DEBUG_MALLOC
-	if (large_entry_for_pointer_no_lock(szone, (void *)vm_range_to_deallocate.address)) {
-	    malloc_printf("*** invariant broken: %p still in use num_large_entries=%d\n", vm_range_to_deallocate.address, szone->num_large_entries);
-	    large_debug_print(szone);
-	    szone_sleep();
-	}
-#endif
-	deallocate_pages(szone, (void *)vm_range_to_deallocate.address, (size_t)vm_range_to_deallocate.size, 0);
-    }
-}
-
-static INLINE int
-try_realloc_large_or_huge_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size)
-{
-    vm_address_t	addr = (vm_address_t)ptr + old_size;
-    large_entry_t	*large_entry, saved_entry;
-    huge_entry_t	*huge_entry, huge;
-    kern_return_t	err;
-
-#if DEBUG_MALLOC
-    if (old_size != ((old_size >> vm_page_shift) << vm_page_shift)) {
-	malloc_printf("*** old_size is %d\n", old_size);
-    }
-#endif
-    SZONE_LOCK(szone);
-    large_entry = large_entry_for_pointer_no_lock(szone, (void *)addr);
-    SZONE_UNLOCK(szone);
-    if (large_entry) {
-	return 0; // large pointer already exists in table - extension is not going to work
-    }
-    new_size = round_page(new_size);
-    /*
-     * Ask for allocation at a specific address, and mark as realloc
-     * to request coalescing with previous realloc'ed extensions.
-     */
-    err = vm_allocate(mach_task_self(), &addr, new_size - old_size, VM_MAKE_TAG(VM_MEMORY_REALLOC));
-    if (err != KERN_SUCCESS) {
-	return 0;
-    }
-    SZONE_LOCK(szone);
-    /*
-     * If the new size is still under the large/huge threshold, we can just
-     * extend the existing large block.
-     *
-     * Note: this logic is predicated on the understanding that an allocated
-     * block can never really shrink, so that the new size will always be 
-     * larger than the old size.
-     *
-     * Note: the use of 1 << vm_page_shift here has to do with the subdivision
-     * of the bits in the large_entry_t, and not the size of a page (directly).
-     */
-    if ((new_size >> vm_page_shift) < (1 << vm_page_shift)) {
-	/* extend existing large entry */
-	large_entry = large_entry_for_pointer_no_lock(szone, ptr);
-	if (!large_entry) {
-	    szone_error(szone, "large entry reallocated is not properly in table", ptr, NULL);
-	    /* XXX will cause fault on next reference to entry */
-	}
-	large_entry->address_and_num_pages = (uintptr_t)ptr | (new_size >> vm_page_shift);
-	szone->num_bytes_in_large_objects += new_size - old_size;
-    } else if ((old_size >> vm_page_shift) >= (1 << vm_page_shift)) {
-	/* extend existing huge entry */
-	huge_entry = huge_entry_for_pointer_no_lock(szone, ptr);
-	if (!huge_entry) {
-	    szone_error(szone, "huge entry reallocated is not properly in table", ptr, NULL);
-	    /* XXX will cause fault on next reference to huge_entry */
-	}
-	huge_entry->size = new_size;
-	szone->num_bytes_in_huge_objects += new_size - old_size;
+        // malloc_printf("About to deallocate 0x%x size %dKB\n", vm_range_to_deallocate.address, vm_range_to_deallocate.size / 1024);
+#if DEBUG_MALLOC
+        if (large_entry_for_pointer_no_lock(szone, (void *)vm_range_to_deallocate.address)) {
+            malloc_printf("*** malloc[%d]: Invariant broken: 0x%x still in use num_large_entries=%d\n", getpid(), vm_range_to_deallocate.address, szone->num_large_entries);
+            large_debug_print(szone);
+            sleep(3600);
+        }
+#endif
+        deallocate_pages(szone, vm_range_to_deallocate.address, vm_range_to_deallocate.size, 0);
+    }
+}
+
+static INLINE void *szone_malloc_should_clear(szone_t *szone, size_t size, boolean_t cleared_requested) {
+    void	*ptr;
+    if (!((szone->debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES) && PROTECT_SMALL) && (size < LARGE_THRESHOLD)) {
+        // think small
+        size_t		msize = (size + PTR_HEADER_SIZE + QUANTUM - 1) >> SHIFT_QUANTUM;
+        if (msize < MIN_BLOCK) msize = MIN_BLOCK;
+        ptr = small_malloc_should_clear(szone, msize, cleared_requested);
+#if DEBUG_MALLOC
+        if ((MSIZE_FLAGS_FOR_PTR(ptr) & ~ PREV_FREE) < msize) {
+            malloc_printf("ptr=%p this=%d msize=%d\n", ptr, MSIZE_FLAGS_FOR_PTR(ptr), (int)msize);
+            szone_error(szone, "Pointer allocated has improper size (1)", ptr);
+            return NULL;
+        }
+        if ((MSIZE_FLAGS_FOR_PTR(ptr) & ~ PREV_FREE) < MIN_BLOCK) {
+            malloc_printf("ptr=%p this=%d msize=%d\n", ptr, MSIZE_FLAGS_FOR_PTR(ptr), (int)msize);
+            szone_error(szone, "Pointer allocated has improper size (2)", ptr);
+            return NULL;
+        }
+#endif
     } else {
-	/* need to convert large entry to huge entry */
-
-	/* release large entry, note we still have the VM allocation */
-	large_entry = large_entry_for_pointer_no_lock(szone, ptr);
-	saved_entry = *large_entry; // in case we need to put it back
-	large_free_no_lock(szone, large_entry);
-
-	/* and get a huge entry */
-	huge.address = (vm_address_t)ptr;
-	huge.size = new_size;	/* fix up size */
-	SZONE_UNLOCK(szone);
-	if (huge_entry_append(szone, huge)) {
-	    szone->num_bytes_in_huge_objects += new_size;
-	    return 1; // success!
-	}
-	SZONE_LOCK(szone);
-	// we leak memory (the extra space appended) but data structures are correct
-	large_entry_insert_no_lock(szone, saved_entry); // this will reinsert the large entry
-    }
-    SZONE_UNLOCK(szone); // we release the lock asap
-    return 1;
-}
-
-/*********************	Zone call backs	************************/
-
-static void
-szone_free(szone_t *szone, void *ptr)
-{
-    region_t	*tiny_region;
-    region_t	*small_region;
-
-#if DEBUG_MALLOC
-    if (LOG(szone, ptr))
-	malloc_printf("in szone_free with %p\n", ptr);
-#endif
-    if (!ptr)
-	return;
-    /*
-     * Try to free to a tiny region.
-     */
-    if ((uintptr_t)ptr & (TINY_QUANTUM - 1)) {
-	szone_error(szone, "Non-aligned pointer being freed", ptr, NULL);
-	return;
-    }
-    if ((tiny_region = tiny_region_for_ptr_no_lock(szone, ptr)) != NULL) {
-	if (TINY_INDEX_FOR_PTR(ptr) >= NUM_TINY_BLOCKS) {
-	    szone_error(szone, "Pointer to metadata being freed", ptr, NULL);
-	    return;
-	}
-	free_tiny(szone, ptr, tiny_region);
-	return;
-    }
-
-    /*
-     * Try to free to a small region.
-     */
-    if ((uintptr_t)ptr & (SMALL_QUANTUM - 1)) {
-	szone_error(szone, "Non-aligned pointer being freed (2)", ptr, NULL);
-	return;
-    }
-    if ((small_region = small_region_for_ptr_no_lock(szone, ptr)) != NULL) {
-	if (SMALL_META_INDEX_FOR_PTR(ptr) >= NUM_SMALL_BLOCKS) {
-	    szone_error(szone, "Pointer to metadata being freed (2)", ptr, NULL);
-	    return;
-	}
-	free_small(szone, ptr, small_region);
-	return;
-    }
-
-    /* check that it's a legal large/huge allocation */
-    if ((uintptr_t)ptr & (vm_page_size - 1)) {
-	szone_error(szone, "non-page-aligned, non-allocated pointer being freed", ptr, NULL);
-	return;
-    }
-    free_large_or_huge(szone, ptr);
-}
-
-static INLINE void *
-szone_malloc_should_clear(szone_t *szone, size_t size, boolean_t cleared_requested)
-{
+        unsigned		num_pages;
+        num_pages = round_page(size) >> vm_page_shift;
+        ptr = large_and_huge_malloc(szone, num_pages);
+    }
+    if (LOG(szone, ptr)) malloc_printf("szone_malloc returned %p\n", ptr);
+    return ptr;
+}
+
+static void *szone_malloc(szone_t *szone, size_t size) {
+    return szone_malloc_should_clear(szone, size, 0);
+}
+
+static void *szone_calloc(szone_t *szone, size_t num_items, size_t size) {
+    return szone_malloc_should_clear(szone, num_items * size, 1);
+}
+
+static void *szone_valloc(szone_t *szone, size_t size) {
     void	*ptr;
-    msize_t	msize;
-
-    if (size <= 31*TINY_QUANTUM) {
-	// think tiny
-	msize = TINY_MSIZE_FOR_BYTES(size + TINY_QUANTUM - 1);
-	if (!msize)
-	    msize = 1;
-	ptr = tiny_malloc_should_clear(szone, msize, cleared_requested);
-    } else if (!((szone->debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES) && PROTECT_SMALL) && (size < LARGE_THRESHOLD)) {
-	// think small
-	msize = SMALL_MSIZE_FOR_BYTES(size + SMALL_QUANTUM - 1);
-	if (! msize) msize = 1;
-	ptr = small_malloc_should_clear(szone, msize, cleared_requested);
-    } else {
-	// large or huge
-	size_t num_pages = round_page(size) >> vm_page_shift;
-	if (num_pages == 0)	/* Overflowed */
-		ptr = 0;
-	else
-	ptr = large_and_huge_malloc(szone, num_pages);
-    }
-#if DEBUG_MALLOC
-    if (LOG(szone, ptr))
-	malloc_printf("szone_malloc returned %p\n", ptr);
-#endif
-    /*
-     * If requested, scribble on allocated memory.
-     */
-    if ((szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) && ptr && !cleared_requested && size)
-	memset(ptr, 0xaa, size);
-
-    return ptr;
-}
-
-static void *
-szone_malloc(szone_t *szone, size_t size) {
-    return szone_malloc_should_clear(szone, size, 0);
-}
-
-static void *
-szone_calloc(szone_t *szone, size_t num_items, size_t size)
-{
-    size_t total_bytes = num_items * size;
-    if ((num_items > 1) && (size != 0) && ((total_bytes / size) != num_items))
-        return NULL;
-    return szone_malloc_should_clear(szone, total_bytes, 1);
-}
-
-static void *
-szone_valloc(szone_t *szone, size_t size)
-{
-    void	*ptr;
-    size_t num_pages;
-    
+    unsigned	num_pages;
     num_pages = round_page(size) >> vm_page_shift;
     ptr = large_and_huge_malloc(szone, num_pages);
-#if DEBUG_MALLOC
-    if (LOG(szone, ptr))
-	malloc_printf("szone_valloc returned %p\n", ptr);
-#endif
+    if (LOG(szone, ptr)) malloc_printf("szone_valloc returned %p\n", ptr);
     return ptr;
 }
 
-static size_t
-szone_size(szone_t *szone, const void *ptr)
-{
+static size_t szone_size(szone_t *szone, const void *ptr) {
     size_t		size = 0;
-    boolean_t		is_free;
-    msize_t		msize, msize_and_free;
+    region_t		*region;
     large_entry_t	*entry;
     huge_entry_t	*huge;
-    
-    if (!ptr)
-	return 0;
-#if DEBUG_MALLOC
-    if (LOG(szone, ptr)) {
-	malloc_printf("in szone_size for %p (szone=%p)\n", ptr, szone);
-    }
-#endif
-
-    /*
-     * Look for it in a tiny region.
-     */
-    if ((uintptr_t)ptr & (TINY_QUANTUM - 1))
-	return 0;
-    if (tiny_region_for_ptr_no_lock(szone, ptr)) {
-	if (TINY_INDEX_FOR_PTR(ptr) >= NUM_TINY_BLOCKS)
-	    return 0;
-	msize = get_tiny_meta_header(ptr, &is_free);
-	return (is_free) ? 0 : TINY_BYTES_FOR_MSIZE(msize);
-    }
-    
-    /*
-     * Look for it in a small region.
-     */
-    if ((uintptr_t)ptr & (SMALL_QUANTUM - 1))
-	return 0;
-    if (small_region_for_ptr_no_lock(szone, ptr)) {
-	if (SMALL_META_INDEX_FOR_PTR(ptr) >= NUM_SMALL_BLOCKS)
-	    return 0;
-	msize_and_free = *SMALL_METADATA_FOR_PTR(ptr);
-	return (msize_and_free & SMALL_IS_FREE) ? 0 : SMALL_BYTES_FOR_MSIZE(msize_and_free);
-    }
-
-    /*
-     * If not page-aligned, it cannot have come from a large or huge allocation.
-     */
-    if ((uintptr_t)ptr & (vm_page_size - 1))
-	return(0);
-
-    /*
-     * Look for it in a large or huge entry.
-     */
+    if (!ptr) return 0;
+    if (LOG(szone, ptr)) malloc_printf("In szone_size for %p (szone=%p)\n", ptr, szone);
+    if ((vm_address_t)ptr & (QUANTUM - 1)) return 0;
+    if ((((unsigned)ptr) & (vm_page_size - 1)) && (MSIZE_FLAGS_FOR_PTR(ptr) & THIS_FREE)) {
+        // not page aligned, but definitely not in use
+        return 0;
+    }
+    // Try a small pointer
+    region = region_for_ptr_no_lock(szone, ptr);
+    // malloc_printf("FOUND REGION %p\n", region);
+    if (region) {
+        // this is indeed a valid pointer
+        msize_t		msize_and_free = MSIZE_FLAGS_FOR_PTR(ptr);
+        return (msize_and_free & THIS_FREE) ? 0 : ((msize_and_free & ~PREV_FREE) << SHIFT_QUANTUM) - PTR_HEADER_SIZE;
+    }
+    if (((unsigned)ptr) & (vm_page_size - 1)) {
+        return 0;
+    }
     SZONE_LOCK(szone);
     entry = large_entry_for_pointer_no_lock(szone, ptr);
     if (entry) {
-	size = LARGE_ENTRY_SIZE(*entry);
+        size = LARGE_ENTRY_SIZE(*entry);
     } else if ((huge = huge_entry_for_pointer_no_lock(szone, ptr))) {
-	size = huge->size;
+        size = huge->size;
     }
     SZONE_UNLOCK(szone); 
-#if DEBUG_MALLOC
-    if (LOG(szone, ptr)) {
-	malloc_printf("szone_size for %p returned %d\n", ptr, (unsigned)size);
-    }
-#endif
+    // malloc_printf("szone_size for large/huge %p returned %d\n", ptr, (unsigned)size);
+    if (LOG(szone, ptr)) malloc_printf("szone_size for %p returned %d\n", ptr, (unsigned)size);
     return size;
 }
 
-static void *
-szone_realloc(szone_t *szone, void *ptr, size_t new_size)
-{
-    size_t		old_size;
+static INLINE int try_realloc_small_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size) {
+    // returns 1 on success
+    void	*next_block = (char *)ptr + old_size + PTR_HEADER_SIZE;
+    msize_t	next_msize_and_free;
+    msize_t	next_msize;
+    region_t	region;
+    msize_t	coalesced_msize;
+    msize_t	leftover_msize;
+    msize_t	new_msize_and_free;
+    void	*following_ptr;
+    SZONE_LOCK(szone);
+    region = szone->regions[szone->num_regions - 1];
+    if (((vm_address_t)ptr >= region) && ((vm_address_t)ptr < region + REGION_SIZE) && ((vm_address_t)next_block == REGION_END(region) - szone->num_bytes_free_in_last_region + PTR_HEADER_SIZE)) {
+	// This could be optimized but it is so rare it's not worth it
+	SZONE_UNLOCK(szone);
+	return 0;
+    }
+    // If the next block is free, we coalesce
+    next_msize_and_free = MSIZE_FLAGS_FOR_PTR(next_block);
+#if DEBUG_MALLOC
+    if ((vm_address_t)next_block & (QUANTUM - 1)) {
+	szone_error(szone, "Internal invariant broken in realloc(next_block)", next_block);
+    }
+    if (next_msize_and_free & PREV_FREE) {
+	malloc_printf("try_realloc_small_in_place: 0x%x=PREV_FREE|%d\n", next_msize_and_free, next_msize_and_free & ~PREV_FREE);
+	SZONE_UNLOCK(szone);
+	return 0;
+    }
+#endif
+    next_msize = next_msize_and_free & ~THIS_FREE;
+    if (!(next_msize_and_free & THIS_FREE) || !next_msize || (old_size + (next_msize << SHIFT_QUANTUM) < new_size)) {
+	SZONE_UNLOCK(szone);
+	return 0;
+    }
+    coalesced_msize = (new_size - old_size + QUANTUM - 1) >> SHIFT_QUANTUM;
+    leftover_msize = next_msize - coalesced_msize;
+    new_msize_and_free = MSIZE_FLAGS_FOR_PTR(ptr);
+    // malloc_printf("Realloc in place for %p;  current msize=%d next_msize=%d wanted=%d\n", ptr, MSIZE_FLAGS_FOR_PTR(ptr), next_msize, new_size);
+    free_list_remove_ptr(szone, next_block, next_msize);
+    if ((leftover_msize < MIN_BLOCK) || (leftover_msize < coalesced_msize / 4)) {
+	// don't bother splitting it off
+	// malloc_printf("No leftover ");
+	coalesced_msize = next_msize;
+	leftover_msize = 0;
+    } else {
+	void	*leftover = next_block + (coalesced_msize << SHIFT_QUANTUM);
+	// malloc_printf("Leftover ");
+	free_list_add_ptr(szone, leftover, leftover_msize);
+    }
+    new_msize_and_free += coalesced_msize;
+    MSIZE_FLAGS_FOR_PTR(ptr) = new_msize_and_free;
+    following_ptr = FOLLOWING_PTR(ptr, new_msize_and_free & ~PREV_FREE);
+    MSIZE_FLAGS_FOR_PTR(following_ptr) &= ~ PREV_FREE;
+#if DEBUG_MALLOC
+    {
+	msize_t	ms = MSIZE_FLAGS_FOR_PTR(following_ptr);
+	msize_t	pms = PREVIOUS_MSIZE(FOLLOWING_PTR(following_ptr, ms & ~THIS_FREE));
+	malloc_printf("Following ptr of coalesced (%p) has msize_and_free=0x%x=%s%d end_of_block_marker=%d\n", following_ptr, ms, (ms & THIS_FREE) ? "THIS_FREE|" : "", ms & ~THIS_FREE, pms);
+    }
+    if (LOG(szone,ptr)) malloc_printf("In szone_realloc(), ptr=%p, msize=%d\n", ptr, MSIZE_FLAGS_FOR_PTR(ptr));
+#endif
+    CHECK(szone, __PRETTY_FUNCTION__);
+    szone->num_bytes_in_small_objects += coalesced_msize << SHIFT_QUANTUM;
+    SZONE_UNLOCK(szone);
+    // malloc_printf("Extended ptr %p for realloc old=%d desired=%d new=%d leftover=%d\n", ptr, (unsigned)old_size, (unsigned)new_size, (unsigned)szone_size(szone, ptr), leftover_msize << SHIFT_QUANTUM);
+    return 1;
+}
+
+static INLINE int try_realloc_large_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size) {
+    vm_address_t	addr = (vm_address_t)ptr + old_size;
+    large_entry_t	*entry;
+    kern_return_t	err;
+    if (((szone->debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES) && PROTECT_SMALL)) return 0; // don't want to bother with the protected case
+#if DEBUG_MALLOC
+    if (old_size != ((old_size >> vm_page_shift) << vm_page_shift)) malloc_printf("*** old_size is %d\n", old_size);
+#endif
+    // malloc_printf("=== Trying (1) to extend %p from %d to %d\n", ptr, old_size, new_size);
+    SZONE_LOCK(szone);
+    entry = large_entry_for_pointer_no_lock(szone, (void *)addr);
+    SZONE_UNLOCK(szone);
+    if (entry) return 0; // large pointer already exist in table - extension is not going to work
+    new_size = round_page(new_size);
+    // malloc_printf("=== Trying (2) to extend %p from %d to %d\n", ptr, old_size, new_size);
+    err = vm_allocate(mach_task_self(), &addr, new_size - old_size, VM_MAKE_TAG(VM_MEMORY_MALLOC_LARGE)); // we ask for allocation specifically at addr
+    if (err) return 0;
+    // we can just extend the block
+    SZONE_LOCK(szone);
+    entry = large_entry_for_pointer_no_lock(szone, ptr);
+    if (!entry) szone_error(szone, "large entry reallocated is not properly in table", ptr);
+    // malloc_printf("=== Successfully reallocated at end of %p from %d to %d\n", ptr, old_size, new_size);
+    entry->address_and_num_pages = (vm_address_t)ptr | (new_size >> vm_page_shift);
+    szone->num_bytes_in_large_objects += new_size - old_size;
+    SZONE_UNLOCK(szone); // we release the lock asap
+    return 1;
+}
+
+static void *szone_realloc(szone_t *szone, void *ptr, size_t new_size) {
+    size_t		old_size = 0;
     void		*new_ptr;
-    
-#if DEBUG_MALLOC
-    if (LOG(szone, ptr)) {
-	malloc_printf("in szone_realloc for %p, %d\n", ptr, (unsigned)new_size);
-    }
-#endif
-    if (!ptr) {
-	ptr = szone_malloc(szone, new_size);
-	return ptr;
-    }
+    if (LOG(szone, ptr)) malloc_printf("In szone_realloc for %p, %d\n", ptr, (unsigned)new_size);
+    if (!ptr) return szone_malloc(szone, new_size);
     old_size = szone_size(szone, ptr);
     if (!old_size) {
-	szone_error(szone, "pointer being reallocated was not allocated", ptr, NULL);
-	return NULL;
-    }
-    /* we never shrink an allocation */
-    if (old_size >= new_size)
-	return ptr;
-
-    /*
-     * If the old and new sizes both suit the tiny allocator, try to reallocate in-place.
-     */
-    if ((new_size + TINY_QUANTUM - 1) <= 31 * TINY_QUANTUM) {
-	if (try_realloc_tiny_in_place(szone, ptr, old_size, new_size)) {
-	    return ptr;
+        szone_error(szone, "Pointer being reallocated was not allocated", ptr);
+        return NULL;
+    }
+    if (old_size >= new_size) return ptr;
+    if (!((szone->debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES) && PROTECT_SMALL) && ((new_size + PTR_HEADER_SIZE + QUANTUM - 1) < LARGE_THRESHOLD)) {
+        // We now try to realloc in place
+	if (try_realloc_small_in_place(szone, ptr, old_size, new_size)) return ptr;
+    }
+    if ((old_size > VM_COPY_THRESHOLD) && ((new_size + vm_page_size - 1) < (1 << (vm_page_shift + vm_page_shift)))) {
+	// we know it's a large block, and not a huge block (both for old and new)
+        kern_return_t	err = 0;
+        unsigned	num_pages;
+	large_entry_t	*entry;
+	vm_range_t	range;
+        num_pages = round_page(new_size) >> vm_page_shift;
+	if (try_realloc_large_in_place(szone, ptr, old_size, new_size)) return ptr;
+        new_ptr = large_and_huge_malloc(szone, num_pages);
+        err = vm_copy(mach_task_self(), (vm_address_t)ptr, old_size, (vm_address_t)new_ptr);
+        if (err) {
+            szone_error(szone, "Can't vm_copy region", ptr);
+        }
+	// We do not want the kernel to alias the old and the new, so we deallocate the old pointer right away and tear down the ptr-to-size data structure
+	SZONE_LOCK(szone);
+	entry = large_entry_for_pointer_no_lock(szone, ptr);
+	if (!entry) {
+	    szone_error(szone, "Can't find entry for large copied block", ptr);
 	}
-
-	/*
-	 * If the old and new sizes both suit the small allocator, and we're not protecting the
-	 * small allocations, try to reallocate in-place.
-	 */
-    } else if (!((szone->debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES) && PROTECT_SMALL) &&
-      ((new_size + SMALL_QUANTUM - 1) < LARGE_THRESHOLD) &&
-      (old_size > 31 * TINY_QUANTUM)) {
-	if (try_realloc_small_in_place(szone, ptr, old_size, new_size)) {
-	    return ptr;
-	}
-
-	/*
-	 * If the allocation's a large or huge allocation, try to reallocate in-place there.
-	 */
-    } else if (!((szone->debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES) && PROTECT_SMALL) && (old_size > LARGE_THRESHOLD)) {
-	if (try_realloc_large_or_huge_in_place(szone, ptr, old_size, new_size)) {
-	    return ptr;
-	}
-    }
-
-    /*
-     * Can't reallocate in place for whatever reason; allocate a new buffer and copy.
-     */
-    new_ptr = szone_malloc(szone, new_size);
-    if (new_ptr == NULL)
-	return NULL;
-
-    /*
-     * If the allocation's large enough, try to copy using VM.  If that fails, or
-     * if it's too small, just copy by hand.
-     */
-    if ((old_size < VM_COPY_THRESHOLD) ||
-      vm_copy(mach_task_self(), (vm_address_t)ptr, old_size, (vm_address_t)new_ptr))
-	memcpy(new_ptr, ptr, old_size);
+	range = large_free_no_lock(szone, entry);
+	SZONE_UNLOCK(szone); // we release the lock asap
+	// we truly deallocate_pages, including guard pages
+	deallocate_pages(szone, range.address, range.size, 0);
+	if (LOG(szone, ptr)) malloc_printf("szone_realloc returned %p for %d\n", new_ptr, (unsigned)new_size);
+	return new_ptr;
+    } else {
+	new_ptr = szone_malloc(szone, new_size);
+	if (new_ptr == NULL)
+	    return NULL;
+        memcpy(new_ptr, ptr, old_size);
+    }
     szone_free(szone, ptr);
-    
-#if DEBUG_MALLOC
-    if (LOG(szone, ptr)) {
-	malloc_printf("szone_realloc returned %p for %d\n", new_ptr, (unsigned)new_size);
-    }
-#endif
+    if (LOG(szone, ptr)) malloc_printf("szone_realloc returned %p for %d\n", new_ptr, (unsigned)new_size);
     return new_ptr;
 }
 
-// given a size, returns the number of pointers allocated capable of holding
-// that size, up to the limit specified by the 'count' argument.  These pointers
-// are stored in the 'results' array, which must be allocated by the caller.
-// may return zero, since this function is only a best attempt at allocating
-// the pointers.  clients should be prepared to call malloc for any additional
-// blocks they need.
-static unsigned
-szone_batch_malloc(szone_t *szone, size_t size, void **results, unsigned count)
-{
-    msize_t     msize = TINY_MSIZE_FOR_BYTES(size + TINY_QUANTUM - 1);
-    unsigned    found = 0;
-
-    // only bother implementing this for tiny
-    if (size > 31*TINY_QUANTUM)
-        return 0;
-    // make sure to return objects at least one quantum in size
-    if (!msize)
-        msize = 1;
-
-    CHECK(szone, __PRETTY_FUNCTION__);
-
-	// We must lock the zone now, since tiny_malloc_from_free_list assumes that 
-	// the caller has done so.
-    SZONE_LOCK(szone);
-
-	// with the zone locked, allocate objects from the free list until all
-	// sufficiently large objects have been exhausted, or we have met our quota
-	// of objects to allocate.
-	while (found < count) {
-        void *ptr = tiny_malloc_from_free_list(szone, msize);
-        if (!ptr)
-            break;
-
-        *results++ = ptr;
-        found++;
-    }
-    SZONE_UNLOCK(szone);
-    return found;
-}
-
-static void
-szone_batch_free(szone_t *szone, void **to_be_freed, unsigned count)
-{
-    unsigned		cc = 0;
-    void		*ptr;
-    region_t	*tiny_region;
-    boolean_t		is_free;
-    msize_t		msize;
-
-    // frees all the pointers in to_be_freed
-    // note that to_be_freed may be overwritten during the process
-    if (!count)
-	return;
-    CHECK(szone, __PRETTY_FUNCTION__);
-    SZONE_LOCK(szone);
-    while (cc < count) {
-	ptr = to_be_freed[cc];
-	if (ptr) {
-	    /* XXX this really slows us down */
-	    tiny_region = tiny_region_for_ptr_no_lock(szone, ptr);
-	    if (tiny_region) {
-		// this is a tiny pointer
-		if (TINY_INDEX_FOR_PTR(ptr) >= NUM_TINY_BLOCKS)
-		    break; // pointer to metadata; let the standard free deal with it
-		msize = get_tiny_meta_header(ptr, &is_free);
-		if (is_free)
-		    break; // a double free; let the standard free deal with it
-		tiny_free_no_lock(szone, tiny_region, ptr, msize);
-		to_be_freed[cc] = NULL;
-	    }
-	}
-	cc++;
-    }
-    SZONE_UNLOCK(szone);
-    CHECK(szone, __PRETTY_FUNCTION__);
-    while (count--) {
-	ptr = to_be_freed[count];
-	if (ptr)
-	    szone_free(szone, ptr);
-    }
-}
-
-static void
-szone_destroy(szone_t *szone)
-{
-    size_t		index;
-    large_entry_t	*large;
-    vm_range_t		range_to_deallocate;
-    huge_entry_t	*huge;
-
-    /* destroy large entries */
+static void szone_destroy(szone_t *szone) {
+    unsigned	index;
     index = szone->num_large_entries;
     while (index--) {
-	large = szone->large_entries + index;
-	if (!LARGE_ENTRY_IS_EMPTY(*large)) {
-	    // we deallocate_pages, including guard pages
-	    deallocate_pages(szone, (void *)LARGE_ENTRY_ADDRESS(*large), LARGE_ENTRY_SIZE(*large), szone->debug_flags);
-	}
-    }
-    if (szone->num_large_entries * sizeof(large_entry_t) >= LARGE_THRESHOLD) {
-	// we do not free in the small chunk case
-	large_entries_free_no_lock(szone, szone->large_entries, szone->num_large_entries, &range_to_deallocate);
-	if (range_to_deallocate.size)
-	    deallocate_pages(szone, (void *)range_to_deallocate.address, (size_t)range_to_deallocate.size, 0);
-    }
-
-    /* destroy huge entries */
+        large_entry_t	*entry = szone->large_entries + index;
+        if (!LARGE_ENTRY_IS_EMPTY(*entry)) {
+            large_entry_t	range;
+            range = *entry;
+            // we deallocate_pages, including guard pages
+            deallocate_pages(szone, LARGE_ENTRY_ADDRESS(range), LARGE_ENTRY_SIZE(range), szone->debug_flags);
+        }
+    }
+    if (szone->num_large_entries * sizeof(large_entry_t) >= LARGE_THRESHOLD) large_entries_free_no_lock(szone, szone->large_entries, szone->num_large_entries); // we do not free in the small chunk case
     index = szone->num_huge_entries;
     while (index--) {
-	huge = szone->huge_entries + index;
-	deallocate_pages(szone, (void *)huge->address, huge->size, szone->debug_flags);
-    }
-    
-    /* destroy tiny regions */
-    for (index = 0; index < szone->num_tiny_regions_allocated; ++index)
-      if (szone->tiny_regions[index])
-        deallocate_pages(szone, szone->tiny_regions[index], TINY_REGION_SIZE, 0);
-
-    /* destroy small regions */
-    for (index = 0; index < szone->num_small_regions_allocated; ++index)
-      if (szone->small_regions[index])
-        deallocate_pages(szone, szone->small_regions[index], SMALL_REGION_SIZE, 0);
-
-    /* destroy region hash rings, if any */
-    if (szone->tiny_regions != szone->initial_tiny_regions) {
-      size_t size = round_page(szone->num_tiny_regions_allocated * sizeof(region_t));
-      deallocate_pages(szone, szone->tiny_regions, size, 0);
-    }
-    if (szone->small_regions != szone->initial_small_regions) {
-      size_t size = round_page(szone->num_small_regions_allocated * sizeof(region_t));
-      deallocate_pages(szone, szone->small_regions, size, 0);
-    }
-    /* Now destroy the separate szone region */
-    deallocate_pages(szone, (void *)szone, SZONE_PAGED_SIZE, SCALABLE_MALLOC_ADD_GUARD_PAGES);
-}
-
-static size_t
-szone_good_size(szone_t *szone, size_t size)
-{
-    msize_t	msize;
-    unsigned	num_pages;
-    
-    if (size <= 31 * TINY_QUANTUM) {
-	// think tiny
-	msize = TINY_MSIZE_FOR_BYTES(size + TINY_QUANTUM - 1);
-	if (! msize) msize = 1;
-	return TINY_BYTES_FOR_MSIZE(msize);
-    }
+        huge_entry_t	*huge = szone->huge_entries + index;
+        deallocate_pages(szone, huge->address, huge->size, szone->debug_flags);
+    }
+    // and now we free regions, with regions[0] as the last one (the final harakiri)
+    index = szone->num_regions;
+    while (index--) { // we skip the first region, that is the zone itself
+        region_t	region = szone->regions[index];
+        deallocate_pages(szone, REGION_ADDRESS(region), REGION_SIZE, 0);
+    }
+}
+
+static size_t szone_good_size(szone_t *szone, size_t size) {
     if (!((szone->debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES) && PROTECT_SMALL) && (size < LARGE_THRESHOLD)) {
-	// think small
-	msize = SMALL_MSIZE_FOR_BYTES(size + SMALL_QUANTUM - 1);
-	if (! msize) msize = 1;
-	return SMALL_BYTES_FOR_MSIZE(msize);
+        // think small
+        msize_t	msize = (size + PTR_HEADER_SIZE + QUANTUM - 1) >> SHIFT_QUANTUM;
+        if (msize < MIN_BLOCK) msize = MIN_BLOCK;
+        return (msize << SHIFT_QUANTUM) - PTR_HEADER_SIZE;
     } else {
-	num_pages = round_page(size) >> vm_page_shift;
-	if (!num_pages)
-	    num_pages = 1; // minimal allocation size for this
-	return num_pages << vm_page_shift;
+        unsigned		num_pages;
+        num_pages = round_page(size) >> vm_page_shift;
+        if (!num_pages) num_pages = 1; // minimal allocation size for this
+        return num_pages << vm_page_shift;
     }
 }
 
@@ -3769,278 +1374,160 @@
 unsigned szone_check_start = 0;
 unsigned szone_check_modulo = 1;
 
-static boolean_t
-szone_check_all(szone_t *szone, const char *function)
-{
-    size_t index;
-
+static boolean_t szone_check_all(szone_t *szone, const char *function) {
+    unsigned	index = 0;
     SZONE_LOCK(szone);
-    CHECK_LOCKED(szone, __PRETTY_FUNCTION__);
-
-    /* check tiny regions - chould check region count */
-    for (index = 0; index < szone->num_tiny_regions_allocated; ++index) {
-      region_t tiny = szone->tiny_regions[index];
-      if (tiny && !tiny_check_region(szone, tiny)) {
-        SZONE_UNLOCK(szone);
-        szone->debug_flags &= ~ CHECK_REGIONS;
-        szone_error(szone, "check: tiny region incorrect", NULL,
-                    "*** tiny region %d incorrect szone_check_all(%s) counter=%d\n",
-                    index, function, szone_check_counter);
-        return 0;
-      }
-    }
-    /* check tiny free lists */
-    for (index = 0; index < NUM_TINY_SLOTS; ++index) {
-      if (!tiny_free_list_check(szone, index)) {
-        SZONE_UNLOCK(szone);
-        szone->debug_flags &= ~ CHECK_REGIONS;
-        szone_error(szone, "check: tiny free list incorrect", NULL,
-                    "*** tiny free list incorrect (slot=%d) szone_check_all(%s) counter=%d\n",
-                    index, function, szone_check_counter);
-        return 0;
-      }
-    }
-    /* check small regions - could check region count */
-    for (index = 0; index < szone->num_small_regions_allocated; ++index) {
-      region_t small = szone->small_regions[index];
-      if (small && !szone_check_small_region(szone, small)) {
-        SZONE_UNLOCK(szone);
-        szone->debug_flags &= ~ CHECK_REGIONS;
-        szone_error(szone, "check: small region incorrect", NULL,
-                    "*** small region %d incorrect szone_check_all(%s) counter=%d\n",
-                    index, function, szone_check_counter);
-        return 0;
-      }
-    }
-    /* check small free lists */
-    for (index = 0; index < NUM_SMALL_SLOTS; ++index) {
-      if (!small_free_list_check(szone, index)) {
-        SZONE_UNLOCK(szone);
-        szone->debug_flags &= ~ CHECK_REGIONS;
-        szone_error(szone, "check: small free list incorrect", NULL,
-                    "*** small free list incorrect (grain=%d) szone_check_all(%s) counter=%d\n",
-                    index, function, szone_check_counter);
-        return 0;
-      }
+    while (index < szone->num_regions) {
+        region_t	*region = szone->regions + index++;
+        if (!szone_check_region(szone, region)) {
+            SZONE_UNLOCK(szone);
+            szone->debug_flags &= ~ CHECK_REGIONS;
+            malloc_printf("*** malloc[%d]: Region %d incorrect szone_check_all(%s) counter=%d\n", getpid(), index-1, function, szone_check_counter);
+            szone_error(szone, "Check: region incorrect", NULL);
+            return 0;
+        }
+    }
+    index = 0;
+    while (index < MAX_GRAIN) {
+        if (! free_list_check(szone, index)) {
+            SZONE_UNLOCK(szone);
+            szone->debug_flags &= ~ CHECK_REGIONS;
+            malloc_printf("*** malloc[%d]: Free list incorrect (grain=%d) szone_check_all(%s) counter=%d\n", getpid(), index, function, szone_check_counter);
+            szone_error(szone, "Check: free list incorrect", NULL);
+            return 0;
+        }
+        index++;
     }
     SZONE_UNLOCK(szone);
     return 1;
 }
 
-static boolean_t
-szone_check(szone_t *szone)
-{
-    if ((++szone_check_counter % 10000) == 0)
-	_malloc_printf(ASL_LEVEL_NOTICE, "at szone_check counter=%d\n", szone_check_counter);
-    if (szone_check_counter < szone_check_start)
-	return 1;
-    if (szone_check_counter % szone_check_modulo)
-	return 1;
+static boolean_t szone_check(szone_t *szone) {
+    if (! (++szone_check_counter % 10000)) {
+        malloc_printf("At szone_check counter=%d\n", szone_check_counter);
+    }
+    if (szone_check_counter < szone_check_start) return 1;
+    if (szone_check_counter % szone_check_modulo) return 1;
     return szone_check_all(szone, "");
 }
 
-static kern_return_t
-szone_ptr_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t zone_address, memory_reader_t reader, vm_range_recorder_t recorder)
-{
+static kern_return_t szone_ptr_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t zone_address, memory_reader_t reader, vm_range_recorder_t recorder) {
     szone_t		*szone;
     kern_return_t	err;
-    
     if (!reader) reader = _szone_default_reader;
+    // malloc_printf("Enumerator for zone 0x%x\n", zone_address);
     err = reader(task, zone_address, sizeof(szone_t), (void **)&szone);
     if (err) return err;
-    err = tiny_in_use_enumerator(task, context, type_mask, szone, reader, recorder);
+    // malloc_printf("Small ptrs enumeration for zone 0x%x\n", zone_address);
+    err = small_in_use_enumerator(task, context, type_mask, (vm_address_t)szone->regions, szone->num_regions, reader, recorder);
     if (err) return err;
-    err = small_in_use_enumerator(task, context, type_mask, szone, reader, recorder);
+    // malloc_printf("Large ptrs enumeration for zone 0x%x\n", zone_address);
+    err = large_in_use_enumerator(task, context, type_mask, (vm_address_t)szone->large_entries, szone->num_large_entries, reader, recorder);
     if (err) return err;
-    err = large_in_use_enumerator(task, context, type_mask,
-      (vm_address_t)szone->large_entries, szone->num_large_entries, reader,
-      recorder);
-    if (err) return err;
-    err = huge_in_use_enumerator(task, context, type_mask,
-      (vm_address_t)szone->huge_entries, szone->num_huge_entries, reader,
-      recorder);
+    // malloc_printf("Huge ptrs enumeration for zone 0x%x\n", zone_address);
+    err = huge_in_use_enumerator(task, context, type_mask, (vm_address_t)szone->huge_entries, szone->num_huge_entries, reader, recorder);
     return err;
 }
 
-// Following method is deprecated:  use scalable_zone_statistics instead
-void
-scalable_zone_info(malloc_zone_t *zone, unsigned *info_to_fill, unsigned count)
-{
+static void szone_print_free_list(szone_t *szone) {
+    grain_t		grain = MAX_GRAIN;
+    malloc_printf("Free Sizes: ");
+    while (grain--) {
+        free_list_t	*ptr = szone->free_list[grain];
+        if (ptr) {
+            unsigned	count = 0;
+            while (ptr) {
+                count++;
+                // malloc_printf("%p ", ptr);
+                ptr = ptr->next;
+            }
+            malloc_printf("%s%d[%d] ", (grain == MAX_GRAIN-1) ? ">=" : "", (grain+1)*QUANTUM, count);
+        }
+    }
+    malloc_printf("\n");
+}
+
+static void szone_print(szone_t *szone, boolean_t verbose) {
+    unsigned	info[scalable_zone_info_count];
+    unsigned	index = 0;
+    scalable_zone_info((void *)szone, info, scalable_zone_info_count);
+    malloc_printf("Scalable zone %p: inUse=%d(%dKB) small=%d(%dKB) large=%d(%dKB) huge=%d(%dKB) guard_page=%d\n", szone, info[0], info[1] / 1024, info[2], info[3] / 1024, info[4], info[5] / 1024, info[6], info[7] / 1024, info[8]);
+    malloc_printf("%d regions: \n", szone->num_regions);
+    while (index < szone->num_regions) {
+        region_t	*region = szone->regions + index;
+        unsigned	counts[512];
+        unsigned	ci = 0;
+        unsigned	in_use = 0;
+        vm_address_t	start = REGION_ADDRESS(*region) + QUANTUM;
+        memset(counts, 0, 512 * sizeof(unsigned));
+        while (start < REGION_END(*region)) {
+            msize_t	msize_and_free = MSIZE_FLAGS_FOR_PTR(start);
+            if (!(msize_and_free & THIS_FREE)) {
+                msize_t	msize = msize_and_free & ~PREV_FREE;
+                if (!msize) break; // last encountered
+                // block in use
+                if (msize < 512) counts[msize]++;
+                start += msize << SHIFT_QUANTUM;
+                in_use++;
+            } else {
+                msize_t	msize = msize_and_free & ~THIS_FREE;
+                // free block
+                start += msize << SHIFT_QUANTUM;
+            }
+        }
+        malloc_printf("Region [0x%x-0x%x, %dKB] \tIn_use=%d ", REGION_ADDRESS(*region), REGION_END(*region), (int)REGION_SIZE / 1024, in_use);
+        if (verbose) {
+            malloc_printf("\n\tSizes in use: ");
+            while (ci < 512) {
+                if (counts[ci]) malloc_printf("%d[%d] ", ci << SHIFT_QUANTUM, counts[ci]);
+                ci++;
+            }
+        }
+        malloc_printf("\n");
+        index++;
+    }
+    if (verbose) szone_print_free_list(szone);
+    malloc_printf("Free in last zone %d\n", szone->num_bytes_free_in_last_region);
+}
+
+static void szone_log(malloc_zone_t *zone, void *log_address) {
     szone_t	*szone = (void *)zone;
-    unsigned	info[13];
-    
-    // We do not lock to facilitate debug
-    info[4] = szone->num_tiny_objects;
-    info[5] = szone->num_bytes_in_tiny_objects;
-    info[6] = szone->num_small_objects;
-    info[7] = szone->num_bytes_in_small_objects;
-    info[8] = szone->num_large_objects_in_use;
-    info[9] = szone->num_bytes_in_large_objects;
-    info[10] = szone->num_huge_entries;
-    info[11] = szone->num_bytes_in_huge_objects;
-    info[12] = szone->debug_flags;
-    info[0] = info[4] + info[6] + info[8] + info[10];
-    info[1] = info[5] + info[7] + info[9] + info[11];
-    info[3] = szone->num_tiny_regions * TINY_REGION_SIZE + szone->num_small_regions * SMALL_REGION_SIZE + info[9] + info[11];
-    info[2] = info[3] - szone->tiny_bytes_free_at_end - szone->small_bytes_free_at_end;
-    memcpy(info_to_fill, info, sizeof(unsigned)*count);
-}
-
-static void
-szone_print(szone_t *szone, boolean_t verbose)
-{
-  unsigned  info[13];
-  size_t    index;
-  region_t  region;
-  
-  SZONE_LOCK(szone);
-  scalable_zone_info((void *)szone, info, 13);
-  _malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
-                 "Scalable zone %p: inUse=%d(%y) touched=%y allocated=%y flags=%d\n",
-                 szone, info[0], info[1], info[2], info[3], info[12]);
-  _malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
-                 "\ttiny=%d(%y) small=%d(%y) large=%d(%y) huge=%d(%y)\n",
-                 info[4], info[5], info[6], info[7], info[8], info[9], info[10], info[11]);
-  // tiny
-  _malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
-                 "%d tiny regions:\n", szone->num_tiny_regions);
-  for (index = 0; index < szone->num_tiny_regions_allocated; ++index) {
-    region = szone->tiny_regions[index];
-    if (region)
-      print_tiny_region(verbose, region, (region == szone->last_tiny_region) ? 
-                                         szone->tiny_bytes_free_at_end : 0);
-  }
-  if (verbose)
-    print_tiny_free_list(szone);
-  // small
-  _malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
-                 "%d small regions:\n", szone->num_small_regions);
-  for (index = 0; index < szone->num_small_regions_allocated; ++index) {
-    region = szone->small_regions[index];
-    if (region)
-      print_small_region(szone, verbose, region,
-                         (region == szone->last_small_region) ? 
-                         szone->small_bytes_free_at_end : 0);
-  }
-  if (verbose)
-    print_small_free_list(szone);
-  SZONE_UNLOCK(szone);
-}
-
-static void
-szone_log(malloc_zone_t *zone, void *log_address)
-{
-    szone_t	*szone = (szone_t *)zone;
-
     szone->log_address = log_address;
 }
 
-static void
-szone_force_lock(szone_t *szone)
-{
+static void szone_force_lock(szone_t *szone) {
+    // malloc_printf("szone_force_lock\n");
     SZONE_LOCK(szone);
 }
 
-static void
-szone_force_unlock(szone_t *szone)
-{
+static void szone_force_unlock(szone_t *szone) {
+    // malloc_printf("szone_force_unlock\n");
     SZONE_UNLOCK(szone);
 }
 
-boolean_t
-scalable_zone_statistics(malloc_zone_t *zone, malloc_statistics_t *stats, unsigned subzone)
-{
-    szone_t *szone = (szone_t *)zone;
-    
-    switch (subzone) {
-	case 0:	
-	    stats->blocks_in_use = szone->num_tiny_objects;
-	    stats->size_in_use = szone->num_bytes_in_tiny_objects;
-	    stats->size_allocated = szone->num_tiny_regions * TINY_REGION_SIZE;
-	    stats->max_size_in_use = stats->size_allocated - szone->tiny_bytes_free_at_end;
-	    return 1;
-	case 1:	
-	    stats->blocks_in_use = szone->num_small_objects;
-	    stats->size_in_use = szone->num_bytes_in_small_objects;
-	    stats->size_allocated = szone->num_small_regions * SMALL_REGION_SIZE;
-	    stats->max_size_in_use = stats->size_allocated - szone->small_bytes_free_at_end;
-	    return 1;
-	case 2:
-	    stats->blocks_in_use = szone->num_large_objects_in_use;
-	    stats->size_in_use = szone->num_bytes_in_large_objects;
-	    stats->max_size_in_use = stats->size_allocated = stats->size_in_use;
-	    return 1;
-	case 3:
-	    stats->blocks_in_use = szone->num_huge_entries;
-	    stats->size_in_use = szone->num_bytes_in_huge_objects;
-	    stats->max_size_in_use = stats->size_allocated = stats->size_in_use;
-	    return 1;
-    }
-    return 0;
-}
-
-static void
-szone_statistics(szone_t *szone, malloc_statistics_t *stats)
-{
-    size_t	big_and_huge;
-    
-    stats->blocks_in_use =
-      szone->num_tiny_objects +
-      szone->num_small_objects +
-      szone->num_large_objects_in_use +
-      szone->num_huge_entries;
-    big_and_huge = szone->num_bytes_in_large_objects + szone->num_bytes_in_huge_objects;
-    stats->size_in_use = szone->num_bytes_in_tiny_objects + szone->num_bytes_in_small_objects + big_and_huge;
-    stats->max_size_in_use = stats->size_allocated =
-      szone->num_tiny_regions * TINY_REGION_SIZE +
-      szone->num_small_regions * SMALL_REGION_SIZE +
-      big_and_huge ; 
-
-    // Now we account for the untouched areas
-    stats->max_size_in_use -= szone->tiny_bytes_free_at_end;
-    stats->max_size_in_use -= szone->small_bytes_free_at_end;
-}
-
-static const struct malloc_introspection_t szone_introspect = {
-    (void *)szone_ptr_in_use_enumerator,
-    (void *)szone_good_size,
-    (void *)szone_check,
-    (void *)szone_print,
-    szone_log,
-    (void *)szone_force_lock,
-    (void *)szone_force_unlock,
-    (void *)szone_statistics
-}; // marked as const to spare the DATA section
-
-malloc_zone_t *
-create_scalable_zone(size_t initial_size, unsigned debug_flags)
-{
+static struct malloc_introspection_t szone_introspect = {(void *)szone_ptr_in_use_enumerator, (void *)szone_good_size, (void *)szone_check, (void *)szone_print, szone_log, (void *)szone_force_lock, (void *)szone_force_unlock};
+
+malloc_zone_t *create_scalable_zone(size_t initial_size, unsigned debug_flags) {
     szone_t		*szone;
-
-    /*
-     * Sanity-check our build-time assumptions about the size of a page.
-     * Since we have sized various things assuming the default page size,
-     * attempting to determine it dynamically is not useful.
-     */
-    if ((vm_page_size != _vm_page_size) || (vm_page_shift != _vm_page_shift)) {
-	malloc_printf("*** FATAL ERROR - machine page size does not match our assumptions.\n");
-	exit(-1);
-    }
-
-    /* get memory for the zone, which is now separate from any region.
-       add guard pages to prevent walking from any other vm allocations
-       to here and overwriting the function pointers in basic_zone. */
-    szone = allocate_pages(NULL, SZONE_PAGED_SIZE, 0, 
-                           SCALABLE_MALLOC_ADD_GUARD_PAGES, 
-                           VM_MEMORY_MALLOC);
-    if (!szone)
-        return NULL;
-    /* set up the szone structure */
-    szone->tiny_regions = szone->initial_tiny_regions;
-    szone->small_regions = szone->initial_small_regions;
-    szone->num_tiny_regions_allocated = INITIAL_NUM_REGIONS;
-    szone->num_small_regions_allocated = INITIAL_NUM_REGIONS;
-    szone->basic_zone.version = 3;
+    vm_address_t	addr;
+    size_t		msize;
+    size_t		msize_used = 0;
+    // malloc_printf("=== create_scalable_zone(%d,%d);\n", initial_size, debug_flags);
+    if (!vm_page_shift) {
+        unsigned	page;
+        vm_page_shift = 12; // the minimal for page sizes
+        page = 1 << vm_page_shift;
+        while (page != vm_page_size) { page += page; vm_page_shift++;};
+        if (MIN_BLOCK * QUANTUM < sizeof(free_list_t) + PTR_HEADER_SIZE) {
+            malloc_printf("*** malloc[%d]: inconsistant parameters\n", getpid());
+        }
+    }
+    addr = allocate_pages(NULL, REGION_SIZE, 0, VM_MAKE_TAG(VM_MEMORY_MALLOC));
+    if (!addr) return NULL;
+    szone = (void *)(addr + QUANTUM);
+    msize = (sizeof(szone_t) + PTR_HEADER_SIZE + QUANTUM-1) >> SHIFT_QUANTUM;
+    MSIZE_FLAGS_FOR_PTR(szone) = msize;
+    msize_used += msize; szone->num_small_objects++;
     szone->basic_zone.size = (void *)szone_size;
     szone->basic_zone.malloc = (void *)szone_malloc;
     szone->basic_zone.calloc = (void *)szone_calloc;
@@ -4048,22 +1535,38 @@
     szone->basic_zone.free = (void *)szone_free;
     szone->basic_zone.realloc = (void *)szone_realloc;
     szone->basic_zone.destroy = (void *)szone_destroy;
-    szone->basic_zone.batch_malloc = (void *)szone_batch_malloc;
-    szone->basic_zone.batch_free = (void *)szone_batch_free;
-    szone->basic_zone.introspect = (struct malloc_introspection_t *)&szone_introspect;
+    szone->basic_zone.introspect = &szone_introspect;
+    LOCK_INIT(szone->lock);
     szone->debug_flags = debug_flags;
-    LOCK_INIT(szone->lock);
-
-#if 0
-#warning CHECK_REGIONS enabled
-    debug_flags |= CHECK_REGIONS;
-#endif
-#if 0
-#warning LOG enabled
-    szone->log_address = ~0;
-#endif
+    szone->regions = (void *)((char *)szone + (msize << SHIFT_QUANTUM));
+    // we always reserve room for a few regions
+    msize = (sizeof(region_t) * INITIAL_NUM_REGIONS + PTR_HEADER_SIZE + QUANTUM-1) >> SHIFT_QUANTUM;
+    if (msize < MIN_BLOCK) msize = MIN_BLOCK;
+    MSIZE_FLAGS_FOR_PTR(szone->regions) = msize;
+    msize_used += msize; szone->num_small_objects++;
+    szone->regions[0] = addr;
+    szone->num_regions = 1;
+    szone->num_bytes_free_in_last_region = REGION_SIZE - ((msize_used+1) << SHIFT_QUANTUM) + PTR_HEADER_SIZE;
     CHECK(szone, __PRETTY_FUNCTION__);
     return (malloc_zone_t *)szone;
+}
+
+/*********	The following is private API for debug and perf tools	************/
+
+void scalable_zone_info(malloc_zone_t *zone, unsigned *info_to_fill, unsigned count) {
+    szone_t	*szone = (void *)zone;
+    unsigned	info[scalable_zone_info_count];
+    // We do not lock to facilitate debug
+    info[2] = szone->num_small_objects;
+    info[3] = szone->num_bytes_in_small_objects;
+    info[4] = szone->num_large_objects_in_use;
+    info[5] = szone->num_bytes_in_large_objects;
+    info[6] = szone->num_huge_entries;
+    info[7] = szone->num_bytes_in_huge_objects;
+    info[8] = szone->debug_flags;
+    info[0] = info[2] + info[4] + info[6];
+    info[1] = info[3] + info[5] + info[7];
+    memcpy(info_to_fill, info, sizeof(unsigned)*count);
 }
 
 /********* Support code for emacs unexec ************/
@@ -4078,15 +1581,12 @@
  *    operations on freezedried memory.  This doesn't work, since scalable
  *    malloc does not store flags in front of large page-aligned allocations.
  * 3) Original szone-based freezedrying code.
- * 4) Fresher malloc with tiny zone
- * 5) 32/64bit compatible malloc
- * 6) Metadata within 1MB and 8MB region for tiny and small
  *
  * No version backward compatibility is provided, but the version number does
  * make it possible for malloc_jumpstart() to return an error if the application
  * was freezedried with an older version of malloc.
  */
-#define MALLOC_FREEZEDRY_VERSION 6
+#define MALLOC_FREEZEDRY_VERSION 3
 
 typedef struct {
     unsigned version;
@@ -4094,48 +1594,39 @@
     szone_t *szones;
 } malloc_frozen;
 
-static void *
-frozen_malloc(szone_t *zone, size_t new_size)
-{
+static void *frozen_malloc(szone_t *zone, size_t new_size) {
     return malloc(new_size);
 }
 
-static void *
-frozen_calloc(szone_t *zone, size_t num_items, size_t size)
-{
+static void *frozen_calloc(szone_t *zone, size_t num_items, size_t size) {
     return calloc(num_items, size);
 }
 
-static void *
-frozen_valloc(szone_t *zone, size_t new_size)
-{
+static void *frozen_valloc(szone_t *zone, size_t new_size) {
     return valloc(new_size);
 }
 
-static void *
-frozen_realloc(szone_t *zone, void *ptr, size_t new_size)
-{
+static void *frozen_realloc(szone_t *zone, void *ptr, size_t new_size) {
     size_t	old_size = szone_size(zone, ptr);
     void	*new_ptr;
-    
+
     if (new_size <= old_size) {
 	return ptr;
     }
+
     new_ptr = malloc(new_size);
+
     if (old_size > 0) {
 	memcpy(new_ptr, ptr, old_size);
     }
+
     return new_ptr;
 }
 
-static void
-frozen_free(szone_t *zone, void *ptr)
-{
-}
-
-static void
-frozen_destroy(szone_t *zone)
-{
+static void frozen_free(szone_t *zone, void *ptr) {
+}
+
+static void frozen_destroy(szone_t *zone) {
 }
 
 /********* Pseudo-private API for emacs unexec ************/
@@ -4153,9 +1644,7 @@
  * returns 0 (error) if any non-szone zones are encountered.
  */
 
-uintptr_t
-malloc_freezedry(void)
-{
+int malloc_freezedry(void) {
     extern unsigned malloc_num_zones;
     extern malloc_zone_t **malloc_zones;
     malloc_frozen *data;
@@ -4164,18 +1653,17 @@
     /* Allocate space in which to store the freezedry state. */
     data = (malloc_frozen *) malloc(sizeof(malloc_frozen));
 
-    /* Set freezedry version number so that malloc_jumpstart() can check for compatibility. */
+    /* Set freezedry version number so that malloc_jumpstart() can check for
+     * compatibility. */
     data->version = MALLOC_FREEZEDRY_VERSION;
 
     /* Allocate the array of szone pointers. */
     data->nszones = malloc_num_zones;
     data->szones = (szone_t *) calloc(malloc_num_zones, sizeof(szone_t));
 
-    /*
-     * Fill in the array of szone structures.  They are copied rather than
+    /* Fill in the array of szone structures.  They are copied rather than
      * referenced, since the originals are likely to be clobbered during malloc
-     * initialization.
-     */
+     * initialization. */
     for (i = 0; i < malloc_num_zones; i++) {
 	if (strcmp(malloc_zones[i]->zone_name, "DefaultMallocZone")) {
 	    /* Unknown zone type. */
@@ -4186,13 +1674,11 @@
 	memcpy(&data->szones[i], malloc_zones[i], sizeof(szone_t));
     }
 
-    return((uintptr_t)data);
-}
-
-int
-malloc_jumpstart(uintptr_t cookie)
-{
-    malloc_frozen *data = (malloc_frozen *)cookie;
+    return (int) data;
+}
+
+int malloc_jumpstart(int cookie) {
+    malloc_frozen *data = (malloc_frozen *) cookie;
     unsigned i;
 
     if (data->version != MALLOC_FREEZEDRY_VERSION) {
@@ -4211,7 +1697,7 @@
 	data->szones[i].basic_zone.free = (void *) frozen_free;
 	data->szones[i].basic_zone.realloc = (void *) frozen_realloc;
 	data->szones[i].basic_zone.destroy = (void *) frozen_destroy;
-	data->szones[i].basic_zone.introspect = (struct malloc_introspection_t *)&szone_introspect;
+	data->szones[i].basic_zone.introspect = &szone_introspect;
 
 	/* Register the freezedried zone. */
 	malloc_zone_register(&data->szones[i].basic_zone);
@@ -4219,4 +1705,3 @@
 
     return 0;
 }
-