Loading...
--- libmalloc/libmalloc-283.100.5/src/magazine_small.c
+++ libmalloc/libmalloc-792.80.2/src/magazine_small.c
@@ -66,25 +66,6 @@
 	meta_headers[index] = 0;
 }
 
-static MALLOC_INLINE MALLOC_ALWAYS_INLINE
-mag_index_t
-small_mag_get_thread_index(void)
-{
-#if CONFIG_SMALL_USES_HYPER_SHIFT
-	if (os_likely(_os_cpu_number_override == -1)) {
-		return _os_cpu_number() >> hyper_shift;
-	} else {
-		return _os_cpu_number_override >> hyper_shift;
-	}
-#else // CONFIG_SMALL_USES_HYPER_SHIFT
-	if (os_likely(_os_cpu_number_override == -1)) {
-		return _os_cpu_number();
-	} else {
-		return _os_cpu_number_override;
-	}
-#endif // CONFIG_SMALL_USES_HYPER_SHIFT
-}
-
 #pragma mark in-place free list
 
 static MALLOC_INLINE void
@@ -833,8 +814,8 @@
 		OSAtomicIncrement32Barrier(&(REGION_TRAILER_FOR_SMALL_REGION(r)->pinned_to_depot));
 		SZONE_MAGAZINE_PTR_UNLOCK(depot_ptr);
 		for (i = 0; i < advisories; ++i) {
-			uintptr_t addr = (advisory[i].pnum << vm_page_quanta_shift) + (uintptr_t)r;
-			size_t size = advisory[i].size << vm_page_quanta_shift;
+			uintptr_t addr = (advisory[i].pnum << vm_kernel_page_shift) + (uintptr_t)r;
+			size_t size = advisory[i].size << vm_kernel_page_shift;
 
 			mvm_madvise_free(rack, r, addr, addr + size, NULL, rack->debug_flags & MALLOC_DO_SCRIBBLE);
 		}
@@ -916,11 +897,6 @@
 {
 	magazine_t *depot_ptr = &(rack->magazines[DEPOT_MAGAZINE_INDEX]);
 
-	/* FIXME: Would Uniprocessor benefit from recirc and MADV_FREE? */
-	if (rack->num_magazines == 1) { // Uniprocessor, single magazine, so no recirculation necessary
-		return 0;
-	}
-
 #if DEBUG_MALLOC
 	if (DEPOT_MAGAZINE_INDEX == mag_index) {
 		malloc_zone_error(rack->debug_flags, true, "small_get_region_from_depot called for magazine index -1\n", NULL, NULL);
@@ -933,22 +909,32 @@
 	// Appropriate a Depot'd region that can satisfy requested msize.
 	region_trailer_t *node;
 	region_t sparse_region;
+	msize_t try_msize = msize;
 
 	while (1) {
-		sparse_region = small_find_msize_region(rack, depot_ptr, DEPOT_MAGAZINE_INDEX, msize);
+		sparse_region = small_find_msize_region(rack, depot_ptr, DEPOT_MAGAZINE_INDEX, try_msize);
 		if (NULL == sparse_region) { // Depot empty?
 			SZONE_MAGAZINE_PTR_UNLOCK(depot_ptr);
 			return 0;
 		}
 
 		node = REGION_TRAILER_FOR_SMALL_REGION(sparse_region);
-		if (0 >= node->pinned_to_depot) {
+		if (0 == node->pinned_to_depot) {
+			// Found one!
 			break;
 		}
 
-		SZONE_MAGAZINE_PTR_UNLOCK(depot_ptr);
-		yield();
-		SZONE_MAGAZINE_PTR_LOCK(depot_ptr);
+		// Try the next msize up - maybe the head of its free list will be in
+		// a region we can use. Once we get the region we'll still allocate the
+		// original msize.
+		try_msize++;
+
+		if (try_msize > NUM_SMALL_SLOTS) {
+			// Tried all the msizes but couldn't get a usable region. Let's
+			// give up for now and we'll allocate a new region from the kernel.
+			SZONE_MAGAZINE_PTR_UNLOCK(depot_ptr);
+			return 0;
+		}
 	}
 
 	// disconnect node from Depot
@@ -959,7 +945,7 @@
 
 	// Transfer ownership of the region
 	MAGAZINE_INDEX_FOR_SMALL_REGION(sparse_region) = mag_index;
-	node->pinned_to_depot = 0;
+	MALLOC_ASSERT(node->pinned_to_depot == 0);
 
 	// Iterate the region putting its free entries on its new (locked) magazine's free list
 	size_t bytes_inplay = small_free_reattach_region(rack, small_mag_ptr, sparse_region);
@@ -994,18 +980,30 @@
 	for (mag_index = 0; mag_index < rack->num_magazines; mag_index++) {
 		size_t index;
 		for (index = 0; index < rack->region_generation->num_regions_allocated; ++index) {
-			SZONE_LOCK(SMALL_SZONE_FROM_RACK(rack));
+			rack_region_lock(rack);
 
 			region_t small = rack->region_generation->hashed_regions[index];
 			if (!small || small == HASHRING_REGION_DEALLOCATED) {
-				SZONE_UNLOCK(SMALL_SZONE_FROM_RACK(rack));
+				rack_region_unlock(rack);
 				continue;
 			}
 
+			region_trailer_t *trailer = REGION_TRAILER_FOR_SMALL_REGION(small);
+			// Make sure that the owning magazine doesn't try and take this out
+			// from under our feet.
+			trailer->dispose_flags |= RACK_DISPOSE_DELAY;
+			rack_region_unlock(rack);
+
 			magazine_t *mag_ptr = mag_lock_zine_for_region_trailer(rack->magazines,
-					REGION_TRAILER_FOR_SMALL_REGION(small),
-					MAGAZINE_INDEX_FOR_SMALL_REGION(small));
-			SZONE_UNLOCK(SMALL_SZONE_FROM_RACK(rack));
+					trailer, MAGAZINE_INDEX_FOR_SMALL_REGION(small));
+
+			// If acquiring the region lock was enough to prevent the owning
+			// magazine from deallocating the region, free it now so we don't
+			// do wasted work.
+			if (rack_region_maybe_dispose(rack, small, SMALL_REGION_SIZE, trailer)) {
+				SZONE_MAGAZINE_PTR_UNLOCK(mag_ptr);
+				continue;
+			}
 
 			/* Ordering is important here, the magazine of a region may potentially change
 			 * during mag_lock_zine_for_region_trailer, so src_mag_index must be taken
@@ -1019,6 +1017,11 @@
 				continue;
 			}
 
+			if (REGION_TRAILER_FOR_SMALL_REGION(small)->pinned_to_depot > 0) {
+				SZONE_MAGAZINE_PTR_UNLOCK(mag_ptr);
+				continue;
+			}
+
 			if (small == mag_ptr->mag_last_region && (mag_ptr->mag_bytes_free_at_end || mag_ptr->mag_bytes_free_at_start)) {
 				small_finalize_region(rack, mag_ptr);
 			}
@@ -1033,7 +1036,7 @@
 
 			SZONE_MAGAZINE_PTR_LOCK(small_depot_ptr);
 			MAGAZINE_INDEX_FOR_SMALL_REGION(small) = DEPOT_MAGAZINE_INDEX;
-			REGION_TRAILER_FOR_SMALL_REGION(small)->pinned_to_depot = 0;
+			MALLOC_ASSERT(REGION_TRAILER_FOR_SMALL_REGION(small)->pinned_to_depot == 0);
 
 			size_t bytes_inplay = small_free_reattach_region(rack, small_depot_ptr, small);
 
@@ -1143,31 +1146,18 @@
 	int objects_in_use = small_free_detach_region(rack, depot_ptr, sparse_region);
 
 	if (0 == objects_in_use) {
-		// Invalidate the hash table entry for this region with HASHRING_REGION_DEALLOCATED.
-		// Using HASHRING_REGION_DEALLOCATED preserves the collision chain, using HASHRING_OPEN_ENTRY (0) would not.
-		rgnhdl_t pSlot = hash_lookup_region_no_lock(rack->region_generation->hashed_regions,
-													rack->region_generation->num_regions_allocated,
-													rack->region_generation->num_regions_allocated_shift,
-													sparse_region);
-		if (NULL == pSlot) {
-			malloc_zone_error(rack->debug_flags, true, "small_free_try_depot_unmap_no_lock hash lookup failed: %p\n", sparse_region);
+		if (!rack_region_remove(rack, sparse_region, node)) {
 			return NULL;
 		}
-		*pSlot = HASHRING_REGION_DEALLOCATED;
 		depot_ptr->num_bytes_in_magazine -= SMALL_HEAP_SIZE;
-		// Atomically increment num_regions_dealloc
-#ifdef __LP64___
-		OSAtomicIncrement64(&rack->num_regions_dealloc);
-#else
-		OSAtomicIncrement32((int32_t *)&rack->num_regions_dealloc);
-#endif
 
 		// Caller will transfer ownership of the region back to the OS with no locks held
 		MAGMALLOC_DEALLOCREGION(SMALL_SZONE_FROM_RACK(rack), (void *)sparse_region, (int)SMALL_REGION_SIZE); // DTrace USDT Probe
 		return sparse_region;
 
 	} else {
-		malloc_zone_error(rack->debug_flags, true, "small_free_try_depot_unmap_no_lock objects_in_use not zero: %d\n", objects_in_use);
+		malloc_zone_error(rack->debug_flags, true,
+				"small_free_try_depot_unmap_no_lock objects_in_use not zero: %d\n", objects_in_use);
 		return NULL;
 	}
 }
@@ -1232,10 +1222,13 @@
 	MAGMALLOC_RECIRCREGION(SMALL_SZONE_FROM_RACK(rack), (int)mag_index, (void *)sparse_region, SMALL_REGION_SIZE,
 						   (int)BYTES_USED_FOR_SMALL_REGION(sparse_region)); // DTrace USDT Probe
 
-#if !CONFIG_AGGRESSIVE_MADVISE
-	// Mark free'd dirty pages with MADV_FREE to reduce memory pressure
-	small_free_scan_madvise_free(rack, depot_ptr, sparse_region);
-#endif
+#if CONFIG_AGGRESSIVE_MADVISE
+	if (!aggressive_madvise_enabled)
+#endif
+	{
+		// Mark free'd dirty pages with MADV_FREE to reduce memory pressure
+		small_free_scan_madvise_free(rack, depot_ptr, sparse_region);
+	}
 
 	// If the region is entirely empty vm_deallocate() it outside the depot lock
 	region_t r_dealloc = small_free_try_depot_unmap_no_lock(rack, depot_ptr, node);
@@ -1260,11 +1253,7 @@
 	region_trailer_t *node = REGION_TRAILER_FOR_SMALL_REGION(region);
 	size_t bytes_used = node->bytes_used;
 
-	/* FIXME: Would Uniprocessor benefit from recirc and MADV_FREE? */
-	if (rack->num_magazines == 1) { // Uniprocessor, single magazine, so no recirculation necessary
-		/* NOTHING */
-		return TRUE; // Caller must do SZONE_MAGAZINE_PTR_UNLOCK(tiny_mag_ptr)
-	} else if (DEPOT_MAGAZINE_INDEX != mag_index) {
+	if (DEPOT_MAGAZINE_INDEX != mag_index) {
 		// Emptiness discriminant
 		if (small_region_below_recirc_threshold(region)) {
 			/* Region has crossed threshold from density to sparsity. Mark it "suitable" on the
@@ -1282,11 +1271,14 @@
 			return small_free_do_recirc_to_depot(rack, small_mag_ptr, mag_index);
 		}
 	} else {
-#if !CONFIG_AGGRESSIVE_MADVISE
-		// We are free'ing into the depot, so madvise as we do so unless we were madvising every incoming
-		// allocation anyway.
-		small_madvise_free_range_no_lock(rack, small_mag_ptr, region, freee, msize, headptr, headsize);
-#endif
+#if CONFIG_AGGRESSIVE_MADVISE
+		if (!aggressive_madvise_enabled)
+#endif
+		{
+			// We are free'ing into the depot, so madvise as we do so unless we were madvising every incoming
+			// allocation anyway.
+			small_madvise_free_range_no_lock(rack, small_mag_ptr, region, freee, msize, headptr, headsize);
+		}
 
 		if (0 < bytes_used || 0 < node->pinned_to_depot) {
 			/* Depot'd region is still live. Leave it in place on the Depot's recirculation list
@@ -1381,7 +1373,9 @@
 	trailer->bytes_used = (unsigned int)bytes_used;
 
 #if CONFIG_AGGRESSIVE_MADVISE
-	small_madvise_free_range_no_lock(rack, small_mag_ptr, region, freee, msize, original_ptr, original_size);
+	if (aggressive_madvise_enabled) {
+		small_madvise_free_range_no_lock(rack, small_mag_ptr, region, freee, msize, original_ptr, original_size);
+	}
 #endif
 
 	// Caller must do SZONE_MAGAZINE_PTR_UNLOCK(tiny_mag_ptr) if this function
@@ -2068,7 +2062,7 @@
 small_malloc_should_clear(rack_t *rack, msize_t msize, boolean_t cleared_requested)
 {
 	void *ptr;
-	mag_index_t mag_index = small_mag_get_thread_index() % rack->num_magazines;
+	mag_index_t mag_index = rack_get_thread_index(rack) % rack->num_magazines;
 	magazine_t *small_mag_ptr = &(rack->magazines[mag_index]);
 
 	MALLOC_TRACE(TRACE_small_malloc, (uintptr_t)rack, SMALL_BYTES_FOR_MSIZE(msize), (uintptr_t)small_mag_ptr, cleared_requested);
@@ -2119,16 +2113,17 @@
 
 		// The magazine is exhausted. A new region (heap) must be allocated to satisfy this call to malloc().
 		// The allocation, an mmap() system call, will be performed outside the magazine spin locks by the first
-		// thread that suffers the exhaustion. That thread sets "alloc_underway" and enters a critical section.
-		// Threads arriving here later are excluded from the critical section, yield the CPU, and then retry the
-		// allocation. After some time the magazine is resupplied, the original thread leaves with its allocation,
-		// and retry-ing threads succeed in the code just above.
-		if (!small_mag_ptr->alloc_underway) {
+		// thread that suffers the exhaustion. That thread acquires the magazine_alloc_lock, then drops the
+		// magazine lock to allow freeing threads to proceed. Allocating threads that arrive later are excluded
+		// from the critical section by the alloc lock. When those are unblocked, they succeed in the code above.
+		//
+		// Note that we need to trylock the alloc lock to avoid a deadlock, since we can't block on the alloc
+		// lock while holding the magazine lock
+		if (os_likely(_malloc_lock_trylock(&small_mag_ptr->magazine_alloc_lock))) {
+			// We got the alloc lock, so we are the thread that should allocate a new region
 			void *fresh_region;
 
 			// time to create a new region (do this outside the magazine lock)
-			small_mag_ptr->alloc_underway = TRUE;
-			OSMemoryBarrier();
 			SZONE_MAGAZINE_PTR_UNLOCK(small_mag_ptr);
 			fresh_region = mvm_allocate_pages(SMALL_REGION_SIZE,
 					SMALL_BLOCKS_ALIGN,
@@ -2140,9 +2135,8 @@
 			MAGMALLOC_ALLOCREGION(SMALL_SZONE_FROM_RACK(rack), (int)mag_index, fresh_region, SMALL_REGION_SIZE);
 
 			if (!fresh_region) { // out of memory!
-				small_mag_ptr->alloc_underway = FALSE;
-				OSMemoryBarrier();
 				SZONE_MAGAZINE_PTR_UNLOCK(small_mag_ptr);
+				_malloc_lock_unlock(&small_mag_ptr->magazine_alloc_lock);
 				return NULL;
 			}
 
@@ -2150,14 +2144,20 @@
 			ptr = small_malloc_from_region_no_lock(rack, small_mag_ptr, mag_index, msize, fresh_region);
 
 			// we don't clear because this freshly allocated space is pristine
-			small_mag_ptr->alloc_underway = FALSE;
-			OSMemoryBarrier();
 			SZONE_MAGAZINE_PTR_UNLOCK(small_mag_ptr);
+			_malloc_lock_unlock(&small_mag_ptr->magazine_alloc_lock);
 			CHECK(szone, __PRETTY_FUNCTION__);
 			return ptr;
 		} else {
+			// We failed to get the alloc lock, so someone else is allocating.
+			// Drop the magazine lock...
 			SZONE_MAGAZINE_PTR_UNLOCK(small_mag_ptr);
-			yield();
+
+			// Wait for the other thread on the alloc lock
+			_malloc_lock_lock(&small_mag_ptr->magazine_alloc_lock);
+			_malloc_lock_unlock(&small_mag_ptr->magazine_alloc_lock);
+
+			// Reacquire the magazine lock to go around the loop again
 			SZONE_MAGAZINE_PTR_LOCK(small_mag_ptr);
 		}
 	}
@@ -2204,9 +2204,6 @@
 static MALLOC_NOINLINE void
 free_small_botch(rack_t *rack, void *ptr)
 {
-	mag_index_t mag_index = MAGAZINE_INDEX_FOR_SMALL_REGION(SMALL_REGION_FOR_PTR(ptr));
-	magazine_t *small_mag_ptr = &(rack->magazines[mag_index]);
-	SZONE_MAGAZINE_PTR_UNLOCK(small_mag_ptr);
 	malloc_zone_error(rack->debug_flags, true, "double free for ptr %p\n", ptr);
 }
 
@@ -2239,6 +2236,7 @@
 
 		/* check that we don't already have this pointer in the cache */
 		if (ptr == ptr2) {
+			SZONE_MAGAZINE_PTR_UNLOCK(small_mag_ptr);
 			free_small_botch(rack, ptr);
 			return;
 		}