nano_malloc.c diff - src/nano_malloc.c - Libmalloc source code libmalloc-116

src/nano_malloc.c libmalloc-116 ⇄ libmalloc-140.50.6
--- libmalloc/libmalloc-116/src/nano_malloc.c
+++ libmalloc/libmalloc-140.50.6/src/nano_malloc.c
@@ -125,7 +125,7 @@
 
 	if (add_guard_pages) {
 		addr += vm_page_size;
-		protect((void *)addr, size, PROT_NONE, debug_flags);
+		mvm_protect((void *)addr, size, PROT_NONE, debug_flags);
 	}
 	return (void *)addr;
 }
@@ -153,6 +153,44 @@
 		nanozone_error(nanozone, 0, "Can't deallocate_pages at", addr, NULL);
 	}
 }
+// Map the whole address range that nano bands can occupy with one mach_vm_map call; TRUE only if it lands exactly there.
+#if NANO_PREALLOCATE_BAND_VM
+static boolean_t
+nano_preallocate_band_vm(void)
+{
+	nano_blk_addr_t u;
+	uintptr_t s, e;
+	// Lowest candidate address: our signature with every other field zeroed.
+	u.fields.nano_signature = NANOZONE_SIGNATURE;
+	u.fields.nano_mag_index = 0;
+	u.fields.nano_band = 0;
+	u.fields.nano_slot = 0;
+	u.fields.nano_offset = 0;
+	// u.addr is read back as the address that the fields above encode.
+	s = u.addr; // start of first possible band
+	// Raise the magazine index and band number to the largest values that fit in their bit widths.
+	u.fields.nano_mag_index = (1 << NANO_MAG_BITS) - 1;
+	u.fields.nano_band = (1 << NANO_BAND_BITS) - 1;
+	// Slot and offset are still zero, so BAND_SIZE is added to step past the last band.
+	e = u.addr + BAND_SIZE; // end of last possible band
+	// Request the full [s, e) span as a single mapping.
+	mach_vm_address_t vm_addr = s;
+	mach_vm_size_t vm_size = (e - s);
+	// vm_addr is passed by pointer and compared with s afterwards to confirm the placement.
+	kern_return_t kr = mach_vm_map(mach_task_self(), &vm_addr, vm_size, 0,
+			VM_MAKE_TAG(VM_MEMORY_MALLOC_NANO), MEMORY_OBJECT_NULL, 0, FALSE,
+			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
+	// Any error, or a mapping at a different address, counts as failure.
+	void *q = (void *)vm_addr;
+	if (kr || q != (void*)s) { // Must get exactly what we asked for
+		if (!kr) { // the call succeeded but at another address: release that mapping before giving up
+			mach_vm_deallocate(mach_task_self(), vm_addr, vm_size);
+		}
+		return FALSE;
+	}
+	return TRUE;
+}
+#endif
 
 /*
  * We maintain separate free lists for each (quantized) size. The literature
@@ -190,8 +228,8 @@
 	pMeta->slot_current_base_addr = p;
 
 	mach_vm_address_t vm_addr = p & ~((uintptr_t)(BAND_SIZE - 1)); // Address of the (2MB) band covering this (128KB) slot
-
 	if (nanozone->band_max_mapped_baseaddr[mag_index] < vm_addr) {
+#if !NANO_PREALLOCATE_BAND_VM
 		// Obtain the next band to cover this slot
 		kern_return_t kr = mach_vm_map(mach_task_self(), &vm_addr, BAND_SIZE, 0, VM_MAKE_TAG(VM_MEMORY_MALLOC_NANO),
 				MEMORY_OBJECT_NULL, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
@@ -203,7 +241,7 @@
 			}
 			return FALSE;
 		}
-
+#endif
 		nanozone->band_max_mapped_baseaddr[mag_index] = vm_addr;
 	}
 
@@ -390,16 +428,80 @@
 				uintptr_t slot_band, clone_slot_band_base = clone_slot_base;
 				uintptr_t skip_adj = index_to_offset(nanozone, pMeta, (index_t)pMeta->slot_objects_skipped);
 
-				while (q.addr < pMeta->slot_limit_addr) {
-					// read slot in each remote band. Lands in some random location.
-					size_t len = MIN(pMeta->slot_bump_addr - q.addr, SLOT_IN_BAND_SIZE);
-					err = reader(task, (vm_address_t)(q.addr + skip_adj), len - skip_adj, (void **)&slot_band);
+				// Copy the bitarray_t denoting madvise()'d pages (if any) into *this* task's address space
+				bitarray_t madv_page_bitarray;
+				int log_page_count;
+
+				if (pMeta->slot_madvised_pages) {
+					log_page_count = pMeta->slot_madvised_log_page_count;
+					err = reader(task, (vm_address_t)(pMeta->slot_madvised_pages), bitarray_size(log_page_count),
+								 (void **)&madv_page_bitarray);
 					if (err) {
 						return err;
 					}
-
-					// Place the data just read in the correct position relative to the local magazine.
-					memcpy((void *)(clone_slot_band_base + skip_adj), (void *)slot_band, len - skip_adj);
+				} else {
+					madv_page_bitarray = NULL;
+					log_page_count = 0;
+				}
+
+				while (q.addr < pMeta->slot_limit_addr) {
+					// read slot in each remote band. Lands in some random location. Do not read
+					// parts of the slot that are in madvised pages.
+					if (!madv_page_bitarray) {
+						// Nothing madvised yet - read everything in one go.
+						size_t len = MIN(pMeta->slot_bump_addr - q.addr, SLOT_IN_BAND_SIZE) - skip_adj;
+						err = reader(task, (vm_address_t)(q.addr + skip_adj), len, (void **)&slot_band);
+						if (err) {
+							return err;
+						}
+
+						// Place the data just read in the correct position relative to the local magazine.
+						memcpy((void *)(clone_slot_band_base + skip_adj), (void *)slot_band, len);
+					} else {
+						// We madvised at least one page. Read only the pages that
+						// have not been madvised. If bitarray_t had operations
+						// like "get next bit set after a given bit" and "find
+						// next unset bit after a given bit", we could do this more
+						// efficiently but given that it doesn't, we have to walk
+						// through each page individually. In practice this is not
+						// much of an issue because this code is only used by
+						// sampling tools and the additional time required is not
+						// really noticeable.
+						size_t len = MIN(pMeta->slot_bump_addr - q.addr, SLOT_IN_BAND_SIZE) - skip_adj;
+						vm_address_t start_addr = (vm_address_t)(q.addr + skip_adj);
+						vm_address_t end_addr = (vm_address_t)(start_addr + len);
+						void *target_addr = (void *)(clone_slot_band_base + skip_adj);
+						for (vm_address_t addr = start_addr; addr < end_addr;) {
+							vm_address_t next_page_addr = trunc_page(addr) + vm_page_size;
+							size_t read_size = MIN(len, next_page_addr - addr);
+
+							boolean_t madvised = false;
+							nano_blk_addr_t r;
+							r.addr = addr;
+							index_t pgnum = ((((unsigned)r.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)r.fields.nano_offset)) >>
+							vm_kernel_page_shift;
+							unsigned int log_page_count = pMeta->slot_madvised_log_page_count;
+							madvised = (pgnum < (1 << log_page_count)) &&
+							bitarray_get(madv_page_bitarray, log_page_count, pgnum);
+							if (!madvised) {
+								// This is not an madvised page - grab the data.
+								err = reader(task, addr, read_size, (void **)&slot_band);
+								if (err) {
+									return err;
+								}
+
+								// Place the data just read in the correct position relative to the local magazine.
+								memcpy(target_addr, (void *)slot_band, read_size);
+							} else {
+								// This is an madvised page - there should be nothing in here that's
+								// on the freelist, so just write garbage to the target memory.
+								memset(target_addr, (char)0xee, read_size);
+							}
+							addr = next_page_addr;
+							target_addr += read_size;
+							len -= read_size;
+						}
+					}
 
 					// Simultaneously advance pointers in remote and ourselves to the next band.
 					q.addr += BAND_SIZE;
@@ -433,22 +535,6 @@
 					}
 				}
 				// N.B. pMeta->slot_LIFO in *this* task is now drained (remote free list has *not* been disturbed)
-
-				// Copy the bitarray_t denoting madvise()'d pages (if any) into *this* task's address space
-				bitarray_t madv_page_bitarray;
-				int log_page_count;
-
-				if (pMeta->slot_madvised_pages) {
-					log_page_count = pMeta->slot_madvised_log_page_count;
-					err = reader(task, (vm_address_t)(pMeta->slot_madvised_pages), bitarray_size(log_page_count),
-							(void **)&madv_page_bitarray);
-					if (err) {
-						return err;
-					}
-				} else {
-					madv_page_bitarray = NULL;
-					log_page_count = 0;
-				}
 
 				// Enumerate all the block indices issued to date, and report those not on the free list
 				index_t i;
@@ -565,6 +651,10 @@
 	nano_meta_admin_t pMeta;
 
 	p.addr = (uint64_t)ptr; // Begin the dissection of ptr
+
+	if (nanozone->our_signature != p.fields.nano_signature) {
+		return 0;
+	}
 
 	if (nanozone->phys_ncpus <= p.fields.nano_mag_index) {
 		return 0;
@@ -952,14 +1042,19 @@
 __nano_free(nanozone_t *nanozone, void *ptr, boolean_t do_scribble)
 {
 	MALLOC_TRACE(TRACE_nano_free, (uintptr_t)nanozone, (uintptr_t)ptr, do_scribble, 0);
-	nano_blk_addr_t p; // happily, the compiler holds this in a register
 
 	if (!ptr) {
 		return; // Protect against malloc_zone_free() passing NULL.
 	}
-	p.addr = (uint64_t)ptr; // place ptr on the dissecting table
-	if (nanozone->our_signature == p.fields.nano_signature) {
-		_nano_free_check_scribble(nanozone, ptr, do_scribble);
+
+	// <rdar://problem/26481467> exhausting a slot may result in a pointer with
+	// the nanozone prefix being given to nano_free via malloc_zone_free. Calling
+	// vet_and_size here, instead of in _nano_free_check_scribble means we can
+	// early-out into the helper_zone if it turns out nano does not own this ptr.
+	size_t sz = _nano_vet_and_size_of_live(nanozone, ptr);
+
+	if (sz) {
+		_nano_free_trusted_size_check_scribble(nanozone, ptr, sz, do_scribble);
 		return;
 	} else {
 		malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
@@ -978,13 +1073,17 @@
 static void
 nano_forked_free(nanozone_t *nanozone, void *ptr)
 {
-	nano_blk_addr_t p; // happily, the compiler holds this in a register
-
 	if (!ptr) {
 		return; // Protect against malloc_zone_free() passing NULL.
 	}
-	p.addr = (uint64_t)ptr; // place ptr on the dissecting table
-	if (nanozone->our_signature == p.fields.nano_signature) {
+
+	// <rdar://problem/26481467> exhausting a slot may result in a pointer with
+	// the nanozone prefix being given to nano_free via malloc_zone_free. Calling
+	// vet_and_size here, instead of in _nano_free_check_scribble means we can
+	// early-out into the helper_zone if it turns out nano does not own this ptr.
+	size_t sz = _nano_vet_and_size_of_live(nanozone, ptr);
+
+	if (sz) {
 		/* NOTHING. Drop it on the floor as nanozone metadata could be fouled by fork. */
 		return;
 	} else {
@@ -1026,49 +1125,42 @@
 static void *
 nano_realloc(nanozone_t *nanozone, void *ptr, size_t new_size)
 {
-	nano_blk_addr_t p; // happily, the compiler holds this in a register
-
-	p.addr = (uint64_t)ptr; // place ptr on the dissecting table
-
-	if (NULL == ptr) { // could occur through malloc_zone_realloc() path
+	// could occur through malloc_zone_realloc() path
+	if (!ptr) {
 		// If ptr is a null pointer, realloc() shall be equivalent to malloc() for the specified size.
 		return nano_malloc(nanozone, new_size);
-	} else if (nanozone->our_signature == p.fields.nano_signature) { // Our signature?
-		if (new_size <= NANO_MAX_SIZE) {							 // nano to nano?
+	}
+
+	size_t old_size = _nano_vet_and_size_of_live(nanozone, ptr);
+	if (!old_size) {
+		// not-nano pointer, hand down to helper zone
+		malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
+		return zone->realloc(zone, ptr, new_size);
+	} else {
+		if (new_size <= NANO_MAX_SIZE) {
+			// nano to nano?
 			void *q = _nano_realloc(nanozone, ptr, new_size);
 			if (q) {
 				return q;
-			} else { // nano exhausted
-					 /* FALLTHROUGH to helper zone copying case */
+			} else { 
+				// nano exhausted
+				/* FALLTHROUGH to helper zone copying case */
 			}
 		}
 
-		// nano to larger-than-nano (or FALLTHROUGH from just above)
-		size_t old_size = _nano_vet_and_size_of_live(nanozone, ptr);
-
-		if (!old_size) {
-			nanozone_error(nanozone, 1, "pointer being reallocated was not allocated", ptr, NULL);
+		malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
+		void *new_ptr = zone->malloc(zone, new_size);
+
+		if (new_ptr) {
+			size_t valid_size = MIN(old_size, new_size);
+			memcpy(new_ptr, ptr, valid_size);
+			_nano_free_check_scribble(nanozone, ptr, (nanozone->debug_flags & MALLOC_DO_SCRIBBLE));
+			return new_ptr;
+		} else {
+			/* Original ptr is left intact */
 			return NULL;
-		} else {
-			malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
-			void *new_ptr = zone->malloc(zone, new_size);
-
-			if (new_ptr) {
-				size_t valid_size = MIN(old_size, new_size);
-				memcpy(new_ptr, ptr, valid_size);
-				_nano_free_check_scribble(nanozone, ptr, (nanozone->debug_flags & MALLOC_DO_SCRIBBLE));
-				return new_ptr;
-			} else {
-				/* Original ptr is left intact */
-				return NULL;
-			}
-			/* NOTREACHED */
-		}
-	} else {
-		// other-than-nano (not necessarily larger! possibly NULL!) to whatever
-		malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
-
-		return zone->realloc(zone, ptr, new_size);
+		}
+		/* NOTREACHED */
 	}
 	/* NOTREACHED */
 }
@@ -1076,14 +1168,18 @@
 static void *
 nano_forked_realloc(nanozone_t *nanozone, void *ptr, size_t new_size)
 {
-	nano_blk_addr_t p; // happily, the compiler holds this in a register
-
-	p.addr = (uint64_t)ptr; // place ptr on the dissecting table
-
-	if (NULL == ptr) { // could occur through malloc_zone_realloc() path
+	// could occur through malloc_zone_realloc() path
+	if (!ptr) {
 		// If ptr is a null pointer, realloc() shall be equivalent to malloc() for the specified size.
 		return nano_forked_malloc(nanozone, new_size);
-	} else if (nanozone->our_signature == p.fields.nano_signature) { // Our signature?
+	}
+
+	size_t old_size = _nano_vet_and_size_of_live(nanozone, ptr);
+	if (!old_size) {
+		// not-nano pointer, hand down to helper zone
+		malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
+		return zone->realloc(zone, ptr, new_size);
+	} else {
 		if (0 == new_size) {
 			// If size is 0 and ptr is not a null pointer, the object pointed to is freed.
 			// However as nanozone metadata could be fouled by fork, we'll intentionally leak it.
@@ -1093,31 +1189,19 @@
 			return nano_forked_malloc(nanozone, 1);
 		}
 
-		size_t old_size = _nano_vet_and_size_of_live(nanozone, ptr);
-
-		if (!old_size) {
-			nanozone_error(nanozone, 1, "pointer being reallocated was not allocated", ptr, NULL);
+		malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
+		void *new_ptr = zone->malloc(zone, new_size);
+
+		if (new_ptr) {
+			size_t valid_size = MIN(old_size, new_size);
+			memcpy(new_ptr, ptr, valid_size);
+			/* Original pointer is intentionally leaked as nanozone metadata could be fouled by fork. */
+			return new_ptr;
+		} else {
+			/* Original ptr is left intact */
 			return NULL;
-		} else {
-			malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
-			void *new_ptr = zone->malloc(zone, new_size);
-
-			if (new_ptr) {
-				size_t valid_size = MIN(old_size, new_size);
-				memcpy(new_ptr, ptr, valid_size);
-				/* Original pointer is intentionally leaked as nanozone metadata could be fouled by fork. */
-				return new_ptr;
-			} else {
-				/* Original ptr is left intact */
-				return NULL;
-			}
-			/* NOTREACHED */
-		}
-	} else {
-		// other-than-nano (not necessarily larger! possibly NULL!) to whatever
-		malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
-
-		return zone->realloc(zone, ptr, new_size);
+		}
+		/* NOTREACHED */
 	}
 	/* NOTREACHED */
 }
@@ -1480,6 +1564,7 @@
 {
 	nanozone_t *nanozone;
 	kern_return_t err;
+	struct nanozone_s zone_copy;
 
 	if (!reader) {
 		reader = nanozone_default_reader;
@@ -1489,8 +1574,9 @@
 	if (err) {
 		return err;
 	}
-
-	err = segregated_in_use_enumerator(task, context, type_mask, nanozone, reader, recorder);
+	memcpy(&zone_copy, nanozone, sizeof(zone_copy));
+
+	err = segregated_in_use_enumerator(task, context, type_mask, &zone_copy, reader, recorder);
 	return err;
 }
 
@@ -1834,19 +1920,24 @@
 	nanozone_t *nanozone;
 	int i, j;
 
+	/* Note: It is important that create_nano_zone clears _malloc_engaged_nano
+	 * if it is unable to enable the nanozone (and chooses not to abort).
+	 * Several functions rely on _malloc_engaged_nano to determine whether they
+	 * should manipulate the nanozone, and they must not run if we failed
+	 * to create the zone.
+	 */
 	if (!_malloc_engaged_nano) {
 		return NULL;
 	}
 
-#if defined(__x86_64__)
 	if (_COMM_PAGE_VERSION_REQD > (*((uint16_t *)_COMM_PAGE_VERSION))) {
 		MALLOC_PRINTF_FATAL_ERROR((*((uint16_t *)_COMM_PAGE_VERSION)), "comm page version mismatch");
 	}
-#endif
 
 	/* get memory for the zone. */
 	nanozone = nano_allocate_pages(NULL, NANOZONE_PAGED_SIZE, 0, 0, VM_MEMORY_MALLOC);
 	if (!nanozone) {
+		_malloc_engaged_nano = false;
 		return NULL;
 	}
 
@@ -1878,20 +1969,18 @@
 	nanozone->our_signature = NANOZONE_SIGNATURE;
 
 /* Query the number of configured processors. */
-#if defined(__x86_64__)
 	nanozone->phys_ncpus = *(uint8_t *)(uintptr_t)_COMM_PAGE_PHYSICAL_CPUS;
 	nanozone->logical_ncpus = *(uint8_t *)(uintptr_t)_COMM_PAGE_LOGICAL_CPUS;
-#else
-#error Unknown architecture
-#endif
-
-	if (nanozone->phys_ncpus > sizeof(nanozone->core_mapped_size) / sizeof(nanozone->core_mapped_size[0])) {
-		_malloc_printf(ASL_LEVEL_NOTICE, "nano zone abandoned because NCPUS mismatch.\n");
-		return NULL;
+
+	if (nanozone->phys_ncpus > sizeof(nanozone->core_mapped_size) /
+			sizeof(nanozone->core_mapped_size[0])) {
+		MALLOC_PRINTF_FATAL_ERROR(nanozone->phys_ncpus,
+				"nanozone abandoned because NCPUS > max magazines.\n");
 	}
 
 	if (0 != (nanozone->logical_ncpus % nanozone->phys_ncpus)) {
-		MALLOC_PRINTF_FATAL_ERROR(nanozone->logical_ncpus % nanozone->phys_ncpus, "logical_ncpus % phys_ncpus != 0");
+		MALLOC_PRINTF_FATAL_ERROR(nanozone->logical_ncpus % nanozone->phys_ncpus,
+				"logical_ncpus % phys_ncpus != 0");
 	}
 
 	switch (nanozone->logical_ncpus / nanozone->phys_ncpus) {
@@ -1937,6 +2026,40 @@
 	return (malloc_zone_t *)nanozone;
 }
 
+boolean_t _malloc_engaged_nano; // whether the nano zone is to be used; decided by nano_init below
+// Decide whether nano malloc is engaged: apple[] setting, optional small-memory rule, envp override, then band VM preallocation.
+void
+nano_init(const char *envp[], const char *apple[])
+{
+    const char *flag = _simple_getenv(apple, "MallocNanoZone"); // the apple[] vector is consulted first
+	if (flag && flag[0] == '1') { // only a value starting with '1' engages nano at this step
+		_malloc_engaged_nano = 1;
+	}
+#if CONFIG_NANO_SMALLMEM_DYNAMIC_DISABLE_35305995
+	// Disable nano malloc on configurations with at most 1 GiB (1ull << 30 bytes) rdar://problem/35305995
+	uint64_t memsize = platform_hw_memsize(); // presumably the machine's memory size in bytes - TODO confirm against its definition
+	if (memsize <= (1ull << 30)) {
+		_malloc_engaged_nano = 0; // overrides whatever the apple[] setting chose above
+	}
+#endif // CONFIG_NANO_SMALLMEM_DYNAMIC_DISABLE_35305995
+	/* Explicit overrides from the environment */
+	flag = _simple_getenv(envp, "MallocNanoZone");
+	if (flag && flag[0] == '1') { // a leading '1' forces nano on
+		_malloc_engaged_nano = 1;
+	} else if (flag && flag[0] == '0') { // a leading '0' forces it off; any other value keeps the earlier decision
+		_malloc_engaged_nano = 0;
+	}
+#if NANO_PREALLOCATE_BAND_VM
+	// Unconditionally preallocate the VA space set aside for nano malloc to
+	// reserve it in all configurations. rdar://problem/33392283
+	boolean_t preallocated = nano_preallocate_band_vm();
+	if (!preallocated && _malloc_engaged_nano) { // a failed reservation is only reported when nano was going to be used
+		_malloc_printf(ASL_LEVEL_NOTICE, "nano zone abandoned due to inability to preallocate reserved vm space.\n");
+		_malloc_engaged_nano = 0;
+	}
+#endif
+}
+
 #endif // CONFIG_NANOZONE
 
 /* vim: set noet:ts=4:sw=4:cindent: */