Loading...
--- libmalloc/libmalloc-116.50.8/src/nano_malloc.c
+++ libmalloc/libmalloc-140.50.6/src/nano_malloc.c
@@ -125,7 +125,7 @@
 
 	if (add_guard_pages) {
 		addr += vm_page_size;
-		protect((void *)addr, size, PROT_NONE, debug_flags);
+		mvm_protect((void *)addr, size, PROT_NONE, debug_flags);
 	}
 	return (void *)addr;
 }
@@ -153,6 +153,44 @@
 		nanozone_error(nanozone, 0, "Can't deallocate_pages at", addr, NULL);
 	}
 }
+
+#if NANO_PREALLOCATE_BAND_VM
+static boolean_t
+nano_preallocate_band_vm(void) // Maps [s, e) in one call; TRUE only if the mapping landed exactly at s, FALSE otherwise.
+{
+	nano_blk_addr_t u; // scratch address assembled field by field, then read back through u.addr
+	uintptr_t s, e; // bounds of the range to map: s inclusive, e exclusive
+
+	u.fields.nano_signature = NANOZONE_SIGNATURE; // the signature is the only non-zero field for the start address
+	u.fields.nano_mag_index = 0;
+	u.fields.nano_band = 0;
+	u.fields.nano_slot = 0;
+	u.fields.nano_offset = 0;
+
+	s = u.addr; // start of first possible band
+
+	u.fields.nano_mag_index = (1 << NANO_MAG_BITS) - 1; // largest value that fits in NANO_MAG_BITS bits
+	u.fields.nano_band = (1 << NANO_BAND_BITS) - 1; // largest value that fits in NANO_BAND_BITS bits
+
+	e = u.addr + BAND_SIZE; // end of last possible band
+
+	mach_vm_address_t vm_addr = s; // in: requested address; out: address actually mapped
+	mach_vm_size_t vm_size = (e - s);
+
+	kern_return_t kr = mach_vm_map(mach_task_self(), &vm_addr, vm_size, 0,
+			VM_MAKE_TAG(VM_MEMORY_MALLOC_NANO), MEMORY_OBJECT_NULL, 0, FALSE, // NOTE(review): only the tag is passed as flags (no VM_FLAGS_ANYWHERE) - presumably a fixed-address request; confirm
+			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
+
+	void *q = (void *)vm_addr;
+	if (kr || q != (void*)s) { // Must get exactly what we asked for
+		if (!kr) { // the call succeeded but at a different address: give that mapping back
+			mach_vm_deallocate(mach_task_self(), vm_addr, vm_size);
+		}
+		return FALSE;
+	}
+	return TRUE;
+}
+#endif // NANO_PREALLOCATE_BAND_VM
 
 /*
  * We maintain separate free lists for each (quantized) size. The literature
@@ -190,8 +228,8 @@
 	pMeta->slot_current_base_addr = p;
 
 	mach_vm_address_t vm_addr = p & ~((uintptr_t)(BAND_SIZE - 1)); // Address of the (2MB) band covering this (128KB) slot
-
 	if (nanozone->band_max_mapped_baseaddr[mag_index] < vm_addr) {
+#if !NANO_PREALLOCATE_BAND_VM
 		// Obtain the next band to cover this slot
 		kern_return_t kr = mach_vm_map(mach_task_self(), &vm_addr, BAND_SIZE, 0, VM_MAKE_TAG(VM_MEMORY_MALLOC_NANO),
 				MEMORY_OBJECT_NULL, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
@@ -203,7 +241,7 @@
 			}
 			return FALSE;
 		}
-
+#endif
 		nanozone->band_max_mapped_baseaddr[mag_index] = vm_addr;
 	}
 
@@ -390,16 +428,80 @@
 				uintptr_t slot_band, clone_slot_band_base = clone_slot_base;
 				uintptr_t skip_adj = index_to_offset(nanozone, pMeta, (index_t)pMeta->slot_objects_skipped);
 
-				while (q.addr < pMeta->slot_limit_addr) {
-					// read slot in each remote band. Lands in some random location.
-					size_t len = MIN(pMeta->slot_bump_addr - q.addr, SLOT_IN_BAND_SIZE);
-					err = reader(task, (vm_address_t)(q.addr + skip_adj), len - skip_adj, (void **)&slot_band);
+				// Copy the bitarray_t denoting madvise()'d pages (if any) into *this* task's address space
+				bitarray_t madv_page_bitarray;
+				int log_page_count;
+
+				if (pMeta->slot_madvised_pages) {
+					log_page_count = pMeta->slot_madvised_log_page_count;
+					err = reader(task, (vm_address_t)(pMeta->slot_madvised_pages), bitarray_size(log_page_count),
+								 (void **)&madv_page_bitarray);
 					if (err) {
 						return err;
 					}
-
-					// Place the data just read in the correct position relative to the local magazine.
-					memcpy((void *)(clone_slot_band_base + skip_adj), (void *)slot_band, len - skip_adj);
+				} else {
+					madv_page_bitarray = NULL;
+					log_page_count = 0;
+				}
+
+				while (q.addr < pMeta->slot_limit_addr) {
+					// read slot in each remote band. Lands in some random location. Do not read
+					// parts of the slot that are in madvised pages.
+					if (!madv_page_bitarray) {
+						// Nothing madvised yet - read everything in one go.
+						size_t len = MIN(pMeta->slot_bump_addr - q.addr, SLOT_IN_BAND_SIZE) - skip_adj;
+						err = reader(task, (vm_address_t)(q.addr + skip_adj), len, (void **)&slot_band);
+						if (err) {
+							return err;
+						}
+
+						// Place the data just read in the correct position relative to the local magazine.
+						memcpy((void *)(clone_slot_band_base + skip_adj), (void *)slot_band, len);
+					} else {
+						// We madvised at least one page. Read only the pages that
+						// have not been madvised. If bitarray_t had operations
+						// like "get next bit set after a given bit" and "find
+						// next unset bit after a given bit", we could do this more
+						// efficiently but given that it doesn't, we have to walk
+						// through each page individually. In practice this is not
+						// much of an issue because this code is only used by
+						// sampling tools and the additional time required is not
+						// really noticeable.
+						size_t len = MIN(pMeta->slot_bump_addr - q.addr, SLOT_IN_BAND_SIZE) - skip_adj;
+						vm_address_t start_addr = (vm_address_t)(q.addr + skip_adj);
+						vm_address_t end_addr = (vm_address_t)(start_addr + len);
+						void *target_addr = (void *)(clone_slot_band_base + skip_adj);
+						for (vm_address_t addr = start_addr; addr < end_addr;) {
+							vm_address_t next_page_addr = trunc_page(addr) + vm_page_size;
+							size_t read_size = MIN(len, next_page_addr - addr);
+
+							boolean_t madvised = false;
+							nano_blk_addr_t r;
+							r.addr = addr;
+							index_t pgnum = ((((unsigned)r.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)r.fields.nano_offset)) >>
+							vm_kernel_page_shift;
+							unsigned int log_page_count = pMeta->slot_madvised_log_page_count;
+							madvised = (pgnum < (1 << log_page_count)) &&
+							bitarray_get(madv_page_bitarray, log_page_count, pgnum);
+							if (!madvised) {
+								// This is not an madvised page - grab the data.
+								err = reader(task, addr, read_size, (void **)&slot_band);
+								if (err) {
+									return err;
+								}
+
+								// Place the data just read in the correct position relative to the local magazine.
+								memcpy(target_addr, (void *)slot_band, read_size);
+							} else {
+								// This is an madvised page - there should be nothing in here that's
+								// on the freelist, so just write garbage to the target memory.
+								memset(target_addr, (char)0xee, read_size);
+							}
+							addr = next_page_addr;
+							target_addr += read_size;
+							len -= read_size;
+						}
+					}
 
 					// Simultaneously advance pointers in remote and ourselves to the next band.
 					q.addr += BAND_SIZE;
@@ -433,22 +535,6 @@
 					}
 				}
 				// N.B. pMeta->slot_LIFO in *this* task is now drained (remote free list has *not* been disturbed)
-
-				// Copy the bitarray_t denoting madvise()'d pages (if any) into *this* task's address space
-				bitarray_t madv_page_bitarray;
-				int log_page_count;
-
-				if (pMeta->slot_madvised_pages) {
-					log_page_count = pMeta->slot_madvised_log_page_count;
-					err = reader(task, (vm_address_t)(pMeta->slot_madvised_pages), bitarray_size(log_page_count),
-							(void **)&madv_page_bitarray);
-					if (err) {
-						return err;
-					}
-				} else {
-					madv_page_bitarray = NULL;
-					log_page_count = 0;
-				}
 
 				// Enumerate all the block indices issued to date, and report those not on the free list
 				index_t i;
@@ -1478,6 +1564,7 @@
 {
 	nanozone_t *nanozone;
 	kern_return_t err;
+	struct nanozone_s zone_copy;
 
 	if (!reader) {
 		reader = nanozone_default_reader;
@@ -1487,8 +1574,9 @@
 	if (err) {
 		return err;
 	}
-
-	err = segregated_in_use_enumerator(task, context, type_mask, nanozone, reader, recorder);
+	memcpy(&zone_copy, nanozone, sizeof(zone_copy));
+
+	err = segregated_in_use_enumerator(task, context, type_mask, &zone_copy, reader, recorder);
 	return err;
 }
 
@@ -1832,19 +1920,24 @@
 	nanozone_t *nanozone;
 	int i, j;
 
+	/* Note: It is important that create_nano_zone clears _malloc_engaged_nano
+	 * if it is unable to enable the nanozone (and chooses not to abort),
+	 * because several functions rely on _malloc_engaged_nano to determine
+	 * whether they should manipulate the nanozone, and these must not run
+	 * if we failed to create the zone.
+	 */
 	if (!_malloc_engaged_nano) {
 		return NULL;
 	}
 
-#if defined(__x86_64__)
 	if (_COMM_PAGE_VERSION_REQD > (*((uint16_t *)_COMM_PAGE_VERSION))) {
 		MALLOC_PRINTF_FATAL_ERROR((*((uint16_t *)_COMM_PAGE_VERSION)), "comm page version mismatch");
 	}
-#endif
 
 	/* get memory for the zone. */
 	nanozone = nano_allocate_pages(NULL, NANOZONE_PAGED_SIZE, 0, 0, VM_MEMORY_MALLOC);
 	if (!nanozone) {
+		_malloc_engaged_nano = false;
 		return NULL;
 	}
 
@@ -1876,20 +1969,18 @@
 	nanozone->our_signature = NANOZONE_SIGNATURE;
 
 /* Query the number of configured processors. */
-#if defined(__x86_64__)
 	nanozone->phys_ncpus = *(uint8_t *)(uintptr_t)_COMM_PAGE_PHYSICAL_CPUS;
 	nanozone->logical_ncpus = *(uint8_t *)(uintptr_t)_COMM_PAGE_LOGICAL_CPUS;
-#else
-#error Unknown architecture
-#endif
-
-	if (nanozone->phys_ncpus > sizeof(nanozone->core_mapped_size) / sizeof(nanozone->core_mapped_size[0])) {
-		_malloc_printf(ASL_LEVEL_NOTICE, "nano zone abandoned because NCPUS mismatch.\n");
-		return NULL;
+
+	if (nanozone->phys_ncpus > sizeof(nanozone->core_mapped_size) /
+			sizeof(nanozone->core_mapped_size[0])) {
+		MALLOC_PRINTF_FATAL_ERROR(nanozone->phys_ncpus,
+				"nanozone abandoned because NCPUS > max magazines.\n");
 	}
 
 	if (0 != (nanozone->logical_ncpus % nanozone->phys_ncpus)) {
-		MALLOC_PRINTF_FATAL_ERROR(nanozone->logical_ncpus % nanozone->phys_ncpus, "logical_ncpus % phys_ncpus != 0");
+		MALLOC_PRINTF_FATAL_ERROR(nanozone->logical_ncpus % nanozone->phys_ncpus,
+				"logical_ncpus % phys_ncpus != 0");
 	}
 
 	switch (nanozone->logical_ncpus / nanozone->phys_ncpus) {
@@ -1935,6 +2026,40 @@
 	return (malloc_zone_t *)nanozone;
 }
 
+boolean_t _malloc_engaged_nano; // set by nano_init(); create_nano_zone() returns NULL while this is false
+
+void
+nano_init(const char *envp[], const char *apple[]) // Decides the value of _malloc_engaged_nano from apple[], then memory size, then envp.
+{
+    const char *flag = _simple_getenv(apple, "MallocNanoZone"); // first source consulted: the apple[] vector
+	if (flag && flag[0] == '1') { // only the first character of the value is examined
+		_malloc_engaged_nano = 1;
+	}
+#if CONFIG_NANO_SMALLMEM_DYNAMIC_DISABLE_35305995
+	// Disable nano malloc on <=1gb configurations rdar://problem/35305995
+	uint64_t memsize = platform_hw_memsize();
+	if (memsize <= (1ull << 30)) { // 1ull << 30 bytes = 1 GiB; overrides the apple[] setting above
+		_malloc_engaged_nano = 0;
+	}
+#endif // CONFIG_NANO_SMALLMEM_DYNAMIC_DISABLE_35305995
+	/* Explicit overrides from the environment */
+	flag = _simple_getenv(envp, "MallocNanoZone"); // read last, so it wins over both decisions above
+	if (flag && flag[0] == '1') {
+		_malloc_engaged_nano = 1;
+	} else if (flag && flag[0] == '0') { // an absent variable or any other first character leaves the earlier decision untouched
+		_malloc_engaged_nano = 0;
+	}
+#if NANO_PREALLOCATE_BAND_VM
+	// Unconditionally preallocate the VA space set aside for nano malloc to
+	// reserve it in all configurations. rdar://problem/33392283
+	boolean_t preallocated = nano_preallocate_band_vm(); // called regardless of _malloc_engaged_nano
+	if (!preallocated && _malloc_engaged_nano) { // a failure is only reported, and nano only disabled, when nano was engaged
+		_malloc_printf(ASL_LEVEL_NOTICE, "nano zone abandoned due to inability to preallocate reserved vm space.\n");
+		_malloc_engaged_nano = 0;
+	}
+#endif // NANO_PREALLOCATE_BAND_VM
+}
+
 #endif // CONFIG_NANOZONE
 
 /* vim: set noet:ts=4:sw=4:cindent: */