Loading...
src/dyldInitialization.cpp dyld-44.4 dyld-96.2
--- dyld/dyld-44.4/src/dyldInitialization.cpp
+++ dyld/dyld-96.2/src/dyldInitialization.cpp
@@ -22,8 +22,11 @@
  * @APPLE_LICENSE_HEADER_END@
  */
 
+#define __STDC_LIMIT_MACROS
+#include <stdint.h>
 #include <stddef.h>
 #include <string.h>
+#include <stdlib.h>
 #include <mach/mach.h>
 #include <mach-o/loader.h>
 #include <mach-o/ldsyms.h>
@@ -31,7 +34,15 @@
 #if __ppc__ || __ppc64__
 	#include <mach-o/ppc/reloc.h>
 #endif
+#if __x86_64__
+	#include <mach-o/x86_64/reloc.h>
+#endif
 #include "dyld.h"
+
+#ifndef MH_PIE
+	#define MH_PIE 0x200000 
+#endif
+
 
 #if __LP64__
 	#define macho_header			mach_header_64
@@ -46,6 +57,15 @@
 	#define macho_section			section
 	#define RELOC_SIZE				2
 #endif
+
+#if __x86_64__
+	#define POINTER_RELOC X86_64_RELOC_UNSIGNED
+#else
+	#define POINTER_RELOC GENERIC_RELOC_VANILLA
+#endif
+
+// from dyld.cpp
+namespace dyld { extern bool isRosetta(); };
 
 
 //
@@ -94,20 +114,22 @@
 	}
 }
 
-
 //
 // If the kernel does not load dyld at its preferred address, we need to apply 
 // fixups to various initialized parts of the __DATA segment
 //
 static void rebaseDyld(const struct macho_header* mh, intptr_t slide)
 {
-	// get interesting pointers into dyld
+	// rebase non-lazy pointers (which all point internal to dyld, since dyld uses no shared libraries)
+	// and get interesting pointers into dyld
 	const uint32_t cmd_count = mh->ncmds;
 	const struct load_command* const cmds = (struct load_command*)(((char*)mh)+sizeof(macho_header));
 	const struct load_command* cmd = cmds;
 	const struct macho_segment_command* linkEditSeg = NULL;
+#if __x86_64__
+	const struct macho_segment_command* firstWritableSeg = NULL;
+#endif
 	const struct dysymtab_command* dynamicSymbolTable = NULL;
-	const struct macho_section* nonLazySection = NULL;
 	for (uint32_t i = 0; i < cmd_count; ++i) {
 		switch (cmd->cmd) {
 			case LC_SEGMENT_COMMAND:
@@ -119,9 +141,19 @@
 					const struct macho_section* const sectionsEnd = &sectionsStart[seg->nsects];
 					for (const struct macho_section* sect=sectionsStart; sect < sectionsEnd; ++sect) {
 						const uint8_t type = sect->flags & SECTION_TYPE;
-						if ( type == S_NON_LAZY_SYMBOL_POINTERS ) 
-							nonLazySection = sect;
+						if ( type == S_NON_LAZY_SYMBOL_POINTERS ) {
+							// rebase non-lazy pointers (which all point internal to dyld, since dyld uses no shared libraries)
+							const uint32_t pointerCount = sect->size / sizeof(uintptr_t);
+							uintptr_t* const symbolPointers = (uintptr_t*)(sect->addr + slide);
+							for (uint32_t j=0; j < pointerCount; ++j) {
+								symbolPointers[j] += slide;
+							}
+						}
 					}
+#if __x86_64__
+					if ( (firstWritableSeg == NULL) && (seg->initprot & VM_PROT_WRITE) )
+						firstWritableSeg = seg;
+#endif
 				}
 				break;
 			case LC_DYSYMTAB:
@@ -132,50 +164,29 @@
 	}
 	
 	// use reloc's to rebase all random data pointers
+#if __x86_64__
+	const uintptr_t relocBase = firstWritableSeg->vmaddr + slide;
+#else
 	const uintptr_t relocBase = (uintptr_t)mh;
+#endif
 	const relocation_info* const relocsStart = (struct relocation_info*)(linkEditSeg->vmaddr + slide + dynamicSymbolTable->locreloff - linkEditSeg->fileoff);
 	const relocation_info* const relocsEnd = &relocsStart[dynamicSymbolTable->nlocrel];
 	for (const relocation_info* reloc=relocsStart; reloc < relocsEnd; ++reloc) {
-		if ( (reloc->r_address & R_SCATTERED) == 0 ) {
-			if (reloc->r_length == RELOC_SIZE) {
-				switch(reloc->r_type) {
-					case GENERIC_RELOC_VANILLA:
-						*((uintptr_t*)(reloc->r_address + relocBase)) += slide;
-						break;
-				}
-			}
-		}
-		else {
-			const struct scattered_relocation_info* sreloc = (struct scattered_relocation_info*)reloc;
-			if (sreloc->r_length == RELOC_SIZE) {
-				uintptr_t* locationToFix = (uintptr_t*)(sreloc->r_address + relocBase);
-				switch(sreloc->r_type) {
-					case GENERIC_RELOC_VANILLA:
-		#if __ppc__ || __ppc64__
-					case PPC_RELOC_PB_LA_PTR:
-		#elif __i386__
-					case GENERIC_RELOC_PB_LA_PTR:
-		#endif
-					// Note the use of PB_LA_PTR is unique here.  Seems like ld should strip out all lazy pointers
-					// but it does not.  But, since all lazy-pointers point within dyld, they can be slid too
-						*locationToFix += slide;
-						break;
-				}
-			}
-		}
-	}
-	
-	// rebase non-lazy pointers (which all point internal to dyld, since dyld uses no shared libraries)
-	if ( nonLazySection != NULL ) {
-		const uint32_t pointerCount = nonLazySection->size / sizeof(uintptr_t);
-		uintptr_t* const symbolPointers = (uintptr_t*)(nonLazySection->addr + slide);
-		for (uint32_t j=0; j < pointerCount; ++j) {
-			symbolPointers[j] += slide;
-		}
-	}
-	
-	
-}
+	#if __ppc__ || __ppc64__ || __i386__
+		if ( (reloc->r_address & R_SCATTERED) != 0 )	// only plain relocation_info entries are handled below
+			throw "scattered relocation in dyld";
+	#endif
+		if ( reloc->r_length != RELOC_SIZE ) 
+			throw "relocation in dyld has wrong size";
+
+		if ( reloc->r_type != POINTER_RELOC ) 
+			throw "relocation in dyld has wrong type";
+		
+		// update pointer by amount dyld slid
+		*((uintptr_t*)(reloc->r_address + relocBase)) += slide;
+	}
+}
+
 
 //
 // For some reason the kernel loads dyld with __TEXT and __LINKEDIT writable
@@ -195,7 +206,7 @@
 					vm_size_t size = seg->vmsize;
 					const bool setCurrentPermissions = false;
 					vm_protect(mach_task_self(), addr, size, setCurrentPermissions, seg->initprot);
-					//fprintf(stderr, "dyld: segment %s, 0x%08X -> 0x%08X, set to %d\n", seg->segname, addr, addr+size-1, seg->initprot);
+					//dyld::log("dyld: segment %s, 0x%08X -> 0x%08X, set to %d\n", seg->segname, addr, addr+size-1, seg->initprot);
 				}
 				break;
 		}
@@ -204,8 +215,113 @@
 	
 }
 
+
+//
+// Re-map the main executable to a new random page-aligned address.  Returns the
+// new base address as the mach_header and stores the distance moved in *appsSlide;
+// returns orgMH and leaves *appsSlide unwritten whenever the image is not moved.
+//
+static const struct mach_header* randomizeExecutableLoadAddress(const struct mach_header* orgMH, uintptr_t* appsSlide)
+{
+#if __ppc__
+	// don't slide PIE programs running under rosetta
+	if ( dyld::isRosetta() )
+		return orgMH;
+#endif
+	// count segments
+	uint32_t segCount = 0;
+	const uint32_t cmd_count = orgMH->ncmds;
+	const struct load_command* const cmds = (struct load_command*)(((char*)orgMH)+sizeof(macho_header));
+	const struct load_command* cmd = cmds;
+	for (uint32_t i = 0; i < cmd_count; ++i) {
+		if ( cmd->cmd == LC_SEGMENT_COMMAND ) {
+			const struct macho_segment_command* segCmd = (struct macho_segment_command*)cmd;
+			// page-zero and custom stacks don't move
+			if ( (strcmp(segCmd->segname, "__PAGEZERO") != 0) && (strcmp(segCmd->segname, "__UNIXSTACK") != 0) )
+				++segCount;
+		}
+		cmd = (const struct load_command*)(((char*)cmd)+cmd->cmdsize);
+	}
+	
+	// make copy of segment info (same segment filter as the counting pass above)
+	macho_segment_command segs[segCount];
+	uint32_t index = 0;
+	uintptr_t highestAddressUsed = 0;
+	uintptr_t lowestAddressUsed = UINTPTR_MAX;
+	cmd = cmds;
+	for (uint32_t i = 0; i < cmd_count; ++i) {
+		if ( cmd->cmd == LC_SEGMENT_COMMAND ) {
+			const struct macho_segment_command* segCmd = (struct macho_segment_command*)cmd;
+			if ( (strcmp(segCmd->segname, "__PAGEZERO") != 0) && (strcmp(segCmd->segname, "__UNIXSTACK") != 0) ) {
+				segs[index++] = *segCmd;
+				if ( (segCmd->vmaddr + segCmd->vmsize) > highestAddressUsed )
+					highestAddressUsed = ((segCmd->vmaddr + segCmd->vmsize) + 4095) & -4096;	// round end up to a 4KB boundary
+				if ( segCmd->vmaddr < lowestAddressUsed )
+					lowestAddressUsed = segCmd->vmaddr;
+				// do nothing if kernel has already randomized load address
+				if ( (strcmp(segCmd->segname, "__TEXT") == 0) && (segCmd->vmaddr != (uintptr_t)orgMH) )
+					return orgMH;
+			}
+		}
+		cmd = (const struct load_command*)(((char*)cmd)+cmd->cmdsize);
+	}
+	
+	// choose a random new base address
+#if __LP64__
+	uintptr_t highestAddressPossible = highestAddressUsed + 0x100000000ULL;
+#else
+	uintptr_t highestAddressPossible = 0x80000000;
+#endif
+	uintptr_t sizeNeeded = highestAddressUsed-lowestAddressUsed;
+	if ( (highestAddressPossible <= highestAddressUsed) || ((highestAddressPossible-highestAddressUsed) <= sizeNeeded) ) {
+		// no room above the current image for a non-overlapping copy, so punt and
+		// don't re-map.  The test avoids computing (highestAddressPossible-sizeNeeded)
+		// so it cannot wrap, and uses <= so possibleRange (a modulus) is never zero.
+		return orgMH;
+	}
+	uintptr_t possibleRange = (highestAddressPossible-sizeNeeded) - highestAddressUsed;
+	uintptr_t newBaseAddress = highestAddressUsed + ((arc4random() % possibleRange) & -4096);
+	vm_address_t addr = newBaseAddress;
+	// reserve new address range
+	if ( vm_allocate(mach_task_self(), &addr, sizeNeeded, VM_FLAGS_FIXED) == KERN_SUCCESS ) {
+		// copy each segment to new address
+		for (uint32_t i = 0; i < segCount; ++i) {
+			uintptr_t newSegAddress = segs[i].vmaddr - lowestAddressUsed + newBaseAddress;
+			if ( (vm_copy(mach_task_self(), segs[i].vmaddr, segs[i].vmsize, newSegAddress) != KERN_SUCCESS)
+				|| (vm_protect(mach_task_self(), newSegAddress, segs[i].vmsize, true, segs[i].maxprot) != KERN_SUCCESS) 
+				|| (vm_protect(mach_task_self(), newSegAddress, segs[i].vmsize, false, segs[i].initprot) != KERN_SUCCESS) ) {
+				// can't copy so dealloc new region and run with original base address
+				vm_deallocate(mach_task_self(), newBaseAddress, sizeNeeded);
+				dyld::warn("could not relocate position independent exectable\n");
+				return orgMH;
+			}
+		}
+		// unmap original segments
+		vm_deallocate(mach_task_self(), lowestAddressUsed, highestAddressUsed-lowestAddressUsed);
+		// run with newly mapped executable
+		*appsSlide = newBaseAddress - lowestAddressUsed;
+		return (const struct mach_header*)newBaseAddress;
+	}
+	
+	// can't get new range, so don't slide to random address
+	return orgMH;
+}
+
+
 extern "C" void dyld_exceptions_init(const struct macho_header*, uintptr_t slide); // in dyldExceptions.cpp
 extern "C" void mach_init();
+
+//
+// _pthread_keys is partitioned into a lower part that dyld will use and an
+// upper part that libSystem will use.  We set __pthread_tsd_first to 1 as the
+// start of the lower part.  Libc will take key #1 and C++ exceptions will take
+// key #2, which leaves one free key (#3).
+//
+extern "C" {
+	extern int __pthread_tsd_first;
+	extern void _pthread_keys_init();
+}
+
 
 //
 //  This is code to bootstrap dyld.  This work in normally done for a program by dyld and crt.
@@ -222,6 +338,12 @@
 		rebaseDyld(dyldsMachHeader, slide);
 	}
 	
+	uintptr_t appsSlide = 0;
+	
+	// set pthread keys to dyld range
+	__pthread_tsd_first = 1;
+	_pthread_keys_init();
+	
 	// enable C++ exceptions to work inside dyld
 	dyld_exceptions_init(dyldsMachHeader, slide);
 	
@@ -242,8 +364,12 @@
 	// run all C++ initializers inside dyld
 	runDyldInitializers(dyldsMachHeader, slide, argc, argv, envp, apple);
 	
+	// if main executable was linked -pie, then randomize its load address
+	if ( appsMachHeader->flags & MH_PIE )
+		appsMachHeader = randomizeExecutableLoadAddress(appsMachHeader, &appsSlide);
+	
 	// now that we are done bootstrapping dyld, call dyld's main
-	return dyld::_main(appsMachHeader, argc, argv, envp, apple);
+	return dyld::_main(appsMachHeader, appsSlide, argc, argv, envp, apple);
 }