Loading...
src/ImageLoaderMachO.cpp dyld-43 dyld-46.12
--- dyld/dyld-43/src/ImageLoaderMachO.cpp
+++ dyld/dyld-46.12/src/ImageLoaderMachO.cpp
@@ -34,8 +34,16 @@
 #include <mach-o/reloc.h> 
 #include <mach-o/nlist.h> 
 #include <sys/sysctl.h>
+#include <libkern/OSAtomic.h>
 #if __ppc__ || __ppc64__
 	#include <mach-o/ppc/reloc.h>
+#endif
+#if __x86_64__
+	#include <mach-o/x86_64/reloc.h>
+#endif
+
+#ifndef S_ATTR_SELF_MODIFYING_CODE
+  #define S_ATTR_SELF_MODIFYING_CODE 0x04000000
 #endif
 
 #include "ImageLoaderMachO.h"
@@ -72,9 +80,15 @@
 	struct macho_routines_command	: public routines_command  {};	
 #endif
 
+#if __x86_64__
+	#define POINTER_RELOC X86_64_RELOC_UNSIGNED
+#else
+	#define POINTER_RELOC GENERIC_RELOC_VANILLA
+#endif
 
 uint32_t ImageLoaderMachO::fgHintedBinaryTreeSearchs = 0;
 uint32_t ImageLoaderMachO::fgUnhintedBinaryTreeSearchs = 0;
+uint32_t ImageLoaderMachO::fgCountOfImagesWithWeakExports = 0;
 
 
 //#define LINKEDIT_USAGE_DEBUG 1
@@ -176,6 +190,12 @@
 	this->parseLoadCmds();
 }
 
+ImageLoaderMachO::~ImageLoaderMachO()
+{
+	// Destructor: remove this image from the global tally of images with weak (coalesced) exports.
+	if ( this->hasCoalescedExports() )
+		--fgCountOfImagesWithWeakExports;	// counterpart of the ++ performed just before the load commands are walked
+}
 
 
 
@@ -188,7 +208,8 @@
 	const struct load_command* cmd = cmds;
 	for (unsigned long i = 0; i < cmd_count; ++i) {
 		if ( cmd->cmd == LC_SEGMENT_COMMAND ) {
-			fSegments.push_back(new SegmentMachO((struct macho_segment_command*)cmd, this, fileData));
+			if ( (((struct macho_segment_command*)cmd)->vmsize != 0) || !fIsSplitSeg )
+				fSegments.push_back(new SegmentMachO((struct macho_segment_command*)cmd, this, fileData));
 		}
 		cmd = (const struct load_command*)(((char*)cmd)+cmd->cmdsize);
 	}
@@ -316,6 +337,38 @@
 	= 0;
 #endif
 
+static int 	// mmap()-based alternative to _shared_region_map_file_np(); always returns 0, failure is reported by throwing
+_shared_region_map_file_with_mmap(
+	int fd,							// file descriptor to map into shared region
+	unsigned int regionCount,		// number of entries in array of regions
+	const _shared_region_mapping_np regions[])	// the array of regions to map
+{
+	// map in each region at the exact address recorded for it (MAP_FIXED), as a private mapping (MAP_PRIVATE)
+	for(unsigned int i=0; i < regionCount; ++i) {
+		void* mmapAddress = (void*)(uintptr_t)(regions[i].address);
+		size_t size = regions[i].size;
+		if ( (regions[i].init_prot & VM_PROT_ZF) != 0 ) {
+			// zero-fill region: nothing is mapped from the file.  NOTE(review): presumably the range was already vm_allocate()'d (which zero fills) by the caller -- confirm
+		}
+		else {
+			int protection = 0;	// translate the VM_PROT_* bits of init_prot into the PROT_* bits that mmap() takes
+			if ( regions[i].init_prot & VM_PROT_EXECUTE )
+				protection   |= PROT_EXEC;
+			if ( regions[i].init_prot & VM_PROT_READ )
+				protection   |= PROT_READ;
+			if ( regions[i].init_prot & VM_PROT_WRITE )
+				protection   |= PROT_WRITE;
+			off_t offset = regions[i].file_offset;
+			//fprintf(stderr, "mmap(%p, 0x%08lX, block=0x%08X, %s\n", mmapAddress, size, biggestDiff, fPath);
+			mmapAddress = mmap(mmapAddress, size, protection, MAP_FILE | MAP_FIXED | MAP_PRIVATE, fd, offset);
+			if ( mmapAddress == ((void*)(-1)) )	// mmap() signals failure by returning (void*)-1 (MAP_FAILED)
+				throw "mmap error";
+		}
+	}
+	
+	return 0;	// reached only when no mmap() call above failed
+}
+
 
 static
 bool
@@ -366,6 +419,8 @@
 					}
 					sNextAltLoadAddress += 0x00100000;  // skip ahead 1MB and try again
 					if ( (sNextAltLoadAddress & 0xF0000000) == 0x90000000 )
+						sNextAltLoadAddress = 0xB0000000;
+					if ( (sNextAltLoadAddress & 0xF0000000) == 0xF0000000 )
 						throw "can't map split seg anywhere";
 					foundRoom = false;
 					break;
@@ -437,6 +492,7 @@
 		kSharedRegionLoadFileState,
 		kSharedRegionMapFileState,
 		kSharedRegionMapFilePrivateState,
+		kSharedRegionMapFilePrivateMMapState,
 		kSharedRegionMapFilePrivateOutsideState,
 	};
 	static SharedRegionState sSharedRegionState = kSharedRegionStartState;
@@ -447,7 +503,15 @@
 	
 	if ( kSharedRegionStartState == sSharedRegionState ) {
 		if ( hasSharedRegionMapFile() ) {
-			if ( (context.sharedRegionMode == kUsePrivateSharedRegion) || context.slideAndPackDylibs ) { 
+			if ( context.slideAndPackDylibs ) { 
+				sharedRegionMakePrivate(context);
+				// remove underlying submap and block out 0x90000000 to 0xAFFFFFFF
+				vm_address_t addr = (vm_address_t)0x90000000;
+				vm_deallocate(mach_task_self(), addr, 0x20000000);
+				vm_allocate(mach_task_self(), &addr, 0x20000000, false);
+				sSharedRegionState = kSharedRegionMapFilePrivateMMapState;
+			}
+			else if ( context.sharedRegionMode == kUsePrivateSharedRegion ) { 
 				sharedRegionMakePrivate(context);
 				sSharedRegionState = kSharedRegionMapFilePrivateState;
 			}
@@ -476,8 +540,8 @@
 		}
 	}
 	
-	if ( kSharedRegionMapFilePrivateState == sSharedRegionState ) {
-		if ( 0 != sharedRegionMapFilePrivate(fd, offsetInFat, lenInFat, fileLen, context) ) {
+	if ( (kSharedRegionMapFilePrivateState == sSharedRegionState) || (kSharedRegionMapFilePrivateMMapState == sSharedRegionState) ) {
+		if ( 0 != sharedRegionMapFilePrivate(fd, offsetInFat, lenInFat, fileLen, context, (kSharedRegionMapFilePrivateMMapState == sSharedRegionState)) ) {
 			sSharedRegionState = kSharedRegionMapFilePrivateOutsideState;
 		}
 	}
@@ -608,12 +672,14 @@
 	return r;
 }
 
+
 int
 ImageLoaderMachO::sharedRegionMapFilePrivate(int fd,
 											 uint64_t offsetInFat,
 											 uint64_t lenInFat,
 											 uint64_t fileLen,
-											 const LinkContext& context)
+											 const LinkContext& context,
+											 bool usemmap)
 {
 	const unsigned int segmentCount = fSegments.size();
 
@@ -650,7 +716,11 @@
 	uint64_t slide = 0;
 
 	// try map it in privately (don't allow sliding if we pre-calculated the load address to pack dylibs)
-	int r = _shared_region_map_file_np(fd, mappingTableCount, mappingTable, context.slideAndPackDylibs ? NULL : &slide);
+	int r;
+	if ( usemmap )
+		r = _shared_region_map_file_with_mmap(fd, mappingTableCount, mappingTable);
+	else
+		r = _shared_region_map_file_np(fd, mappingTableCount, mappingTable, context.slideAndPackDylibs ? NULL : &slide);
 	if ( 0 == r ) {
 		if ( 0 != slide ) {
 			slide = (slide) & (-4096); // round down to page boundary
@@ -702,7 +772,7 @@
 		}
 	}
 	if ( context.slideAndPackDylibs && (r != 0) )
-		throw "can't rebase split-seg dylib";
+		throwf("can't rebase split-seg dylib %s because shared_region_map_file_np() returned %d", this->getPath(), r);
 	
 	return r;
 }
@@ -848,6 +918,10 @@
 		}
 	}
 
+	// keep count of prebound images with weak exports
+	if ( this->hasCoalescedExports() )
+		++fgCountOfImagesWithWeakExports;
+
 	// walk load commands (mapped in at start of __TEXT segment)
 	const uint32_t cmd_count = ((macho_header*)fMachOData)->ncmds;
 	const struct load_command* const cmds = (struct load_command*)&fMachOData[sizeof(macho_header)];
@@ -891,8 +965,9 @@
 							fModInitSection = sect;
 						else if ( type == S_MOD_TERM_FUNC_POINTERS )
 							fModTermSection = sect;
-						else if ( isDataSeg && (strcmp(sect->sectname, "__dyld") == 0) )
-							fDATAdyld = sect;
+						else if ( isDataSeg && (strcmp(sect->sectname, "__dyld") == 0) ) {
+								fDATAdyld = sect;
+						}
 						else if ( isDataSeg && (strcmp(sect->sectname, "__image_notify") == 0) )
 							fImageNotifySection = sect;
 					}
@@ -906,6 +981,12 @@
 					fDylibID = (struct dylib_command*)cmd;
 				}
 				break;
+			case LC_LOAD_WEAK_DYLIB:
+				// do nothing, just prevent LC_REQ_DYLD exception from occurring
+				break;
+			default:
+				if ( (cmd->cmd & LC_REQ_DYLD) != 0 )
+					throwf("unknown required load command 0x%08X", cmd->cmd);
 		}
 		cmd = (const struct load_command*)(((char*)cmd)+cmd->cmdsize);
 	}
@@ -1047,6 +1128,9 @@
 			#elif __i386__
 				const i386_thread_state_t* registers = (i386_thread_state_t*)(((char*)cmd) + 16);
 				return (void*)registers->eip;
+			#elif __x86_64__
+				const x86_thread_state64_t* registers = (x86_thread_state64_t*)(((char*)cmd) + 16);
+				return (void*)registers->rip;
 			#else
 				#warning need processor specific code
 			#endif
@@ -1122,19 +1206,29 @@
 	return info;
 }
 
+uintptr_t ImageLoaderMachO::getFirstWritableSegmentAddress()
+{
+	// Returns the actual load address of the first segment in fSegments that reports writeable(); in split segment libraries r_address is offset from first writable segment
+	for (std::vector<class Segment*>::iterator it=fSegments.begin(); it != fSegments.end(); ++it) {
+		if ( (*it)->writeable() ) {
+			return (*it)->getActualLoadAddress();
+		}
+	}
+	throw "no writable segment";	// no segment in fSegments is writable, so there is no base address to return
+}
 
 uintptr_t ImageLoaderMachO::getRelocBase()
 {
+#if __x86_64__
+	// r_address is offset from first writable segment
+	return getFirstWritableSegmentAddress();
+#endif
+#if __ppc__ || __i386__
 	if ( fIsSplitSeg ) {
 		// in split segment libraries r_address is offset from first writable segment
-		const unsigned int segmentCount = fSegments.size();
-		for(unsigned int i=0; i < segmentCount; ++i){
-			Segment* seg = fSegments[i];
-			if ( seg->writeable() ) {
-				return seg->getActualLoadAddress();
-			}
-		}
-	}
+		return getFirstWritableSegmentAddress();
+	}
+#endif
 	
 	// in non-split segment libraries r_address is offset from first segment
 	return fSegments[0]->getActualLoadAddress();
@@ -1167,14 +1261,40 @@
 }
 #endif
 
+#if __ppc__ || __i386__
+void ImageLoaderMachO::resetPreboundLazyPointers(const LinkContext& context, uintptr_t relocBase)
+{
+	// Loop through all local (internal) relocation records looking for pre-bound-lazy-pointer records, and overwrite each such pointer with r_value + slide.  'context' is not referenced in this body.
+	register const uintptr_t slide = this->fSlide;
+	const relocation_info* const relocsStart = (struct relocation_info*)(&fLinkEditBase[fDynamicInfo->locreloff]);
+	const relocation_info* const relocsEnd = &relocsStart[fDynamicInfo->nlocrel];
+	for (const relocation_info* reloc=relocsStart; reloc < relocsEnd; ++reloc) {
+		if ( (reloc->r_address & R_SCATTERED) != 0 ) {	// only scattered records are examined; all others are skipped
+			const struct scattered_relocation_info* sreloc = (struct scattered_relocation_info*)reloc;
+			if (sreloc->r_length == RELOC_SIZE) {
+				uintptr_t* locationToFix = (uintptr_t*)(sreloc->r_address + relocBase);	// r_address is an offset from the caller-supplied relocBase
+				switch(sreloc->r_type) {
+		#if __ppc__ 
+					case PPC_RELOC_PB_LA_PTR:
+						*locationToFix = sreloc->r_value + slide;	// NOTE(review): r_value is presumably the pointer's original unbound (lazy) target -- confirm against <mach-o/reloc.h>
+						break;
+		#endif
+		#if __i386__
+					case GENERIC_RELOC_PB_LA_PTR:
+						*locationToFix = sreloc->r_value + slide;	// same reset as the ppc case, under the generic relocation type name
+						break;
+		#endif
+				}
+			}
+		}
+	}
+}
+#endif
+
 void ImageLoaderMachO::doRebase(const LinkContext& context)
 {
 	// if prebound and loaded at prebound address, then no need to rebase
-	// Note: you might think that the check for allDependentLibrariesAsWhenPreBound() is not needed
-	// but it is.  If a dependent library changed, this image's lazy pointers into that library
-	// need to be updated (reset back to lazy binding handler).  That work is done most easily
-	// here because there is a PPC_RELOC_PB_LA_PTR reloc record for each lazy pointer.
-	if ( this->usablePrebinding(context) && this->usesTwoLevelNameSpace() ) {
+	if ( this->usablePrebinding(context) ) {
 		// skip rebasing cause prebound and prebinding not disabled
 		++fgImagesWithUsedPrebinding; // bump totals for statistics
 		return;
@@ -1199,18 +1319,41 @@
 		}
 	}
 
+	// cache values that are used in the following loop
+	const uintptr_t relocBase = this->getRelocBase();
+	register const uintptr_t slide = this->fSlide;
+
+#if __ppc__ || __i386__
+	// if prebound and we got here, then prebinding is not valid, so reset all lazy pointers
+	if ( this->isPrebindable() )
+		this->resetPreboundLazyPointers(context, relocBase);
+#endif
+
+	// if loaded at preferred address, no rebasing necessary
+	if ( slide == 0 ) 
+		return;
+
 	// if there are __TEXT fixups, temporarily make __TEXT writable
 	if ( fTextSegmentWithFixups != NULL ) 
 		fTextSegmentWithFixups->tempWritable();
 
-	// cache this value that is used in the following loop
-	register const uintptr_t slide = this->fSlide;
-	
 	// loop through all local (internal) relocation records
-	const uintptr_t relocBase = this->getRelocBase();
 	const relocation_info* const relocsStart = (struct relocation_info*)(&fLinkEditBase[fDynamicInfo->locreloff]);
 	const relocation_info* const relocsEnd = &relocsStart[fDynamicInfo->nlocrel];
 	for (const relocation_info* reloc=relocsStart; reloc < relocsEnd; ++reloc) {
+	#if __x86_64__
+		// only one kind of local relocation supported for x86_64
+		if ( reloc->r_length != 3 ) 
+			throw "bad local relocation length";
+		if ( reloc->r_type != X86_64_RELOC_UNSIGNED ) 
+			throw "unknown local relocation type";
+		if ( reloc->r_pcrel != 0 ) 
+			throw "bad local relocation pc_rel";
+		if ( reloc->r_extern != 0 ) 
+			throw "extern relocation found with local relocations";
+		*((uintptr_t*)(reloc->r_address + relocBase)) += slide;
+	#endif
+	#if __ppc__ || __ppc64__ || __i386__
 		if ( (reloc->r_address & R_SCATTERED) == 0 ) {
 			if ( reloc->r_symbolnum == R_ABS ) {
 				// ignore absolute relocations
@@ -1245,12 +1388,6 @@
 					case GENERIC_RELOC_VANILLA:
 						*locationToFix += slide;
 						break;
-		#if __ppc__ || __ppc64__
-					case PPC_RELOC_PB_LA_PTR:
-						// should only see these in prebound images, and we got here so prebinding is being ignored
-						*locationToFix = sreloc->r_value + slide;
-						break;
-		#endif
 		#if __ppc__
 					case PPC_RELOC_HI16: 
 					case PPC_RELOC_LO16: 
@@ -1260,10 +1397,18 @@
 						otherRelocsPPC(locationToFix, sreloc->r_type, reloc->r_address, slide);
 						break;
 		#endif
-		#if __i386__
+		#if __ppc__ 
+					case PPC_RELOC_PB_LA_PTR:
+						// do nothing
+						break;
+		#elif __ppc64__
+					case PPC_RELOC_PB_LA_PTR:
+						// these should never exist in ppc64, but the first ld64 had a bug and created them
+						*locationToFix = sreloc->r_value + slide;
+						break;
+		#elif __i386__
 					case GENERIC_RELOC_PB_LA_PTR:
-						// should only see these in prebound images, and we got here so prebinding is being ignored
-						*locationToFix = sreloc->r_value + slide;
+						// do nothing
 						break;
 		#endif
 					default:
@@ -1274,6 +1419,7 @@
 				throw "bad local scattered relocation length";
 			}
 		}
+	#endif
 	}
 	
 	// if there were __TEXT fixups, restore write protection
@@ -1443,6 +1589,7 @@
 		}
 	}
 
+    
 	return NULL;
 }
 
@@ -1609,6 +1756,12 @@
 
 	if ( context.bindFlat || !twoLevel ) {
 		// flat lookup
+		if ( ((undefinedSymbol->n_type & N_PEXT) != 0) && ((undefinedSymbol->n_type & N_TYPE) == N_SECT) ) {
+			// is a multi-module private_extern internal reference that the linker did not optimize away
+			uintptr_t addr = undefinedSymbol->n_value + this->fSlide;
+			*foundIn = this;
+			return addr;
+		}
 		const Symbol* sym;
 		if ( context.flatExportFinder(symbolName, &sym, foundIn) )
 			return (*foundIn)->getExportedSymbolAddress(sym);
@@ -1618,13 +1771,6 @@
 			sym = this->findExportedSymbol(symbolName, NULL, false, foundIn);
 			if ( sym != NULL )
 				return (*foundIn)->getExportedSymbolAddress(sym);
-		}
-		if ( ((undefinedSymbol->n_type & N_PEXT) != 0) || ((undefinedSymbol->n_type & N_TYPE) == N_SECT) ) {
-			// could be a multi-module private_extern internal reference
-			// the static linker squirrels away the target address in n_value
-			uintptr_t addr = undefinedSymbol->n_value + this->fSlide;
-			*foundIn = this;
-			return addr;
 		}
 		if ( (undefinedSymbol->n_desc & N_WEAK_REF) != 0 ) {
 			// definition can't be found anywhere
@@ -1715,6 +1861,34 @@
 	}
 }
 
+// Returns true if 'addr' lies in [section->addr, section->addr+section->size) of section number 'sectionIndex'; returns false otherwise, including when no segment contains that section number.
+// fSlide is not used.  'addr' is assumed to be a prebound address in this image 
+bool ImageLoaderMachO::isAddrInSection(uintptr_t addr, uint8_t sectionIndex)
+{
+	uint8_t currentSectionIndex = 1;	// section numbers start at 1 and run consecutively across segments in load-command order
+	const uint32_t cmd_count = ((macho_header*)fMachOData)->ncmds;
+	const struct load_command* const cmds = (struct load_command*)&fMachOData[sizeof(macho_header)];
+	const struct load_command* cmd = cmds;
+	for (unsigned long i = 0; i < cmd_count; ++i) {
+		if ( cmd->cmd == LC_SEGMENT_COMMAND ) {
+			const struct macho_segment_command* seg = (struct macho_segment_command*)cmd;
+			if ( (currentSectionIndex <= sectionIndex) && (sectionIndex < currentSectionIndex+seg->nsects) ) {
+				// 'sectionIndex' is in this segment, get section info (the section headers sit directly after the segment command)
+				const struct macho_section* const sectionsStart = (struct macho_section*)((char*)seg + sizeof(struct macho_segment_command));
+				const struct macho_section* const section = &sectionsStart[sectionIndex-currentSectionIndex];
+				return ( (section->addr <= addr) && (addr < section->addr+section->size) );
+			}
+			else {
+				// 'sectionIndex' not in this segment, skip to next segment
+				currentSectionIndex += seg->nsects;
+			}
+		}
+		cmd = (const struct load_command*)(((char*)cmd)+cmd->cmdsize);	// step over this load command using its own recorded size
+	}
+	
+	return false;	// walked every load command without finding a segment that holds 'sectionIndex'
+}
+
 void ImageLoaderMachO::doBindExternalRelocations(const LinkContext& context, bool onlyCoalescedSymbols)
 {
 	const uintptr_t relocBase = this->getRelocBase();
@@ -1736,7 +1910,7 @@
 	for (const relocation_info* reloc=relocsStart; reloc < relocsEnd; ++reloc) {
 		if (reloc->r_length == RELOC_SIZE) {
 			switch(reloc->r_type) {
-				case GENERIC_RELOC_VANILLA:
+				case POINTER_RELOC:
 					{
 						const struct macho_nlist* undefinedSymbol = &fSymbolTable[reloc->r_symbolnum];
 						// if only processing coalesced symbols and this one does not require coalesceing, skip to next
@@ -1744,12 +1918,31 @@
 							continue;
 						uintptr_t* location = ((uintptr_t*)(reloc->r_address + relocBase));
 						uintptr_t value = *location;
+					#if __i386__
+						if ( reloc->r_pcrel ) {
+							value += (uintptr_t)location + 4 - fSlide;
+						}
+					#endif
 						if ( prebound ) {
 							// we are doing relocations, so prebinding was not usable
-							// in a prebound executable, the n_value field is set to the address where the symbol was found when prebound
+							// in a prebound executable, the n_value field of an undefined symbol is set to the address where the symbol was found when prebound
 							// so, subtracting that gives the initial displacement which we need to add to the newly found symbol address
-							// if mach-o relocation structs had an "addend" field this would not be necessary.
-							value -= undefinedSymbol->n_value;
+							// if mach-o relocation structs had an "addend" field this complication would not be necessary.
+							if ( ((undefinedSymbol->n_type & N_TYPE) == N_SECT) && ((undefinedSymbol->n_desc & N_WEAK_DEF) != 0) ) {
+								// weak symbols need special casing, since *location may have been prebound to a definition in another image.
+								// If *location is currently prebound to somewhere in the same section as the weak definition, we assume 
+								// that we can subtract off the weak symbol address to get the addend.
+								// If prebound elsewhere, we've lost the addend and have to assume it is zero.
+								// The prebinding to elsewhere only happens with 10.4+ update_prebinding which only operates on a small set of Apple dylibs
+								if ( (value == undefinedSymbol->n_value) || this->isAddrInSection(value, undefinedSymbol->n_sect) )
+									value -= undefinedSymbol->n_value;
+								else
+									value = 0;
+							} 
+							else {
+								// is undefined or non-weak symbol, so do subtraction to get addend
+								value -= undefinedSymbol->n_value;
+							}
 						}
 						// if undefinedSymbol is same as last time, then symbolAddr and image will resolve to the same too
 						if ( undefinedSymbol != lastUndefinedSymbol ) {
@@ -1773,7 +1966,20 @@
 							}
 						}
 						value += symbolAddr;
-						*location = value; 
+					#if __i386__
+						if ( reloc->r_pcrel ) {
+							*location = value - ((uintptr_t)location + 4);
+						}
+						else {
+							// don't dirty page if prebound value was correct
+							if ( !prebound || (*location != value) )
+								*location = value; 
+						}
+					#else
+						// don't dirty page if prebound value was correct
+						if ( !prebound || (*location != value) )
+							*location = value; 
+					#endif
 					}
 					break;
 				default:
@@ -1810,6 +2016,61 @@
 {
 	Segment* seg = fSegments[0];
 	return (const void*)seg->getActualLoadAddress();
+}
+
+uintptr_t ImageLoaderMachO::bindIndirectSymbol(uintptr_t* ptrToBind, const struct macho_section* sect, const char* symbolName, uintptr_t targetAddr, ImageLoader* targetImage, const LinkContext& context)
+{	// Binds one indirect-symbol slot: optionally logs it, lets context.bindingHandler substitute the address, stores the result at ptrToBind (or, on i386, patches a 5-byte stub), and returns the address used.
+	if ( context.verboseBind ) {
+		const char* path = NULL;	// stays NULL when there is no target image
+		if ( targetImage != NULL )
+			path = targetImage->getShortName();
+		fprintf(stderr, "dyld: bind: %s:%s$%s = %s:%s, *0x%08lx = 0x%08lx\n",
+				this->getShortName(), symbolName, (((sect->flags & SECTION_TYPE)==S_NON_LAZY_SYMBOL_POINTERS) ? "non_lazy_ptr" : "lazy_ptr"),
+				path, symbolName, (uintptr_t)ptrToBind, targetAddr);
+	}	// note: the log labels every section type other than S_NON_LAZY_SYMBOL_POINTERS as "lazy_ptr"
+	if ( context.bindingHandler != NULL ) {
+		const char* path = NULL;
+		if ( targetImage != NULL )
+			path = targetImage->getShortName();
+		targetAddr = (uintptr_t)context.bindingHandler(path, symbolName, (void *)targetAddr);	// the handler's return value replaces the address that gets bound and returned
+	}
+#if __i386__
+	// i386 has special self-modifying stubs that change from "CALL rel32" to "JMP rel32"
+	if ( ((sect->flags & SECTION_TYPE) == S_SYMBOL_STUBS) && ((sect->flags & S_ATTR_SELF_MODIFYING_CODE) != 0) && (sect->reserved2 == 5) ) {
+		uint32_t rel32 = targetAddr - (((uint32_t)ptrToBind)+5);	// displacement is measured from the end of the 5-byte instruction
+		// re-write instruction in a thread-safe manner
+		// use 8-byte compare-and-swap to alter 5-byte jump table entries
+		// loop is required in case the extra three bytes that cover the next entry are altered by another thread
+		bool done = false;
+		while ( !done ) {
+			volatile int64_t* jumpPtr = (int64_t*)ptrToBind;
+			int pad = 0;	// byte offset of the 5-byte entry inside the 8-byte window being swapped
+			// By default the three extra bytes swapped follow the 5-byte JMP.
+			// But, if the 5-byte jump is up against the end of the __IMPORT segment
+			// We don't want to access bytes off the end of the segment, so we shift
+			// the extra bytes to precede the 5-byte JMP.
+			if ( (((uint32_t)ptrToBind + 8) & 0x00000FFC) == 0x00000000 ) {
+				jumpPtr = (int64_t*)((uint32_t)ptrToBind - 3);
+				pad = 3;
+			}
+			int64_t oldEntry = *jumpPtr;	// snapshot of all 8 bytes; the 3 bytes outside the entry are written back unchanged
+			union {
+				int64_t int64;
+				uint8_t bytes[8];
+			} newEntry;
+			newEntry.int64 = oldEntry;
+			newEntry.bytes[pad+0] = 0xE9; // JMP rel32
+			newEntry.bytes[pad+1] = rel32 & 0xFF;
+			newEntry.bytes[pad+2] = (rel32 >> 8) & 0xFF;
+			newEntry.bytes[pad+3] = (rel32 >> 16) & 0xFF;
+			newEntry.bytes[pad+4] = (rel32 >> 24) & 0xFF;
+			done = OSAtomicCompareAndSwap64Barrier(oldEntry, newEntry.int64, (int64_t*)jumpPtr);	// if the 8 bytes changed since the snapshot, the swap fails and the loop retries
+		}
+	}
+	else
+#endif
+	*ptrToBind = targetAddr;	// ordinary pointer slot; on i386 this statement is the else-branch of the stub test above
+	return targetAddr;
 }
 
 
@@ -1830,38 +2091,39 @@
 					const struct macho_section* const sectionsEnd = &sectionsStart[seg->nsects];
 					for (const struct macho_section* sect=sectionsStart; sect < sectionsEnd; ++sect) {
 						const uint8_t type = sect->flags & SECTION_TYPE;
+						uint32_t symbolIndex = INDIRECT_SYMBOL_LOCAL;
 						if ( type == S_LAZY_SYMBOL_POINTERS ) {
 							const uint32_t pointerCount = sect->size / sizeof(uintptr_t);
 							uintptr_t* const symbolPointers = (uintptr_t*)(sect->addr + fSlide);
 							if ( (lazyPointer >= symbolPointers) && (lazyPointer < &symbolPointers[pointerCount]) ) {
 								const uint32_t indirectTableOffset = sect->reserved1;
 								const uint32_t lazyIndex = lazyPointer - symbolPointers;
-								uint32_t symbolIndex = indirectTable[indirectTableOffset + lazyIndex];
-								if ( symbolIndex != INDIRECT_SYMBOL_ABS && symbolIndex != INDIRECT_SYMBOL_LOCAL ) {
-									ImageLoader *image = NULL;
-									const char *path = NULL;
-									uintptr_t symbolAddr = this->resolveUndefined(context,  &fSymbolTable[symbolIndex], twoLevel, &image);
-									if ( context.verboseBind ) {
-										if(NULL == path && NULL != image) {
-											path = image->getShortName();
-										}
-										fprintf(stderr, "dyld: bind: %s:%s$%s = %s:%s, *0x%08lx = 0x%08lx\n",
-												this->getShortName(), &fStrings[fSymbolTable[symbolIndex].n_un.n_strx], "lazy_ptr",
-												path, &fStrings[fSymbolTable[symbolIndex].n_un.n_strx], (uintptr_t)&symbolPointers[lazyIndex], symbolAddr);
-									}
-									if ( NULL != context.bindingHandler ) {
-										if(NULL == path && NULL != image) {
-											path = image->getPath();
-										}
-										symbolAddr = (uintptr_t)context.bindingHandler(path, &fStrings[fSymbolTable[symbolIndex].n_un.n_strx], (void *)symbolAddr);
-									}
-									symbolPointers[lazyIndex] = symbolAddr;
-									// update stats
-									fgTotalLazyBindFixups++;
-									return symbolPointers[lazyIndex];
-								}
+								symbolIndex = indirectTable[indirectTableOffset + lazyIndex];
 							}
 						}
+					#if __i386__
+						else if ( (type == S_SYMBOL_STUBS) && (sect->flags & S_ATTR_SELF_MODIFYING_CODE) && (sect->reserved2 == 5) ) {
+							// 5 bytes stubs on i386 are new "fast stubs"
+							uint8_t* const jmpTableBase = (uint8_t*)(sect->addr + fSlide);
+							uint8_t* const jmpTableEnd = jmpTableBase + sect->size;
+							// initial CALL instruction in jump table leaves pointer to next entry, so back up
+							uint8_t* const jmpTableEntryToPatch = ((uint8_t*)lazyPointer) - 5;  
+							lazyPointer = (uintptr_t*)jmpTableEntryToPatch; 
+							if ( (jmpTableEntryToPatch >= jmpTableBase) && (jmpTableEntryToPatch < jmpTableEnd) ) {
+								const uint32_t indirectTableOffset = sect->reserved1;
+								const uint32_t entryIndex = (jmpTableEntryToPatch - jmpTableBase)/5;
+								symbolIndex = indirectTable[indirectTableOffset + entryIndex];
+							}
+						}
+					#endif
+						if ( symbolIndex != INDIRECT_SYMBOL_ABS && symbolIndex != INDIRECT_SYMBOL_LOCAL ) {
+							const char* symbolName = &fStrings[fSymbolTable[symbolIndex].n_un.n_strx];
+							ImageLoader* image = NULL;
+							uintptr_t symbolAddr = this->resolveUndefined(context,  &fSymbolTable[symbolIndex], twoLevel, &image);
+							symbolAddr = this->bindIndirectSymbol(lazyPointer, sect, symbolName, symbolAddr, image,  context);
+							++fgTotalLazyBindFixups;
+							return symbolAddr;
+						}
 					}
 				}
 				break;
@@ -1870,6 +2132,7 @@
 	}
 	throw "lazy pointer not found";
 }
+
 
 
 
@@ -1890,26 +2153,37 @@
 					const struct macho_section* const sectionsEnd = &sectionsStart[seg->nsects];
 					for (const struct macho_section* sect=sectionsStart; sect < sectionsEnd; ++sect) {
 						const uint8_t type = sect->flags & SECTION_TYPE;
-						const uint32_t pointerCount = sect->size / sizeof(uintptr_t);
+						uint32_t elementSize = sizeof(uintptr_t);
+						uint32_t elementCount = sect->size / elementSize;
 						if ( type == S_NON_LAZY_SYMBOL_POINTERS ) {
 							if ( (bindness == kLazyOnly) || (bindness == kLazyOnlyNoDependents) )
 								continue;
 						}
 						else if ( type == S_LAZY_SYMBOL_POINTERS ) {
 							// process each symbol pointer in this section
-							fgTotalPossibleLazyBindFixups += pointerCount;
+							fgTotalPossibleLazyBindFixups += elementCount;
 							if ( bindness == kNonLazyOnly )
 								continue;
 						}
+				#if __i386__
+						else if ( (type == S_SYMBOL_STUBS) && (sect->flags & S_ATTR_SELF_MODIFYING_CODE) && (sect->reserved2 == 5) ) {
+							// process each jmp entry in this section
+							elementCount = sect->size / 5;
+							elementSize = 5;
+							fgTotalPossibleLazyBindFixups += elementCount;
+							if ( bindness == kNonLazyOnly )
+								continue;
+						}
+				#endif
 						else {
 							continue;
 						}
 						const uint32_t indirectTableOffset = sect->reserved1;
-						uintptr_t* const symbolPointers = (uintptr_t*)(sect->addr + fSlide);
-						for (uint32_t j=0; j < pointerCount; ++j) {
+						uint8_t* ptrToBind = (uint8_t*)(sect->addr + fSlide);
+						for (uint32_t j=0; j < elementCount; ++j, ptrToBind += elementSize) {
 							uint32_t symbolIndex = indirectTable[indirectTableOffset + j];
 							if ( symbolIndex == INDIRECT_SYMBOL_LOCAL) {
-								symbolPointers[j] += this->fSlide;
+								*((uintptr_t*)ptrToBind) += this->fSlide;
 							}
 							else if ( symbolIndex == INDIRECT_SYMBOL_ABS) {
 								// do nothing since already has absolute address
@@ -1947,27 +2221,13 @@
 									continue;
 								uintptr_t symbolAddr;
 									symbolAddr = resolveUndefined(context, sym, twoLevel, &image);
-								if ( context.verboseBind ) {
-									const char *path = NULL;
-									if(NULL != image) {
-										path = image->getShortName();
-									}
-									const char *typeName;
-									if ( type == S_LAZY_SYMBOL_POINTERS ) {
-										typeName = "lazy_ptr";
-									}
-									else {
-										typeName = "non_lazy_ptr";
-									}
-									fprintf(stderr, "dyld: bind: %s:%s$%s = %s:%s, *0x%08lx = 0x%08lx\n",
-											this->getShortName(), &fStrings[sym->n_un.n_strx], typeName,
-											path, &fStrings[sym->n_un.n_strx], (uintptr_t)&symbolPointers[j], symbolAddr);
-								}
-								symbolPointers[j] = symbolAddr;
+									
+								// update pointer
+								symbolAddr = this->bindIndirectSymbol((uintptr_t*)ptrToBind, sect, &fStrings[sym->n_un.n_strx], symbolAddr, image,  context);
 							}
 						}
 						// update stats
-						fgTotalBindFixups += pointerCount;
+						fgTotalBindFixups += elementCount;
 					}
 				}
 				break;
@@ -2006,9 +2266,10 @@
 // These are defined in dyldStartup.s
 extern "C" void stub_binding_helper();
 extern "C" bool dyld_func_lookup(const char* name, uintptr_t* address);
-
-
-void ImageLoaderMachO::setupLazyPointerHandler()
+extern "C" void fast_stub_binding_helper_interface();
+
+
+void ImageLoaderMachO::setupLazyPointerHandler(const LinkContext& context)
 {
 	if ( fDATAdyld != NULL ) {
 		struct DATAdyld* dd = (struct DATAdyld*)(fDATAdyld->addr + fSlide);
@@ -2027,12 +2288,51 @@
 		//	save = dd->stubBindHelper;	
 #endif
 	}
+#if __i386__
+	if ( ! this->usablePrebinding(context) || !this->usesTwoLevelNameSpace() ) {
+		// reset all "fast" stubs
+		const uint32_t cmd_count = ((macho_header*)fMachOData)->ncmds;
+		const struct load_command* const cmds = (struct load_command*)&fMachOData[sizeof(macho_header)];
+		const struct load_command* cmd = cmds;
+		for (uint32_t i = 0; i < cmd_count; ++i) {
+			switch (cmd->cmd) {
+				case LC_SEGMENT_COMMAND:
+				{
+					const struct macho_segment_command* seg = (struct macho_segment_command*)cmd;
+					const struct macho_section* const sectionsStart = (struct macho_section*)((char*)seg + sizeof(struct macho_segment_command));
+					const struct macho_section* const sectionsEnd = &sectionsStart[seg->nsects];
+					for (const struct macho_section* sect=sectionsStart; sect < sectionsEnd; ++sect) {
+						const uint8_t type = sect->flags & SECTION_TYPE;
+						if ( (type == S_SYMBOL_STUBS) && (sect->flags & S_ATTR_SELF_MODIFYING_CODE) && (sect->reserved2 == 5) ) {
+							// reset each jmp entry in this section
+							uint8_t* start = (uint8_t*)(sect->addr + this->fSlide);
+							uint8_t* end = start + sect->size;
+							uintptr_t dyldHandler = (uintptr_t)&fast_stub_binding_helper_interface;
+							for (uint8_t* entry = start; entry < end; entry += 5) {
+								uint32_t rel32 = dyldHandler - (((uint32_t)entry)+5);
+								entry[0] = 0xE8; // CALL rel32
+								entry[1] = rel32 & 0xFF;
+								entry[2] = (rel32 >> 8) & 0xFF;
+								entry[3] = (rel32 >> 16) & 0xFF;
+								entry[4] = (rel32 >> 24) & 0xFF;
+							}
+						}
+					}
+				}
+			}
+			cmd = (const struct load_command*)(((char*)cmd)+cmd->cmdsize);
+		}
+	}
+#endif
 }
 
 bool ImageLoaderMachO::usablePrebinding(const LinkContext& context) const
 {
 	// if prebound and loaded at prebound address, and all libraries are same as when this was prebound, then no need to bind
-	if ( this->isPrebindable() && this->allDependentLibrariesAsWhenPreBound() && (this->getSlide() == 0) ) {
+	if ( this->isPrebindable() 
+		&& (this->getSlide() == 0) 
+		&& this->usesTwoLevelNameSpace()
+		&& this->allDependentLibrariesAsWhenPreBound() ) {
 		// allow environment variables to disable prebinding
 		if ( context.bindFlat )
 			return false;
@@ -2053,13 +2353,13 @@
 void ImageLoaderMachO::doBind(const LinkContext& context, BindingLaziness bindness)
 {
 	// set dyld entry points in image
-	this->setupLazyPointerHandler();
+	this->setupLazyPointerHandler(context);
 
 	// if prebound and loaded at prebound address, and all libraries are same as when this was prebound, then no need to bind
 	// note: flat-namespace binaries need to be imports rebound (even if correctly prebound)
 	if ( this->usablePrebinding(context) && this->usesTwoLevelNameSpace() ) {
-		// if image has coalesced symbols, then these need to be rebound
-		if ( this->needsCoalescing() ) {
+		// if image has coalesced symbols, then these need to be rebound, unless this is the only image with weak symbols
+		if ( this->needsCoalescing() && (fgCountOfImagesWithWeakExports > 1) ) {
 			this->doBindExternalRelocations(context, true);
 			this->doBindIndirectSymbolPointers(context, kLazyAndNonLazy, true);
 		}
@@ -2164,6 +2464,7 @@
 	ImageLoader::printStatistics(imageCount);
 	fprintf(stderr, "total hinted binary tree searches:    %d\n", fgHintedBinaryTreeSearchs);
 	fprintf(stderr, "total unhinted binary tree searches:  %d\n", fgUnhintedBinaryTreeSearchs);
+	fprintf(stderr, "total images with weak exports:  %d\n", fgCountOfImagesWithWeakExports);
 	
 #if LINKEDIT_USAGE_DEBUG
 	fprintf(stderr, "linkedit pages accessed (%lu):\n", sLinkEditPageBuckets.size());
@@ -2296,13 +2597,14 @@
 	// walk all exports and slide their n_value
 	struct macho_nlist* lastExport = &symbolTable[dysymtab->iextdefsym+dysymtab->nextdefsym];
 	for (struct macho_nlist* entry = &symbolTable[dysymtab->iextdefsym]; entry < lastExport; ++entry) {
-		entry->n_value += fSlide;
+		if ( (entry->n_type & N_TYPE) == N_SECT )
+			entry->n_value += fSlide;
 	}
 
 	// walk all local symbols and slide their n_value
 	struct macho_nlist* lastLocal = &symbolTable[dysymtab->ilocalsym+dysymtab->nlocalsym];
 	for (struct macho_nlist* entry = &symbolTable[dysymtab->ilocalsym]; entry < lastLocal; ++entry) {
-		if ( (entry->n_type & N_TYPE) == N_SECT )
+		if ( entry->n_sect != NO_SECT )
 			entry->n_value += fSlide;
 	}
 	
@@ -2316,7 +2618,7 @@
 				switch(sreloc->r_type) {
 		#if __ppc__ || __ppc64__
 					case PPC_RELOC_PB_LA_PTR:
-		#elif __i386__
+		#elif __i386__ || __x86_64__
 					case GENERIC_RELOC_PB_LA_PTR:
 		#else
 			#error unknown architecture
@@ -2327,8 +2629,67 @@
 			}
 		}
 	}
-
-}
+	
+	// if multi-module, fix up objc_addr (10.4 and later runtime does not use this, but we want to keep file checksum consistent)
+	if ( dysymtab->nmodtab != 0 ) {
+		dylib_module* const modulesStart = (struct dylib_module*)(&fileToPrebind[dysymtab->modtaboff]);
+		dylib_module* const modulesEnd = &modulesStart[dysymtab->nmodtab];
+		for (dylib_module* module=modulesStart; module < modulesEnd; ++module) {
+			if ( module->objc_module_info_size != 0 ) {
+				module->objc_module_info_addr += fSlide;
+			}
+		}
+	}
+}
+
+// file on disk has been reprebound, but we are still mapped to old file
+//
+// Walks every segment of this image and drops its mapping of the old file so that
+// the kernel can unlink (delete) that file.  Two regions are still needed after the
+// unmap, so their contents are copied aside and restored into freshly
+// vm_allocate()d memory at the same addresses:
+//   - the mach_header plus load commands at the start of the segment that begins
+//     at fMachOData
+//   - the entire __LINKEDIT segment
+// Every other segment is simply unmapped.
+//
+// Failures are reported on stderr.  Data is only copied back when the replacement
+// memory was actually obtained, and __LINKEDIT is left mapped if no temporary
+// buffer could be allocated for it.
+//
+// context is not used by this implementation.
+void ImageLoaderMachO::prebindUnmap(const LinkContext& context)
+{
+	// this removes all mappings to the old file, so the kernel will unlink (delete) it.
+	//  We need to leave the load commands and __LINKEDIT in place
+	for (std::vector<class Segment*>::iterator it=fSegments.begin(); it != fSegments.end(); ++it) {
+		void* segmentAddress = (void*)((*it)->getActualLoadAddress());
+		uintptr_t segmentSize = (*it)->getSize();
+		//fprintf(stderr, "unmapping segment %s at %p for %s\n", (*it)->getName(), segmentAddress, this->getPath());
+		// save load commands at beginning of __TEXT segment
+		if ( segmentAddress == fMachOData ) {
+			// typically load commands are one or two pages in size, so ok to alloc on stack
+			uint32_t loadCmdSize = sizeof(macho_header) + ((macho_header*)fMachOData)->sizeofcmds;
+			// round the byte count up to a multiple of 4096
+			uint32_t loadCmdPages = (loadCmdSize+4095) & (-4096);
+			uint8_t loadcommands[loadCmdPages];
+			memcpy(loadcommands, fMachOData, loadCmdPages);
+			// unmap whole __TEXT segment
+			munmap((void*)(fMachOData), segmentSize);
+			// allocate and copy back mach_header and load commands
+			vm_address_t addr = (vm_address_t)fMachOData;
+			int r2 = vm_allocate(mach_task_self(), &addr, loadCmdPages, false /*at this address*/);
+			if ( r2 != 0 ) {
+				// the range was not obtained: writing to it would either fault or
+				// overwrite memory that belongs to some other mapping, so only report
+				fprintf(stderr, "prebindUnmap() vm_allocate for __TEXT %d failed\n", loadCmdPages);
+			}
+			else {
+				memcpy((void*)fMachOData, loadcommands, loadCmdPages);
+				//fprintf(stderr, "copying back load commands to %p size=%u for %s\n", segmentAddress, loadCmdPages, this->getPath());
+			}
+		}
+		else if ( strcmp((*it)->getName(), "__LINKEDIT") == 0 ) {
+			uint32_t linkEditSize = segmentSize;
+			// round the byte count up to a multiple of 4096
+			uint32_t linkEditPages = (linkEditSize+4095) & (-4096);
+			void* linkEditTmp = malloc(linkEditPages);
+			if ( linkEditTmp == NULL ) {
+				// without a saved copy __LINKEDIT could not be restored after the unmap,
+				// so keep the existing mapping instead of losing its contents
+				fprintf(stderr, "prebindUnmap() malloc for __LINKEDIT %d failed\n", linkEditPages);
+				continue;
+			}
+			memcpy(linkEditTmp, segmentAddress, linkEditPages);
+			// unmap whole __LINKEDIT segment
+			munmap(segmentAddress, segmentSize);
+			vm_address_t addr = (vm_address_t)segmentAddress;
+			int r2 = vm_allocate(mach_task_self(), &addr, linkEditPages, false /*at this address*/);
+			if ( r2 != 0 ) {
+				// same reasoning as for the load commands: never write to a range we do not own
+				fprintf(stderr, "prebindUnmap() vm_allocate for __LINKEDIT %d failed\n", linkEditPages);
+			}
+			else {
+				memcpy(segmentAddress, linkEditTmp, linkEditPages);
+				//fprintf(stderr, "copying back __LINKEDIT to %p size=%u for %s\n", segmentAddress, linkEditPages, this->getPath());
+			}
+			free(linkEditTmp);
+		}
+		else {
+			// unmap any other segment
+			munmap((void*)(segmentAddress), (*it)->getSize());
+		}
+	}
+}
+
+
 
 
 SegmentMachO::SegmentMachO(const struct macho_segment_command* cmd, ImageLoaderMachO* image, const uint8_t* fileData)