Loading...
launch-cache/update_dyld_shared_cache.cpp dyld-239.4 dyld-360.14
--- dyld/dyld-239.4/launch-cache/update_dyld_shared_cache.cpp
+++ dyld/dyld-360.14/launch-cache/update_dyld_shared_cache.cpp
@@ -68,7 +68,7 @@
 #define SELOPT_WRITE
 #include "objc-shared-cache.h"
 
-#define FIRST_DYLIB_TEXT_OFFSET 0x8000
+#define FIRST_DYLIB_TEXT_OFFSET 0x10000
 
 #ifndef LC_FUNCTION_STARTS
     #define LC_FUNCTION_STARTS 0x26
@@ -77,6 +77,7 @@
 static bool							verbose = false;
 static bool							progress = false;
 static bool							iPhoneOS = false;
+static bool							rootless = true;
 static std::vector<const char*>		warnings;
 
 
@@ -125,10 +126,11 @@
 
 	static void			addArchPair(ArchPair ap);
 	static void			addRoot(const char* vpath, const std::set<ArchPair>& archs);
+	static uint64_t		maxCacheSizeForArchPair(ArchPair ap);
 	static void			findSharedDylibs(ArchPair ap);
 	static ArchGraph*	graphForArchPair(ArchPair ap) { return fgPerArchGraph[ap]; }
 	static void			setFileSystemRoot(const char* root) { fgFileSystemRoot = root; }
-	static void			setFileSystemOverlay(const char* overlay) { fgFileSystemOverlay = overlay; }
+	static void			setFileSystemOverlay(const std::vector<const char*>& overlays);
 	static const char*	archName(ArchPair ap);
 	
 	ArchPair											getArchPair() { return fArchPair; }
@@ -148,6 +150,8 @@
 		const MachOLayoutAbstraction*	getLayout() const { return fLayout; }
 		size_t							useCount() const { return fRootsDependentOnThis.size(); }
 		bool							allDependentsFound() const { return !fDependentMissing; }
+		bool							dependsOnDylibList() const { return fRootsDependentOnThis.count(const_cast<DependencyNode*>(this)); }	// true when this node is itself a member of its own fRootsDependentOnThis set
+
 	private:
 		ArchGraph*									fGraph;
 		const char*									fPath;
@@ -170,7 +174,7 @@
 
 	static std::map<ArchPair, ArchGraph*>	fgPerArchGraph;
 	static const char*						fgFileSystemRoot;
-	static const char*						fgFileSystemOverlay;
+	static std::vector<const char*> 		fgFileSystemOverlays;
 	
 	ArchPair									fArchPair;
 	std::set<DependencyNode*>					fRoots;
@@ -180,12 +184,18 @@
 };
 std::map<ArchPair, ArchGraph*>		ArchGraph::fgPerArchGraph;
 const char*							ArchGraph::fgFileSystemRoot = "";
-const char*							ArchGraph::fgFileSystemOverlay = "";
+std::vector<const char*> 			ArchGraph::fgFileSystemOverlays;
 
 void ArchGraph::addArchPair(ArchPair ap)
 {
 	//fprintf(stderr, "adding ArchPair 0x%08X,0x%08X\n", ap.arch, ap.subtype);
 	fgPerArchGraph[ap] = new ArchGraph(ap);
+}
+
+void ArchGraph::setFileSystemOverlay(const std::vector<const char*>& overlays) // Appends every path in 'overlays' to fgFileSystemOverlays, in order; entries already in the list are kept.
+{ 
+	for (std::vector<const char*>::const_iterator it=overlays.begin(); it != overlays.end(); ++it) 
+		fgFileSystemOverlays.push_back(*it);	// stores the pointer itself; the string is not copied
 }
 
 void ArchGraph::addRoot(const char* vpath, const std::set<ArchPair>& onlyArchs)
@@ -194,12 +204,14 @@
 	char completePath[MAXPATHLEN];
 	const char* path = NULL;
 	// check -overlay path first
-	if ( fgFileSystemOverlay[0] != '\0' ) {
-		strcpy(completePath, fgFileSystemOverlay);
+	for (std::vector<const char*>::const_iterator it=fgFileSystemOverlays.begin(); it != fgFileSystemOverlays.end(); ++it) {
+		strcpy(completePath, *it);
 		strcat(completePath, vpath);	// assumes vpath starts with '/'
 		struct stat stat_buf;
-		if ( stat(completePath, &stat_buf) == 0 )
+		if ( stat(completePath, &stat_buf) == 0 ) {
 			path = completePath;
+			break;
+		}
 	}
 	// if not found in overlay, check for -root
 	if ( (path == NULL) && (fgFileSystemRoot[0] != '\0') ) {
@@ -255,41 +267,42 @@
 {
 	//fprintf(stderr, "getNodeForVirtualPath(%s)\n", vpath);
 	char completePath[MAXPATHLEN];
-	if ( fgFileSystemOverlay[0] != '\0' ) {
+	for (std::vector<const char*>::const_iterator it=fgFileSystemOverlays.begin(); it != fgFileSystemOverlays.end(); ++it) {
+		const char* overlayPath = *it;
 		// using -overlay means if /overlay/path/dylib exists use it, otherwise use /path/dylib
-		strcpy(completePath, fgFileSystemOverlay);
+		strcpy(completePath, overlayPath);
 		strcat(completePath, vpath);	// assumes vpath starts with '/'
 		struct stat stat_buf;
-		if ( stat(completePath, &stat_buf) == 0 )
+		if ( stat(completePath, &stat_buf) == 0 ) {
 			return this->getNode(completePath);
-		else {
-			// <rdar://problem/9279770> support when install name is a symlink
-			const char* pathToSymlink = vpath;
-			if ( fgFileSystemRoot[0] != '\0' ) {
-				strcpy(completePath, fgFileSystemRoot);
-				strcat(completePath, vpath);
-				pathToSymlink = completePath;
-			}
-			if ( (lstat(pathToSymlink, &stat_buf) == 0) && S_ISLNK(stat_buf.st_mode) ) {
-				// requested path did not exist in /overlay, but leaf of path is a symlink in /
-				char pathInSymLink[MAXPATHLEN];
-				size_t res = readlink(pathToSymlink, pathInSymLink, sizeof(pathInSymLink));
-				if ( res != -1 ) {
-					pathInSymLink[res] = '\0';
-					if ( pathInSymLink[0] != '/' ) {
-						char symFullPath[MAXPATHLEN];
-						strcpy(symFullPath, vpath);
-						char* lastSlash = strrchr(symFullPath, '/');
-						if ( lastSlash != NULL ) {
-							strcpy(lastSlash+1, pathInSymLink);
-							// (re)try looking for what symlink points to, but in /overlay
-							return this->getNodeForVirtualPath(symFullPath);
-						}
-					} 
-				}
-			}
-		}
-	}
+		}
+		// <rdar://problem/9279770> support when install name is a symlink
+		const char* pathToSymlink = vpath;
+		if ( fgFileSystemRoot[0] != '\0' ) {
+			strcpy(completePath, fgFileSystemRoot);
+			strcat(completePath, vpath);
+			pathToSymlink = completePath;
+		}
+		if ( (lstat(pathToSymlink, &stat_buf) == 0) && S_ISLNK(stat_buf.st_mode) ) {
+			// requested path did not exist in /overlay, but leaf of path is a symlink in /
+			char pathInSymLink[MAXPATHLEN];
+			size_t res = readlink(pathToSymlink, pathInSymLink, sizeof(pathInSymLink));
+			if ( res != -1 ) {
+				pathInSymLink[res] = '\0';
+				if ( pathInSymLink[0] != '/' ) {
+					char symFullPath[MAXPATHLEN];
+					strcpy(symFullPath, vpath);
+					char* lastSlash = strrchr(symFullPath, '/');
+					if ( lastSlash != NULL ) {
+						strcpy(lastSlash+1, pathInSymLink);
+						// (re)try looking for what symlink points to, but in /overlay
+						return this->getNodeForVirtualPath(symFullPath);
+					}
+				} 
+			}
+		}
+	}
+
 	if ( fgFileSystemRoot[0] != '\0' ) {
 		// using -root means always use /rootpath/usr/lib
 		strcpy(completePath, fgFileSystemRoot);
@@ -371,8 +384,12 @@
 			aliasPath = &realPath[strlen(fgFileSystemRoot)];
 		}
 		// <rdar://problem/11192810> Too many aliases in -overlay mode
-		if ( (fgFileSystemOverlay != NULL) && (fgFileSystemOverlay[0] != '\0') && (strncmp(realPath, fgFileSystemOverlay, strlen(fgFileSystemOverlay)) == 0) ) {
-			aliasPath = &realPath[strlen(fgFileSystemOverlay)];
+		for (std::vector<const char*>::const_iterator it=fgFileSystemOverlays.begin(); it != fgFileSystemOverlays.end(); ++it) {
+			const char* overlayPath = *it;
+			if ( strncmp(realPath, overlayPath, strlen(overlayPath)) == 0 ) {
+				aliasPath = &realPath[strlen(overlayPath)];
+				break;
+			}
 		}
 		if ( fAliasesMap.find(aliasPath) == fAliasesMap.end() ) {
 			if ( strcmp(aliasPath, node->getLayout()->getID().name) != 0 ) {
@@ -473,16 +490,32 @@
 }
 
 
+
 ArchGraph::DependencyNode::DependencyNode(ArchGraph* graph, const char* path, const MachOLayoutAbstraction* layout) 
  : fGraph(graph), fPath(strdup(path)), fLayout(layout), fDependenciesLoaded(false), fDependentMissing(false)
 {
 	//fprintf(stderr, "new DependencyNode(0x%08X, %s)\n", graph->fArch, path);
 }
 
+uint64_t ArchGraph::maxCacheSizeForArchPair(ArchPair ap) {	// Returns the maximum cache size in bytes for ap; only ap.arch is examined, ap.subtype is ignored.
+	switch ( ap.arch ) {
+		case CPU_TYPE_I386:
+			return 0x20000000;	// 512MB, same value as SharedCache<x86>::sharedRegionSize()
+		case CPU_TYPE_X86_64:
+			return 0x40000000;	// 1GB, same value as SharedCache<x86_64>::sharedRegionSize()
+		case CPU_TYPE_ARM:
+			return ARM_SHARED_REGION_SIZE;	// same constant SharedCache<arm>::sharedRegionSize() returns
+		case CPU_TYPE_ARM64:
+			return ARM64_SHARED_REGION_SIZE;	// same constant SharedCache<arm64>::sharedRegionSize() returns
+		default: return UINT64_MAX;	// any other arch: largest uint64_t, i.e. effectively no limit
+	}
+}
+
 void ArchGraph::findSharedDylibs(ArchPair ap)
 {
 	const PathToNode& nodes = fgPerArchGraph[ap]->fNodes;
 	std::set<const MachOLayoutAbstraction*> possibleLibs;
+	std::map<const MachOLayoutAbstraction*, const DependencyNode *> layoutToNode;
 	//fprintf(stderr, "shared for arch %s\n", archName(ap));
 	for(PathToNode::const_iterator it = nodes.begin(); it != nodes.end(); ++it) {
 		DependencyNode* node = it->second;
@@ -493,9 +526,10 @@
 				char* msg;
 				if ( sharable(layout, ap, &msg) ) {
 					possibleLibs.insert(layout);
+					layoutToNode[layout] = node;
 				}
 				else {
-					if ( layout->getID().name[0] == '@' ) {
+					if ( !iPhoneOS && (layout->getID().name[0] == '@') ) {
 						// <rdar://problem/7770139> update_dyld_shared_cache should suppress warnings for embedded frameworks
 					}
 					else {
@@ -510,10 +544,49 @@
 	// prune so that all shareable libs depend only on other shareable libs
 	std::set<const MachOLayoutAbstraction*>& sharedLibs = fgPerArchGraph[ap]->fSharedDylibs;
 	std::map<const MachOLayoutAbstraction*,bool> shareableMap;
+	uint64_t totalLibSize = 0;
 	for (std::set<const MachOLayoutAbstraction*>::iterator lit = possibleLibs.begin(); lit != possibleLibs.end(); ++lit) {
-		if ( canBeShared(*lit, ap, possibleLibs, shareableMap) )
+		if ( canBeShared(*lit, ap, possibleLibs, shareableMap) ) {
+			totalLibSize += (*lit)->getVMSize();
 			sharedLibs.insert(*lit);
-	}
+		}
+	}
+
+#if 0  // disable auto-eviction because it happens before linkedit optimization which means it is overly conservative.
+
+	// Check to see if the unoptimized cache size is too large, if so trim out some libraries
+	uint64_t maxCacheSize = maxCacheSizeForArchPair(ap);
+	if (totalLibSize > maxCacheSize) {
+		fprintf(stderr, "update_dyld_shared_cache: unoptimized %s shared cache overflow, total VM space: %lldMB (max=%lldMB)\n", archName(ap), totalLibSize/(1024*1024), maxCacheSize/(1024*1024));
+		std::vector<const MachOLayoutAbstraction*> removableLibs;
+
+		for (const MachOLayoutAbstraction* layout : sharedLibs) {
+			// Every library uses itself, and every MH_DYLIB has an extra useCount, so we know useCount of 2 implies nothing else in the shared cache uses it
+			if (layoutToNode[layout]->useCount() == 2) {
+				if ( layoutToNode[layout]->dependsOnDylibList() ) {
+					removableLibs.push_back(layout);
+					//fprintf(stderr, "  possible to evict: %s\n", layout->getID().name);
+				}
+			}
+		}
+
+		std::sort(removableLibs.begin(), removableLibs.end(),  [](const MachOLayoutAbstraction* a, const MachOLayoutAbstraction* b){
+			return a->getVMSize() < b->getVMSize();
+		});
+
+		while ( (totalLibSize > maxCacheSize) && !removableLibs.empty() ) {
+			const MachOLayoutAbstraction* largestRemovableLib = removableLibs.back();
+			removableLibs.pop_back();
+			if ( largestRemovableLib->getVMSize() > 1024*1024 )
+				fprintf(stderr, "update_dyld_shared_cache: evicting % 3lldMB leaf dylib %s\n", largestRemovableLib->getVMSize()/(1024*1024), largestRemovableLib->getID().name);
+			else
+				fprintf(stderr, "update_dyld_shared_cache: evicting % 3lldKB leaf dylib %s\n", largestRemovableLib->getVMSize()/1024, largestRemovableLib->getID().name);
+			sharedLibs.erase(largestRemovableLib);
+			totalLibSize -= largestRemovableLib->getVMSize();
+		}
+		fprintf(stderr, "update_dyld_shared_cache: unoptimized %s shared cache reduced to total VM space: %lldMB\n", archName(ap), totalLibSize/1024/1024);
+	}
+#endif
 }
 
 const char*	ArchGraph::archName(ArchPair ap)
@@ -522,7 +595,12 @@
 		case CPU_TYPE_I386:
 			return "i386";
 		case CPU_TYPE_X86_64:
-			return "x86_64";
+			switch ( ap.subtype ) {
+				case CPU_SUBTYPE_X86_64_H:
+					return "x86_64h";
+				default:
+					return "x86_64";
+			}
 		case CPU_TYPE_ARM:
 			switch ( ap.subtype ) {
 				case CPU_SUBTYPE_ARM_V4T:
@@ -544,6 +622,8 @@
 				default:
 					return "arm";
 			}
+		case CPU_TYPE_ARM64:
+			return "arm64";
 		default:
 			return "unknown";
 	}
@@ -551,20 +631,19 @@
 
 bool ArchGraph::sharable(const MachOLayoutAbstraction* layout, ArchPair ap, char** msg)
 {
+	int trustErr = layout->notTrusted();
 	if ( ! layout->isTwoLevelNamespace() ) 
 		asprintf(msg, "can't put %s in shared cache because it was built -flat_namespace", layout->getID().name);
+	else if ( ! layout->inSharableLocation() )
+		asprintf(msg, "can't put %s in shared cache because its -install_name is not in /usr/lib or /System/Library", layout->getID().name);
 	else if ( ! layout->hasSplitSegInfo() ) 
 		asprintf(msg, "can't put %s in shared cache because it was not built for %s or later", layout->getID().name, (iPhoneOS ? "iPhoneOS 3.1" : "MacOSX 10.5"));
-	else if ( ! layout->isRootOwned() )
-		asprintf(msg, "can't put %s in shared cache because it is not owned by root", layout->getID().name);
-	else if ( ! layout->inSharableLocation() )
-		asprintf(msg, "can't put %s in shared cache because it is not in /usr/lib or /System/Library", layout->getID().name);
+	else if ( rootless == true && trustErr != 0 )
+		asprintf(msg, "can't put %s in shared cache because it is not trusted: %s", layout->getFilePath(), strerror(trustErr));
 	else if ( layout->hasDynamicLookupLinkage() )
 		asprintf(msg, "can't put %s in shared cache because it was built with '-undefined dynamic_lookup'", layout->getID().name);
 	else if ( layout->hasMainExecutableLookupLinkage() )
 		asprintf(msg, "can't put %s in shared cache because it was built with '-bundle_loader'", layout->getID().name);
-	//else if ( ! layout->hasDyldInfo() )
-	//	asprintf(msg, "can't put %s in shared cache because it was built for older OS", layout->getID().name);
 	else
 		return true;
 	return false;
@@ -638,6 +717,7 @@
 	uint32_t	add(const char* str);
 	uint32_t	addUnique(const char* str);
 	const char* stringAtIndex(uint32_t) const;
+	
 private:
 	typedef std::unordered_map<const char*, uint32_t, CStringHash, CStringEquals> StringToOffset;
 
@@ -649,7 +729,7 @@
 
 
 StringPool::StringPool() 
-	: fBufferUsed(0), fBufferAllocated(48*1024*1024)
+	: fBufferUsed(0), fBufferAllocated(128*1024*1024)
 {
 	fBuffer = (char*)malloc(fBufferAllocated);
 }
@@ -708,10 +788,11 @@
 class SharedCache
 {
 public:
-							SharedCache(ArchGraph* graph, const char* rootPath, const char* overlayPath, const char* cacheDir, bool explicitCacheDir,
+							SharedCache(ArchGraph* graph, const char* rootPath, const std::vector<const char*>& overlayPaths, const char* cacheDir, bool explicitCacheDir,
 											bool alphaSort, bool verify, bool optimize, uint64_t dyldBaseAddress);
 	bool					update(bool force, bool optimize, bool deleteExistingFirst, int archIndex, 
 										int archCount, bool keepSignatures, bool dontMapLocalSymbols);
+	void					writeCacheFile(const char *cacheFilePath, uint8_t *cacheFileBuffer, uint32_t cacheFileSize, bool deleteOldCache);
 	static const char*		cacheFileSuffix(bool optimized, const char* archName);
 
     // vm address = address AS WRITTEN into the cache
@@ -742,8 +823,11 @@
 	static uint64_t			sharedRegionStartReadOnlyAddress(uint64_t, uint64_t);
 	static uint64_t			getWritableSegmentNewAddress(uint64_t proposedNewAddress, uint64_t originalAddress, uint64_t executableSlide);
 	static bool				addCacheSlideInfo();
+	static uint64_t			pathHash(const char*);
 	
 	static uint64_t			pageAlign(uint64_t addr);
+	static uint64_t			regionAlign(uint64_t addr);
+	static uint64_t			pageAlign4KB(uint64_t addr);
 	void					assignNewBaseAddresses(bool verify);
 
 	struct LayoutInfo {
@@ -845,7 +929,7 @@
     SharedCache<A>* const			fCache;
     const macho_section<P>* const	fSection;
     pint_t * const					fBase;
-    uint64_t						fCount;
+    pint_t							fCount;
 
 public:
     PointerSection(SharedCache<A>* cache, const macho_header<P>* header, 
@@ -857,25 +941,25 @@
     {
     }
 
-    uint64_t count() const { return fCount; }
-
-    uint64_t getUnmapped(uint64_t index) const {
+    pint_t count() const { return fCount; }
+
+    pint_t getVMAddress(pint_t index) const {
         if (index >= fCount) throwf("index out of range");
         return P::getP(fBase[index]);
     }
 
-    T get(uint64_t index) const { 
-        return (T)fCache->mappedAddressForVMAddress(getUnmapped(index));
+    T get(pint_t index) const { 
+        return (T)fCache->mappedAddressForVMAddress(getVMAddress(index));
     }
 
-    void set(uint64_t index, uint64_t value) {
+    void setVMAddress(pint_t index, pint_t value) {
         if (index >= fCount) throwf("index out of range");
         P::setP(fBase[index], value);
     }
 	
     void removeNulls() {
-        uint64_t shift = 0;
-        for (uint64_t i = 0; i < fCount; i++) {
+        pint_t shift = 0;
+        for (pint_t i = 0; i < fCount; i++) {
             pint_t value = fBase[i];
             if (value) {
                 fBase[i-shift] = value;
@@ -927,40 +1011,54 @@
 template <>	 cpu_type_t	SharedCache<x86>::arch()	{ return CPU_TYPE_I386; }
 template <>	 cpu_type_t	SharedCache<x86_64>::arch()	{ return CPU_TYPE_X86_64; }
 template <>	 cpu_type_t	SharedCache<arm>::arch()	{ return CPU_TYPE_ARM; }
+template <>	 cpu_type_t	SharedCache<arm64>::arch()	{ return CPU_TYPE_ARM64; }
 
 template <>	 uint64_t	SharedCache<x86>::sharedRegionStartAddress()			{ return 0x90000000; }
 template <>	 uint64_t	SharedCache<x86_64>::sharedRegionStartAddress()			{ return 0x7FFF80000000LL; }
 template <>	 uint64_t	SharedCache<arm>::sharedRegionStartAddress()			{ return ARM_SHARED_REGION_START; }
+template <>	 uint64_t	SharedCache<arm64>::sharedRegionStartAddress()			{ return ARM64_SHARED_REGION_START; }
 
 template <>	 uint64_t	SharedCache<x86>::sharedRegionSize()					{ return 0x20000000; }
 template <>	 uint64_t	SharedCache<x86_64>::sharedRegionSize()					{ return 0x40000000; }
 template <>	 uint64_t	SharedCache<arm>::sharedRegionSize()					{ return ARM_SHARED_REGION_SIZE; }
+template <>	 uint64_t	SharedCache<arm64>::sharedRegionSize()					{ return ARM64_SHARED_REGION_SIZE; }
 
 template <>	 uint64_t	SharedCache<x86>::sharedRegionStartWritableAddress(uint64_t exEnd)			{ return exEnd + 0x04000000; }
 template <>	 uint64_t	SharedCache<x86_64>::sharedRegionStartWritableAddress(uint64_t exEnd)		{ return 0x7FFF70000000LL; }
 template <>	 uint64_t	SharedCache<arm>::sharedRegionStartWritableAddress(uint64_t exEnd)			{ return (exEnd + 16383) & (-16384); }
+template <>	 uint64_t	SharedCache<arm64>::sharedRegionStartWritableAddress(uint64_t exEnd)		{ return exEnd; }
 
 template <>	 uint64_t	SharedCache<x86>::sharedRegionStartReadOnlyAddress(uint64_t wrEnd, uint64_t exEnd)	 { return wrEnd + 0x04000000; }
 template <>	 uint64_t	SharedCache<x86_64>::sharedRegionStartReadOnlyAddress(uint64_t wrEnd, uint64_t exEnd){ return exEnd; }
 template <>	 uint64_t	SharedCache<arm>::sharedRegionStartReadOnlyAddress(uint64_t wrEnd, uint64_t exEnd)	 { return (wrEnd + 16383) & (-16384); }
-
+template <>	 uint64_t	SharedCache<arm64>::sharedRegionStartReadOnlyAddress(uint64_t wrEnd, uint64_t exEnd) { return (wrEnd + 16383) & (-16384); }
 
 template <>	 const char*	SharedCache<x86>::archName()	{ return "i386"; }
 template <>	 const char*	SharedCache<x86_64>::archName()	{ return "x86_64"; }
 template <>	 const char*	SharedCache<arm>::archName()	{ return "arm"; }
+template <>	 const char*	SharedCache<arm64>::archName()	{ return "arm64"; }
 
 template <>	 const char*	SharedCache<x86>::cacheFileSuffix(bool, const char* archName)	{ return archName; }
 template <>	 const char*	SharedCache<x86_64>::cacheFileSuffix(bool, const char* archName){ return archName; }
 template <>	 const char*	SharedCache<arm>::cacheFileSuffix(bool, const char* archName)	{ return archName; }
-
+template <>	 const char*	SharedCache<arm64>::cacheFileSuffix(bool, const char* archName)	{ return archName; }
 
 template <>  uint64_t		SharedCache<x86>::pageAlign(uint64_t addr)    { return ( (addr + 4095) & (-4096) ); }
 template <>  uint64_t		SharedCache<x86_64>::pageAlign(uint64_t addr) { return ( (addr + 4095) & (-4096) ); }
 template <>  uint64_t		SharedCache<arm>::pageAlign(uint64_t addr)    { return ( (addr + 4095) & (-4096) ); }
-
+template <>  uint64_t		SharedCache<arm64>::pageAlign(uint64_t addr)  { return ( (addr + 16383) & (-16384) ); }	// arm64 rounds up to a multiple of 16384 (16KB); the x86, x86_64 and arm versions above round up to 4096
+
+template <>  uint64_t		SharedCache<x86>::regionAlign(uint64_t addr)    { return ( (addr + 4095) & (-4096) ); }	// rounds addr up to a multiple of 4096 (4KB)
+template <>  uint64_t		SharedCache<x86_64>::regionAlign(uint64_t addr) { return ( (addr + 4095) & (-4096) ); }	// rounds addr up to a multiple of 4096 (4KB)
+template <>  uint64_t		SharedCache<arm>::regionAlign(uint64_t addr)    { return ( (addr + 16383) & (-16384) ); }	// rounds addr up to a multiple of 16384 (16KB), although arm's pageAlign uses 4KB
+template <>  uint64_t		SharedCache<arm64>::regionAlign(uint64_t addr)  { return ( (addr + 16383) & (-16384) ); }	// rounds addr up to a multiple of 16384 (16KB)
+
+
+template <typename A>  
+uint64_t SharedCache<A>::pageAlign4KB(uint64_t addr)    { return ( (addr + 4095) & (-4096) ); }	// rounds addr up to a multiple of 4096 for every architecture A (not specialized per arch)
 
 template <typename A>
-SharedCache<A>::SharedCache(ArchGraph* graph, const char* rootPath, const char* overlayPath, const char* cacheDir, bool explicitCacheDir, bool alphaSort, bool verify, bool optimize, uint64_t dyldBaseAddress) 
+SharedCache<A>::SharedCache(ArchGraph* graph, const char* rootPath, const std::vector<const char*>& overlayPaths, const char* cacheDir, bool explicitCacheDir, bool alphaSort, bool verify, bool optimize, uint64_t dyldBaseAddress) 
   : fArchGraph(graph), fVerify(verify), fExistingIsNotUpToDate(true), 
 	fCacheFileInFinalLocation(rootPath[0] == '\0'), fCacheFilePath(NULL),
 	fExistingCacheForVerification(NULL), fDyldBaseAddress(dyldBaseAddress),
@@ -986,8 +1084,12 @@
 		LayoutInfo temp;
 		temp.layout = lib;
 		temp.info.address = 0;
+		temp.info.inode = lib->getInode();
 		temp.info.modTime = lib->getLastModTime();
-		temp.info.inode = lib->getInode();
+		if ( iPhoneOS ) {
+			temp.info.inode = pathHash(lib->getID().name);
+			temp.info.modTime = 0;
+		}
 		temp.info.pathFileOffset = lib->getNameFileOffset();  // for now this is the offset within the dylib
 		for(ArchGraph::StringToString::iterator ait = aliases.begin(); ait != aliases.end(); ++ait) {
 			if ( strcmp(ait->second, lib->getID().name) == 0 ) {
@@ -1009,8 +1111,9 @@
 	if ( explicitCacheDir ) {
 		fCacheFilePath = strdup(cachePathCanonical);
 	}
-	else if ( overlayPath[0] != '\0' ) {
-		strcpy(cachePath, overlayPath);
+	else if ( overlayPaths.size() == 1 ) {
+		// if no -cache_dir and exactly on -overlay, write cache file into that overlay dir
+		strcpy(cachePath, overlayPaths[0]);
 		strcat(cachePath, "/");
 		strcat(cachePath, cachePathCanonical);
 		fCacheFilePath = strdup(cachePath);
@@ -1024,8 +1127,13 @@
 	else {
 		fCacheFilePath = strdup(cachePathCanonical);
 	}
-	if ( overlayPath[0] != '\0' ) {
-		// in overlay mode if there already is a cache file in the overlay
+
+	// If the path we are writing to is trusted then our sources need to be trusted
+	// <rdar://problem/21166835> Can't update the update_dyld_shared_cache on a non-boot volume
+	rootless = rootless_check_trusted(fCacheFilePath);
+
+	if ( overlayPaths.size() == 1 ) {
+		// in overlay mode if there already is a cache file in the overlay,
 		// check if it is up to date.  
 		struct stat stat_buf;
 		if ( stat(fCacheFilePath, &stat_buf) == 0 ) {
@@ -1082,6 +1190,10 @@
 			temp.aliases.clear();
 			temp.aliases.push_back(aliasPath);
 			temp.info.pathFileOffset = fHeaderSize; 
+			if ( iPhoneOS ) {
+				temp.info.inode = pathHash(aliasPath); 
+				temp.info.modTime = 0;
+			}
 			fDylibAliases.push_back(temp);
 			fHeaderSize += strlen(aliasPath)+1;
 		}
@@ -1119,6 +1231,15 @@
 	return proposedNewAddress;
 }
 
+template <typename A>
+uint64_t SharedCache<A>::pathHash(const char* path)	// Hashes a NUL-terminated string: starting from 0, each char c updates sum to sum*5 + c in uint64_t arithmetic (wraps on overflow); "" yields 0.
+{
+	uint64_t sum = 0;
+	for (const char* s=path; *s != '\0'; ++s)
+		sum += sum*4 + *s;	// i.e. sum = sum*5 + *s; *s is a plain char, so bytes >= 0x80 add a negative value where char is signed
+	return sum;
+}
+	
 
 template <typename A>
 void SharedCache<A>::assignNewBaseAddresses(bool verify)
@@ -1132,6 +1253,8 @@
 			MachOLayoutAbstraction::Segment& seg = segs[i];
 			seg.reset();
 			if ( seg.executable() && !seg.writable() ) {
+				// <rdar://problem/15947734> Some dylib require extra alignment
+				currentExecuteAddress = (currentExecuteAddress + seg.alignment() - 1) & (-seg.alignment());
 				// __TEXT segment
 				if ( it->info.address == 0 )
 					it->info.address = currentExecuteAddress;
@@ -1140,24 +1263,87 @@
 			}
 		}
 	}
-
-	// layout DATA for dylibs
+	// align __TEXT region
+	currentExecuteAddress = regionAlign(currentExecuteAddress);
+
+#define DENSE_PACK 0
+	// layout __DATA* segments
+	std::vector<MachOLayoutAbstraction::Segment*> dataSegs;
+	std::vector<MachOLayoutAbstraction::Segment*> dataConstSegs;
+	std::vector<MachOLayoutAbstraction::Segment*> dataDirtySegs;
 	const uint64_t startWritableAddress = sharedRegionStartWritableAddress(currentExecuteAddress);
 	uint64_t currentWritableAddress = startWritableAddress;
-	for(typename std::vector<LayoutInfo>::iterator it = fDylibs.begin(); it != fDylibs.end(); ++it) {
-		std::vector<MachOLayoutAbstraction::Segment>& segs = ((MachOLayoutAbstraction*)(it->layout))->getSegments();
-		for (int i=0; i < segs.size(); ++i) {
-			MachOLayoutAbstraction::Segment& seg = segs[i];
-			seg.reset();
+	for (const LayoutInfo& info : fDylibs ) {
+		for (MachOLayoutAbstraction::Segment& seg : ((MachOLayoutAbstraction*)(info.layout))->getSegments()) {
 			if ( seg.writable() ) {
 				if ( seg.executable() ) 
 					throw "found writable and executable segment";
-				// __DATA segment
-				seg.setNewAddress(currentWritableAddress);
-				currentWritableAddress = pageAlign(seg.newAddress() + seg.size());
-			}
-		}
-	}
+				seg.reset();
+				if ( strcmp(seg.name(), "__DATA_CONST") == 0 )
+					dataConstSegs.push_back(&seg);
+				else if ( strcmp(seg.name(), "__DATA_DIRTY") == 0 )
+					dataDirtySegs.push_back(&seg);
+				else
+					dataSegs.push_back(&seg);
+			}
+		}
+	}
+	// coalesce all __DATA_CONST segments
+	for (MachOLayoutAbstraction::Segment* seg : dataConstSegs) {
+	#if DENSE_PACK
+		// start segment at needed alignment
+		currentWritableAddress = (currentWritableAddress + seg->sectionsAlignment() - 1) & (-seg->sectionsAlignment());
+		seg->setNewAddress(currentWritableAddress);
+		// pack together
+		uint64_t justSectionsSize = seg->sectionsSize();
+		currentWritableAddress = seg->newAddress() + justSectionsSize;
+		seg->setSize(justSectionsSize);
+		if ( seg->fileSize() > justSectionsSize )
+			seg->setFileSize(justSectionsSize);
+	#else
+		seg->setNewAddress(currentWritableAddress);
+		// pack to 4KB pages
+		currentWritableAddress = pageAlign4KB(seg->newAddress() + seg->size());
+	#endif
+	}
+	#if DENSE_PACK
+	currentWritableAddress = pageAlign4KB(currentWritableAddress);
+	#endif
+	// coalesce all __DATA segments
+	for (MachOLayoutAbstraction::Segment* seg : dataSegs) {
+	#if DENSE_PACK
+		// start segment at needed alignment
+		currentWritableAddress = (currentWritableAddress + seg->sectionsAlignment() - 1) & (-seg->sectionsAlignment());
+		seg->setNewAddress(currentWritableAddress);
+		// pack together
+		uint64_t justSectionsSize = seg->sectionsSize();
+		currentWritableAddress = seg->newAddress() + justSectionsSize;
+		seg->setSize(justSectionsSize);
+		if ( seg->fileSize() > justSectionsSize )
+			seg->setFileSize(justSectionsSize);
+	#else
+		seg->setNewAddress(currentWritableAddress);
+		// pack to 4KB pages
+		currentWritableAddress = pageAlign4KB(seg->newAddress() + seg->size());
+	#endif
+	}
+	#if DENSE_PACK
+	currentWritableAddress = pageAlign4KB(currentWritableAddress);
+	#endif
+ 	// coalesce all __DATA_DIRTY segments
+	for (MachOLayoutAbstraction::Segment* seg : dataDirtySegs) {
+		// start segment at needed alignment
+		currentWritableAddress = (currentWritableAddress + seg->sectionsAlignment() - 1) & (-seg->sectionsAlignment());
+		seg->setNewAddress(currentWritableAddress);
+		// pack together
+		uint64_t justSectionsSize = seg->sectionsSize();
+		currentWritableAddress = seg->newAddress() + justSectionsSize;
+		seg->setSize(justSectionsSize);
+		if ( seg->fileSize() > justSectionsSize )
+			seg->setFileSize(justSectionsSize);
+	}
+	// align __DATA region
+	currentWritableAddress = regionAlign(currentWritableAddress);
 
 	// layout all read-only (but not LINKEDIT) segments
 	const uint64_t startReadOnlyAddress = sharedRegionStartReadOnlyAddress(currentWritableAddress, currentExecuteAddress);
@@ -1175,6 +1361,7 @@
 	}	
 
 	// layout all LINKEDIT segments at end of all read-only segments
+	currentReadOnlyAddress = regionAlign(currentReadOnlyAddress); // <rdar://problem/16491435>
 	fLinkEditsStartAddress = currentReadOnlyAddress;
 	fFirstLinkEditSegment = NULL;
 	for(typename std::vector<LayoutInfo>::iterator it = fDylibs.begin(); it != fDylibs.end(); ++it) {
@@ -1283,7 +1470,7 @@
 	strcpy(&temp[15-strlen(archPairName)], archPairName);
 	if ( strcmp(header->magic(), temp) != 0 ) {
 		if ( fVerify ) {
-			fprintf(stderr, "update_dyld_shared_cache[%u] cannot verify %s because current cache file has invalid header\n", getpid(), archName());
+			fprintf(stderr, "update_dyld_shared_cache[%u] cannot verify %s because current cache file has invalid header\n", getpid(), archPairName);
 			return false;
 		}
 		else {
@@ -1294,11 +1481,11 @@
 	// not valid if count of images does not match current images needed
 	if ( header->imagesCount() != (fDylibs.size()+aliasCount) ) {
 		if ( fVerify ) {
-			fprintf(stderr, "update_dyld_shared_cache[%u] cannot verify %s because current cache file contains a different set of dylibs\n", getpid(), archName());
+			fprintf(stderr, "update_dyld_shared_cache[%u] cannot verify %s because current cache file contains a different set of dylibs\n", getpid(), archPairName);
 			return false;
 		}
 		else {
-			fprintf(stderr, "update_dyld_shared_cache[%u] updating %s cache because current cache file contains a different set of dylibs\n", getpid(), archName());
+			fprintf(stderr, "update_dyld_shared_cache[%u] updating %s cache because current cache file contains a different set of dylibs\n", getpid(), archPairName);
 			return true;
 		}
 	}
@@ -1317,7 +1504,7 @@
 			if ( fVerify ) {
 				if ( cacheEntry->pathFileOffset() > textSize ) {
 					throwf("update_dyld_shared_cache[%u]: for arch=%s, image entries corrupt, bad path offset in %s\n", 
-								getpid(), archName(), it->layout->getID().name);
+								getpid(), archPairName, it->layout->getID().name);
 				}
 				// in -verify mode, just match by path and warn if file looks different
 				if ( strcmp((char*)cache+cacheEntry->pathFileOffset(), it->layout->getID().name) == 0 ) {
@@ -1325,7 +1512,7 @@
 					sortingMap[it->layout] = cacheEntry-imagesStart;
 					if ( (cacheEntry->inode() != it->info.inode) || (cacheEntry->modTime() != it->info.modTime) ) {
 						fprintf(stderr, "update_dyld_shared_cache[%u] warning: for arch=%s, %s has changed since cache was built\n", 
-								getpid(), archName(), it->layout->getID().name);
+								getpid(), archPairName, it->layout->getID().name);
 					}
 					break;
 				}
@@ -1346,10 +1533,10 @@
 		}
 		if ( !found ) {
 			if ( fVerify ) {
-				throwf("update_dyld_shared_cache[%u] can't verify %s cache because %s is not in existing cache\n", getpid(), archName(), it->layout->getID().name);
+				throwf("update_dyld_shared_cache[%u] can't verify %s cache because %s is not in existing cache\n", getpid(), archPairName, it->layout->getID().name);
 			}
 			else {
-				fprintf(stderr, "update_dyld_shared_cache[%u] updating %s cache because dylib at %s has changed\n", getpid(), archName(), it->layout->getID().name);
+				fprintf(stderr, "update_dyld_shared_cache[%u] updating %s cache because dylib at %s has changed\n", getpid(), archPairName, it->layout->getID().name);
 				return true;
 			}
 		}
@@ -1762,7 +1949,7 @@
 }
 
 template <typename A>
-void LinkEditOptimizer<A>::updateLoadCommands(uint64_t newVMAddress, uint64_t size, uint32_t stringPoolOffset, 
+void LinkEditOptimizer<A>::updateLoadCommands(uint64_t newVMAddress, uint64_t leSize, uint32_t stringPoolOffset, 
 												uint32_t linkEditsFileOffset, bool keepSignatures)
 {
 	// set LINKEDIT segment commmand to new merged LINKEDIT
@@ -1774,22 +1961,39 @@
 			macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
 			if ( strcmp(seg->segname(), "__LINKEDIT") == 0 ) {
 				seg->set_vmaddr(newVMAddress);
-				seg->set_vmsize(size);
-				seg->set_filesize(size);
+				seg->set_vmsize(leSize);
+				seg->set_filesize(leSize);
 				seg->set_fileoff(linkEditsFileOffset);
 			}
-			// don't alter __TEXT until <rdar://problem/7022345> is fixed
-			else if ( strcmp(seg->segname(), "__TEXT") != 0 ) {
-				// update all other segments fileoff to be offset from start of cache file
+			else {
 				pint_t oldFileOff = seg->fileoff();
-				seg->set_fileoff(fSharedCache.cacheFileOffsetForVMAddress(seg->vmaddr()));
+				// don't alter __TEXT until <rdar://problem/7022345> is fixed
+				if ( strcmp(seg->segname(), "__TEXT") != 0 ) {
+					// update all other segments fileoff to be offset from start of cache file
+					seg->set_fileoff(fSharedCache.cacheFileOffsetForVMAddress(seg->vmaddr()));
+				}
 				pint_t fileOffsetDelta = seg->fileoff() - oldFileOff;
+				const MachOLayoutAbstraction::Segment* layoutSeg = fLayout.getSegment(seg->segname());
+				if ( layoutSeg != NULL ) {
+					//if ( seg->filesize() != layoutSeg->fileSize() ) {
+					//	fprintf(stderr, "LC filesize=0x%08llX, trimmed seg file size=0x%08llX, seg=%s, path=%s\n", 
+					//					seg->filesize(), layoutSeg->fileSize(), seg->segname(), fLayout.getFilePath());
+					//}
+					//if ( seg->vmsize() != layoutSeg->size() ) {
+					//	fprintf(stderr, "LC   vmsize=0x%08llX, trimmed seg      size=0x%08llX, seg=%s, path=%s\n", 
+					//					seg->vmsize(), layoutSeg->size(), seg->segname(), fLayout.getFilePath());
+					//}
+					seg->set_vmsize(layoutSeg->size());
+					seg->set_filesize(layoutSeg->fileSize());
+				}
 				// update all sections in this segment
 				macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)seg + sizeof(macho_segment_command<P>));
 				macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
 				for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
 					if ( sect->offset() != 0 )
 						sect->set_offset(sect->offset()+fileOffsetDelta);
+						//if ( (sect->flags() & SECTION_TYPE) == S_MOD_INIT_FUNC_POINTERS )
+						//	fprintf(stderr, "found initializer(s) in %s\n", fLayout.getFilePath());
 				}
 			}
 		}
@@ -1854,6 +2058,7 @@
 		switch ( srcCmd->cmd() ) {
 			case LC_SEGMENT_SPLIT_INFO:
 			case LC_DYLIB_CODE_SIGN_DRS:
+			case LC_RPATH:
 				// don't copy
 				break;
 			case LC_CODE_SIGNATURE:
@@ -1986,7 +2191,7 @@
 
 	// update load commands so that all dylibs shared different areas of the same LINKEDIT segment
 	for(typename std::vector<LinkEditOptimizer<A>*>::iterator it = optimizers.begin(); it != optimizers.end(); ++it) {
-		(*it)->updateLoadCommands(fLinkEditsStartAddress, fLinkEditsTotalUnoptimizedSize, fOffsetOfOldStringPoolInCombinedLinkedit, linkEditsFileOffset, keepSignatures);
+		(*it)->updateLoadCommands(fLinkEditsStartAddress, fLinkEditsTotalOptimizedSize, fOffsetOfOldStringPoolInCombinedLinkedit, linkEditsFileOffset, keepSignatures);
 	}
 
 	//fprintf(stderr, "fLinkEditsTotalUnoptimizedSize=%llu, fLinkEditsTotalOptimizedSize=%u\n", fLinkEditsTotalUnoptimizedSize, fLinkEditsTotalOptimizedSize);
@@ -2008,10 +2213,10 @@
 				seg.setFileOffset(linkEditsFileOffset);
 			}
 		}
-	}
-	
+	}	
+		
 	// return new end of cache
-	return (uint8_t*)fFirstLinkEditSegment->mappedAddress() + fLinkEditsTotalOptimizedSize;
+	return (uint8_t*)fFirstLinkEditSegment->mappedAddress() + regionAlign(fLinkEditsTotalOptimizedSize);
 }
 
 
@@ -2096,6 +2301,140 @@
 };
 
 
+// Uniques objc protocols by name across all dylibs: records one protocol per name, writes a fixed-up copy
+// of each into a caller-supplied buffer, and redirects protocol references to those copies.
+template <typename A>
+class ProtocolOptimizer
+{
+private:
+    typedef typename A::P P;
+    typedef typename A::P::uint_t pint_t;
+
+    objc_opt::string_map fProtocolNames;    // protocol name -> VM address of the name string
+    objc_opt::protocol_map fProtocols;      // protocol name -> VM address of the chosen protocol object
+    size_t fProtocolCount;                  // number of distinct protocol names recorded
+    size_t fProtocolReferenceCount;         // number of references whose value was changed
+
+    friend class ProtocolReferenceWalker<A, ProtocolOptimizer<A>>;
+    // Returns the VM address a protocol reference should hold: the recorded protocol with the same name.
+    pint_t visitProtocolReference(SharedCache<A>* cache, pint_t oldValue)
+    {
+        objc_protocol_t<A>* proto = (objc_protocol_t<A>*)cache->mappedAddressForVMAddress(oldValue);
+        // Use find() rather than operator[]: operator[] would insert a zero entry for an unknown
+        // name and silently rewrite the reference to NULL. Unknown protocols keep their old value.
+        objc_opt::protocol_map::iterator pos = fProtocols.find(proto->getName(cache));
+        if (pos == fProtocols.end()) return oldValue;
+        pint_t newValue = pos->second;
+        if (oldValue != newValue) fProtocolReferenceCount++;
+        return newValue;
+    }
+
+public:
+    ProtocolOptimizer()
+        : fProtocolNames()
+        , fProtocols()
+        , fProtocolCount(0)
+        , fProtocolReferenceCount(0)
+    { }
+
+    // Records every not-yet-seen protocol listed in this image's __DATA,__objc_protolist section.
+    // Throws a C string if a protocol's name starts with "_TtP" or it is larger than objc_protocol_t<A>.
+    void addProtocols(SharedCache<A>* cache, 
+                      const macho_header<P>* header)
+    {
+        PointerSection<A, objc_protocol_t<A> *> 
+            protocols(cache, header, "__DATA", "__objc_protolist");
+        for (pint_t i = 0; i < protocols.count(); i++) {
+            objc_protocol_t<A> *proto = protocols.get(i);
+            const char *name = proto->getName(cache);
+            if (fProtocolNames.count(name) == 0) {
+                // Need a Swift demangler API in OS before we can handle this
+                if (0 == strncmp(name, "_TtP", 4)) {
+                    throw "objc protocol has Swift name";
+                }
+                if (proto->getSize() > sizeof(objc_protocol_t<A>)) {
+                    throw "objc protocol is too big";
+                }
+                uint64_t name_vmaddr = cache->VMAddressForMappedAddress(name);
+                uint64_t proto_vmaddr = cache->VMAddressForMappedAddress(proto);
+                fProtocolNames.insert(objc_opt::string_map::value_type(name, name_vmaddr));
+                fProtocols.insert(objc_opt::protocol_map::value_type(name, proto_vmaddr));
+                fProtocolCount++;
+            }
+        }
+    }
+
+    // Copies each recorded protocol into dest (advancing dest, shrinking remaining), fixes up the copy and
+    // repoints fProtocols at it. Returns NULL on success or when there are no protocols, else a message.
+    const char *writeProtocols(SharedCache<A>* cache, 
+                               uint8_t *& dest, size_t& remaining, 
+                               std::vector<void*>& pointersInData, 
+                               pint_t protocolClassVMAddr)
+    {
+        if (fProtocolCount == 0) return NULL;
+        if (protocolClassVMAddr == 0) {
+            return "libobjc's Protocol class symbol not found (metadata not optimized)";
+        }
+        size_t required = fProtocolCount * sizeof(objc_protocol_t<A>);
+        if (remaining < required) {
+            return "libobjc's read-write section is too small (metadata not optimized)";
+        }
+        for (objc_opt::protocol_map::iterator iter = fProtocols.begin();
+             iter != fProtocols.end();
+             ++iter)
+        {
+            objc_protocol_t<A>* oldProto = (objc_protocol_t<A>*)
+                cache->mappedAddressForVMAddress(iter->second);
+            // Create a new protocol object.
+            objc_protocol_t<A>* proto = (objc_protocol_t<A>*)dest;
+            dest += sizeof(*proto);
+            remaining -= sizeof(*proto);
+
+            // Initialize it.
+            uint32_t oldSize = oldProto->getSize();
+            memcpy(proto, oldProto, oldSize);
+            if (!proto->getIsaVMAddr()) {
+                proto->setIsaVMAddr(protocolClassVMAddr);
+            }
+            if (oldSize < sizeof(*proto)) {
+                // Protocol object is old. Populate new fields.
+                proto->setSize(sizeof(objc_protocol_t<A>));
+                // missing extendedMethodTypes is already nil
+            }
+            // Some protocol objects are big enough to have the 
+            // demangledName field but don't initialize it.
+            if (! proto->getDemangledName(cache)) {
+                proto->setDemangledName(cache, proto->getName(cache));
+            }
+            proto->setFixedUp();
+            // Redirect the protocol table at our new object.
+            iter->second = cache->VMAddressForMappedAddress(proto);
+            // Add new rebase entries.
+            proto->addPointers(pointersInData);
+        }
+        return NULL;
+    }
+
+    // Walks the image's protocol references, passing each one through visitProtocolReference().
+    void updateReferences(SharedCache<A>* cache, const macho_header<P>* header)
+    {
+        ProtocolReferenceWalker<A, ProtocolOptimizer<A>> refs(*this);
+        refs.walk(cache, header);
+    }
+
+    objc_opt::string_map& protocolNames() { 
+        return fProtocolNames;
+    }
+
+    objc_opt::protocol_map& protocols() { 
+        return fProtocols;
+    }
+
+    size_t protocolCount() const { return fProtocolCount; }
+    size_t protocolReferenceCount() const { return fProtocolReferenceCount; }
+};
+
+
 static int percent(size_t num, size_t denom) {
     if (denom) return (int)(num / (double)denom * 100);
     else return 100;
@@ -2115,14 +2454,17 @@
 		warn(archName(), "libobjc's optimization structure size is wrong (metadata not optimized)");
     }
 
-    // Find libobjc's empty sections to fill in
+    // Find libobjc's empty sections to fill in.
+    // Find libobjc's list of pointers for us to use.
     const macho_section<P> *optROSection = NULL;
     const macho_section<P> *optRWSection = NULL;
+    const macho_section<P> *optPointerListSection = NULL;
 	for(typename std::vector<LayoutInfo>::const_iterator it = fDylibs.begin(); it != fDylibs.end(); ++it) {
-        if ( strstr(it->layout->getFilePath(), "libobjc") != NULL ) {
+        if ( strstr(it->layout->getFilePath(), "/libobjc.") != NULL ) {
 			const macho_header<P>* mh = (const macho_header<P>*)(*it->layout).getSegments()[0].mappedAddress();
 			optROSection = mh->getSection("__TEXT", "__objc_opt_ro");
 			optRWSection = mh->getSection("__DATA", "__objc_opt_rw");
+			optPointerListSection = mh->getSection("__DATA", "__objc_opt_ptrs");
 			break;
 		}
 	}
@@ -2136,6 +2478,11 @@
 		warn(archName(), "libobjc's read/write section missing (metadata not optimized)");
 		return;
 	}
+	
+	if ( optPointerListSection == NULL ) {
+		warn(archName(), "libobjc's pointer list section missing (metadata not optimized)");
+		return;
+	}
 
 	uint8_t* optROData = (uint8_t*)mappedAddressForVMAddress(optROSection->addr());
     size_t optRORemaining = optROSection->size();
@@ -2155,6 +2502,12 @@
 		warn(archName(), "libobjc's read-only section version is unrecognized (metadata not optimized)");
 		return;
 	}
+
+    if (optPointerListSection->size() < sizeof(objc_opt::objc_opt_pointerlist_tt<pint_t>)) {
+        warn(archName(), "libobjc's pointer list section is too small (metadata not optimized)");
+		return;
+    }
+    const objc_opt::objc_opt_pointerlist_tt<pint_t> *optPointerList = (const objc_opt::objc_opt_pointerlist_tt<pint_t> *)mappedAddressForVMAddress(optPointerListSection->addr());
 
     // Write nothing to optROHeader until everything else is written.
     // If something fails below, libobjc will not use the section.
@@ -2255,11 +2608,57 @@
     }
 
 
+    // Unique protocols and build protocol table.
+
+    // This is SAFE: no protocol references are updated yet
+    // This must be done AFTER updating method lists.
+
+    ProtocolOptimizer<A> protocolOptimizer;
+	for(typename std::vector<LayoutInfo>::const_iterator it = sizeSortedDylibs.begin(); it != sizeSortedDylibs.end(); ++it) {
+        const macho_header<P> *mh = (const macho_header<P>*)(*it->layout).getSegments()[0].mappedAddress();
+        protocolOptimizer.addProtocols(this, mh);
+	}
+
+    pint_t protocolClassVMAddr = P::getP(optPointerList->protocolClass);
+    err = protocolOptimizer.writeProtocols(this, optRWData, optRWRemaining, 
+                                           pointersInData, protocolClassVMAddr);
+    if (err) {
+        warn(archName(), err);
+        return;
+    }
+
+    uint64_t protocoloptVMAddr = optROSection->addr() + optROSection->size() - optRORemaining;
+    objc_opt::objc_protocolopt_t *protocolopt = new(optROData) objc_opt::objc_protocolopt_t;
+    err = protocolopt->write(protocoloptVMAddr, optRORemaining, 
+                             protocolOptimizer.protocolNames(), 
+                             protocolOptimizer.protocols(), verbose);
+    if (err) {
+        warn(archName(), err);
+        return;
+    }
+    optROData += protocolopt->size();
+    optRORemaining -= protocolopt->size();
+    protocolopt->byteswap(E::little_endian), protocolopt = NULL;
+
+
+    // Redirect protocol references to the uniqued protocols.
+
+    // This is SAFE: the new protocol objects are still usable as-is.
+	for(typename std::vector<LayoutInfo>::const_iterator it = sizeSortedDylibs.begin(); it != sizeSortedDylibs.end(); ++it) {
+        const macho_header<P> *mh = (const macho_header<P>*)(*it->layout).getSegments()[0].mappedAddress();
+        protocolOptimizer.updateReferences(this, mh);
+	}
+
+
     // Repair ivar offsets.
 
     // This is SAFE: the runtime always validates ivar offsets at runtime.
 
     IvarOffsetOptimizer<A> ivarOffsetOptimizer;
+	for(typename std::vector<LayoutInfo>::const_iterator it = sizeSortedDylibs.begin(); it != sizeSortedDylibs.end(); ++it) {
+        const macho_header<P> *mh = (const macho_header<P>*)(*it->layout).getSegments()[0].mappedAddress();
+        ivarOffsetOptimizer.findGCClasses(this, mh);
+	}
 	for(typename std::vector<LayoutInfo>::const_iterator it = sizeSortedDylibs.begin(); it != sizeSortedDylibs.end(); ++it) {
         const macho_header<P> *mh = (const macho_header<P>*)(*it->layout).getSegments()[0].mappedAddress();
         ivarOffsetOptimizer.optimize(this, mh);
@@ -2285,6 +2684,7 @@
     // Success. Update RO header last.
     E::set32(optROHeader->selopt_offset, seloptVMAddr - optROSection->addr());
     E::set32(optROHeader->clsopt_offset, clsoptVMAddr - optROSection->addr());
+    E::set32(optROHeader->protocolopt_offset, protocoloptVMAddr - optROSection->addr());
     E::set32(optROHeader->headeropt_offset, hinfoVMAddr - optROSection->addr());
 
     if ( verbose ) {
@@ -2305,6 +2705,12 @@
                 "updated %zu selector references\n", 
                 archName(), uniq.count());
         fprintf(stderr, "update_dyld_shared_cache: for %s, "
+                "uniqued %zu protocols\n", 
+                archName(), protocolOptimizer.protocolCount());
+        fprintf(stderr, "update_dyld_shared_cache: for %s, "
+                "updated %zu protocol references\n", 
+                archName(), protocolOptimizer.protocolReferenceCount());
+        fprintf(stderr, "update_dyld_shared_cache: for %s, "
                 "updated %zu ivar offsets\n", 
                 archName(), ivarOffsetOptimizer.optimized());
         fprintf(stderr, "update_dyld_shared_cache: for %s, "
@@ -2397,12 +2803,108 @@
 	return true;
 }
 
+template <typename A>
+void SharedCache<A>::writeCacheFile(const char *cacheFilePath, uint8_t *cacheFileBuffer, uint32_t cacheFileSize, bool deleteOldCache) {
+	// Stamps a UUID into cacheFileBuffer, writes the buffer to "<cacheFilePath>.tmp<pid>", then rename()s it onto cacheFilePath.
+	char tempCachePath[strlen(cacheFilePath)+16];
+	sprintf(tempCachePath, "%s.tmp%u", cacheFilePath, getpid());
+	int fd = -1;	// temp file descriptor; -1 whenever it is not open
+
+	try {
+		// install signal handlers to delete temp file if program is killed
+		sCleanupFile = tempCachePath;
+		::signal(SIGINT, cleanup);
+		::signal(SIGBUS, cleanup);
+		::signal(SIGSEGV, cleanup);
+
+		// compute UUID of whole cache
+		uint8_t digest[16];
+		CC_MD5(cacheFileBuffer, cacheFileSize, digest);
+		// <rdar://problem/6723729> uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats
+		digest[6] = ( digest[6] & 0x0F ) | ( 3 << 4 );
+		digest[8] = ( digest[8] & 0x3F ) | 0x80;
+		((dyldCacheHeader<E>*)cacheFileBuffer)->set_uuid(digest);
+
+		// create var/db/dyld dirs if needed (refuse paths that would overflow the fixed-size buffer)
+		char dyldDirs[1024];
+		if ( strlcpy(dyldDirs, cacheFilePath, sizeof(dyldDirs)) >= sizeof(dyldDirs) )
+			throwf("cache file path is too long: %s", cacheFilePath);
+		char* lastSlash = strrchr(dyldDirs, '/');
+		if ( lastSlash != NULL )
+			lastSlash[1] = '\0';
+		struct stat stat_buf;
+		if ( stat(dyldDirs, &stat_buf) != 0 ) {
+			for (char* slash = strchr(&dyldDirs[1], '/'); slash != NULL; slash = strchr(slash+1, '/')) {
+				*slash = '\0';	// temporarily end the path here so mkdir() sees one more component each pass
+				::mkdir(dyldDirs, S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH);
+				*slash = '/';
+			}
+		}
+
+		// create temp file for cache
+		fd = ::open(tempCachePath, O_CREAT | O_RDWR | O_TRUNC, 0644);
+		if ( fd == -1 )
+			throwf("can't create temp file %s, errno=%d", tempCachePath, errno);
+
+		// try to allocate whole cache file contiguously
+		fstore_t fcntlSpec = { F_ALLOCATECONTIG|F_ALLOCATEALL, F_PEOFPOSMODE, 0, cacheFileSize, 0 };
+		::fcntl(fd, F_PREALLOCATE, &fcntlSpec);
+
+		// write out cache file
+		if ( verbose )
+			fprintf(stderr, "update_dyld_shared_cache: writing cache to disk: %s\n", tempCachePath);
+		if ( ::pwrite(fd, cacheFileBuffer, cacheFileSize, 0) != cacheFileSize )
+			throwf("write() failure creating cache file, errno=%d", errno);
+
+		// flush to disk and close
+		int result = ::fcntl(fd, F_FULLFSYNC, NULL);
+		if ( result == -1 )
+			fprintf(stderr, "update_dyld_shared_cache: warning, fcntl(F_FULLFSYNC) failed with errno=%d for %s\n", errno, tempCachePath);
+		result = ::close(fd);
+		fd = -1;	// descriptor is gone even if close() reported an error; the catch block must not close it again
+		if ( result != 0 )
+			fprintf(stderr, "update_dyld_shared_cache: warning, close() failed with errno=%d for %s\n", errno, tempCachePath);
+
+		if ( !iPhoneOS )
+			adhoc_codesign_share_cache(tempCachePath);
+
+		if ( deleteOldCache && (strrchr(cacheFilePath, '/') != NULL) ) {
+			if ( (::unlink(cacheFilePath) != 0) && (errno != ENOENT) )
+				fprintf(stderr, "update_dyld_shared_cache: warning, unable to remove existing cache %s because errno=%d\n", cacheFilePath, errno);
+		}
+
+		// move new cache file to correct location for use after reboot
+		if ( verbose )
+			fprintf(stderr, "update_dyld_shared_cache: atomically moving cache file into place: %s\n", cacheFilePath);
+		result = ::rename(tempCachePath, cacheFilePath);
+		if ( result != 0 )
+			throwf("can't swap newly create dyld shared cache file: rename(%s,%s) returned errno=%d", tempCachePath, cacheFilePath, errno);
+
+		// flush everything to disk to assure rename() gets recorded
+		sync_volume(cacheFilePath);
+
+		// restore default signal handlers
+		::signal(SIGINT, SIG_DFL);
+		::signal(SIGBUS, SIG_DFL);
+		::signal(SIGSEGV, SIG_DFL);
+	}
+	catch (...){
+		// release the descriptor (if still open) and remove temp cache file
+		if ( fd != -1 ) ::close(fd);
+		::unlink(tempCachePath);
+		// uninstall the handlers too: sCleanupFile points at tempCachePath, which dies when this frame unwinds
+		::signal(SIGINT, SIG_DFL);
+		::signal(SIGBUS, SIG_DFL);
+		::signal(SIGSEGV, SIG_DFL);
+		throw;
+	}
+}
 
 
 template <>	 bool	SharedCache<x86_64>::addCacheSlideInfo(){ return true; }
 template <>	 bool	SharedCache<arm>::addCacheSlideInfo()	{ return true; }
 template <>	 bool	SharedCache<x86>::addCacheSlideInfo()	{ return false; }
-
+template <>	 bool	SharedCache<arm64>::addCacheSlideInfo()	{ return true; }
 
 
 template <typename A>
@@ -2410,6 +2912,12 @@
 								int archCount, bool keepSignatures, bool dontMapLocalSymbols)
 {
 	bool didUpdate = false;
+	bool canEmitDevelopmentCache = true;
+	char devCacheFilePath[strlen(fCacheFilePath)+strlen(".development")+1];	// +1: room for the terminating NUL
+	char fileListFilePath[strlen(fCacheFilePath)+strlen(".list")+1];	// +1: room for the terminating NUL
+	snprintf(devCacheFilePath, sizeof(devCacheFilePath), "%s.development", fCacheFilePath);
+	snprintf(fileListFilePath, sizeof(fileListFilePath), "%s.list", fCacheFilePath);
+	std::vector<const char *> paths;
 	
 	// already up to date?
 	if ( force || fExistingIsNotUpToDate ) {
@@ -2425,8 +2933,6 @@
 			::unlink(fCacheFilePath);
 		uint8_t* inMemoryCache = NULL;
 		uint32_t allocatedCacheSize = 0;
-		char tempCachePath[strlen(fCacheFilePath)+16];
-		sprintf(tempCachePath, "%s.tmp%u", fCacheFilePath, getpid());
 		try {
 			// allocate a memory block to hold cache
 			uint32_t cacheFileSize = 0;
@@ -2499,28 +3005,34 @@
 			const int dylibCount = fDylibs.size();
 			int dylibIndex = 0;
 			int progressIndex = 0;
+			bool foundLibSystem = false;
 			for(typename std::vector<LayoutInfo>::const_iterator it = fDylibs.begin(); it != fDylibs.end(); ++it, ++dylibIndex) {
 				const char* path = it->layout->getFilePath();
 				int src = ::open(path, O_RDONLY, 0);
 				if ( src == -1 )
-					throwf("can't open file %s, errnor=%d", it->layout->getID().name, errno);
+					throwf("can't open file %s, errno=%d", it->layout->getID().name, errno);
 				// mark source as "don't cache"
 				(void)fcntl(src, F_NOCACHE, 1);
 				// verify file has not changed since dependency analysis
 				struct stat stat_buf;
 				if ( fstat(src, &stat_buf) == -1)
 					throwf("can't stat open file %s, errno=%d", path, errno);
-				if ( (it->layout->getInode() != stat_buf.st_ino) || (it->layout->getLastModTime() != stat_buf.st_mtime) )
-					throwf("file modified during cache creation: %s", path);
-
+				if ( (it->layout->getInode() != stat_buf.st_ino) )
+					throwf("file inode changed from %llu to %llu during cache creation: %s", it->layout->getInode(), stat_buf.st_ino, path);
+				else if ( it->layout->getLastModTime() != stat_buf.st_mtime )
+					throwf("file mtime changed from 0x%lX to 0x%lX during cache creation: %s", it->layout->getLastModTime(), stat_buf.st_mtime, path);
+				if ( strcmp(it->layout->getID().name, "/usr/lib/libSystem.B.dylib") == 0 )
+					foundLibSystem = true;
 				if ( verbose )
 					fprintf(stderr, "update_dyld_shared_cache: copying %s to cache\n", it->layout->getFilePath());
 				try {
 					const std::vector<MachOLayoutAbstraction::Segment>& segs = it->layout->getSegments();
 					for (int i=0; i < segs.size(); ++i) {
 						const MachOLayoutAbstraction::Segment& seg = segs[i];
-						if ( verbose )
-							fprintf(stderr, "\t\tsegment %s, size=0x%0llX, cache address=0x%0llX\n", seg.name(), seg.fileSize(), seg.newAddress());
+						if ( verbose ) {
+							fprintf(stderr, "\t\tsegment %s, size=0x%0llX, cache address=0x%0llX, buffer address=%p\n",
+								seg.name(), seg.size(), seg.newAddress(), &inMemoryCache[cacheFileOffsetForVMAddress(seg.newAddress())]);
+						}
 						if ( seg.size() > 0 ) {
 							const uint64_t segmentSrcStartOffset = it->layout->getOffsetInUniversalFile()+seg.fileOffset();
 							const uint64_t segmentSize = seg.fileSize();
@@ -2540,6 +3052,7 @@
 					throwf("%s while copying %s to shared cache", msg, it->layout->getID().name);
 				}
 				::close(src);
+				paths.push_back(it->layout->getID().name);
 				if ( progress ) {
 					// assuming read takes 40% of time
 					int nextProgressIndex = archIndex*100+(40*dylibIndex)/dylibCount;
@@ -2548,7 +3061,9 @@
 					progressIndex = nextProgressIndex;
 				}
 			}
-						
+			if ( !foundLibSystem )
+				throw "cache would be missing required dylib /usr/lib/libSystem.B.dylib";
+
 			// set mapped address for each segment
 			for(typename std::vector<LayoutInfo>::const_iterator it = fDylibs.begin(); it != fDylibs.end(); ++it) {
 				std::vector<MachOLayoutAbstraction::Segment>& segs = ((MachOLayoutAbstraction*)(it->layout))->getSegments();
@@ -2568,7 +3083,10 @@
 			for(typename std::vector<LayoutInfo>::const_iterator it = fDylibs.begin(); it != fDylibs.end(); ++it) {
 				try {
 					Rebaser<A> r(*it->layout);
-					r.rebase(pointersInData);
+					if (!r.rebase(pointersInData)) {
+						canEmitDevelopmentCache = false;
+						fprintf(stderr, "update_dyld_shared_cache: Omitting development cache for %s, cannot rebase dylib into place for %s\n", archName(), it->layout->getID().name);
+					}
 					//if ( verbose )
 					//	fprintf(stderr, "update_dyld_shared_cache: for %s, rebasing dylib into cache for %s\n", archName(), it->layout->getID().name);
 				}
@@ -2584,13 +3102,12 @@
 			std::vector<Binder<A>*> binders;
 			for(typename std::vector<LayoutInfo>::const_iterator it = fDylibs.begin(); it != fDylibs.end(); ++it) {
 				//fprintf(stderr, "binding %s\n", it->layout->getID().name);
-				Binder<A>* binder = new Binder<A>(*it->layout, fDyldBaseAddress);
+				Binder<A>* binder = new Binder<A>(*it->layout);
 				binders.push_back(binder);
 				// only add dylibs to map
 				if ( it->layout->getID().name != NULL )
 					map[it->layout->getID().name] = binder;
 			}
-  			
 			// tell each Binder about the others
 			for(typename std::vector<Binder<A>*>::iterator it = binders.begin(); it != binders.end(); ++it) {
 				(*it)->setDependentBinders(map);
@@ -2606,6 +3123,46 @@
 					throwf("%s in %s", msg, (*it)->getDylibID());
 				}
 			}
+
+			// Patch each dylib's segment load commands (all but __LINKEDIT) to describe its placement in the cache:
+			// fileoff becomes cache-file relative (except __TEXT), vmsize/filesize come from the layout's segment,
+			// and every section's non-zero file offset is shifted by the same amount as its segment.
+			for(typename std::vector<LayoutInfo>::iterator it = fDylibs.begin(); it != fDylibs.end(); ++it) {
+				const macho_header<P>* fHeader = (const macho_header<P>*)it->layout->getSegments()[0].mappedAddress();
+				const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
+				const uint32_t cmd_count = fHeader->ncmds();
+				const macho_load_command<P>* cmd = cmds;
+				for (uint32_t i = 0; i < cmd_count; ++i) {
+					// only segment commands are rewritten; every other load command is just stepped over
+					if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
+						macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
+						if ( strcmp(seg->segname(), "__LINKEDIT") != 0 ) {
+							pint_t oldFileOff = seg->fileoff();
+							// don't alter __TEXT until <rdar://problem/7022345> is fixed
+							if ( strcmp(seg->segname(), "__TEXT") != 0 ) {
+								// update all other segments fileoff to be offset from start of cache file
+								seg->set_fileoff(cacheFileOffsetForVMAddress(seg->vmaddr()));
+							}
+							pint_t fileOffsetDelta = seg->fileoff() - oldFileOff;
+							const MachOLayoutAbstraction::Segment* layoutSeg = it->layout->getSegment(seg->segname());
+							if ( layoutSeg != NULL ) {
+								seg->set_vmsize(layoutSeg->size());
+								seg->set_filesize(layoutSeg->fileSize());
+							}
+							// update all sections in this segment
+							macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)seg + sizeof(macho_segment_command<P>));
+							macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
+							for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
+								if ( sect->offset() != 0 )
+									sect->set_offset(sect->offset()+fileOffsetDelta);
+							}
+						}
+					}
+					// step to the next load command using this command's own cmdsize()
+					cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
+				}
+			}
+
 			// optimize binding
 			for(typename std::vector<Binder<A>*>::iterator it = binders.begin(); it != binders.end(); ++it) {
 				try {
@@ -2615,6 +3172,7 @@
 					throwf("%s in %s", msg, (*it)->getDylibID());
 				}
 			}
+
 			// delete binders
 			for(typename std::vector<Binder<A>*>::iterator it = binders.begin(); it != binders.end(); ++it) {
 				delete *it;
@@ -2622,9 +3180,11 @@
 	
 			// merge/optimize all LINKEDIT segments
 			if ( optimize ) {
-				//fprintf(stderr, "update_dyld_shared_cache: original cache file size %uMB\n", cacheFileSize/(1024*1024));
+				if ( verbose )
+					fprintf(stderr, "update_dyld_shared_cache: original cache file size %uMB\n", cacheFileSize/(1024*1024));
 				cacheFileSize = (this->optimizeLINKEDIT(keepSignatures, dontMapLocalSymbols) - inMemoryCache);
-				//fprintf(stderr, "update_dyld_shared_cache: optimized cache file size %uMB\n", cacheFileSize/(1024*1024));
+				if ( verbose )
+					fprintf(stderr, "update_dyld_shared_cache: optimized cache file size %uMB\n", cacheFileSize/(1024*1024));
 				// update header to reduce mapping size
 				dyldCacheHeader<E>* cacheHeader = (dyldCacheHeader<E>*)inMemoryCache;
 				dyldCacheFileMapping<E>* mappings = (dyldCacheFileMapping<E>*)&inMemoryCache[sizeof(dyldCacheHeader<E>)];
@@ -2637,7 +3197,22 @@
 				//		header->codeSignatureOffset(), fMappings.back().sfm_address + fMappings.back().sfm_size);
 				header->set_codeSignatureOffset(fMappings.back().sfm_file_offset + fMappings.back().sfm_size);
 			}
-			
+
+			// dump dev cache with optimized linkedit, but not ObjC optimizations
+			if (iPhoneOS && canEmitDevelopmentCache) {
+				int fileListFD = ::open(fileListFilePath, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+				if ( fileListFD != -1 ) {
+					for (const char* path : paths) {
+						write(fileListFD, path, strlen(path)+1);
+						write(fileListFD, "\n", 1);
+					}
+					close(fileListFD);
+				}
+
+				((dyldCacheHeader<E>*)inMemoryCache)->set_cacheType(1);
+				writeCacheFile(devCacheFilePath, inMemoryCache, cacheFileSize, fCacheFileInFinalLocation);
+			}
+
 			// unique objc selectors and update other objc metadata
             if ( optimize ) {
 				optimizeObjC(pointersInData);
@@ -2704,7 +3279,7 @@
 				}
 				slideInfo->set_entries_count(entry_count);
 	
-				int slideInfoPageSize = pageAlign(slideInfo->entries_offset() + entry_count*entry_size);
+				int slideInfoPageSize = regionAlign(slideInfo->entries_offset() + entry_count*entry_size);
 				cacheFileSize += slideInfoPageSize;
 			
 				// update mappings to increase RO size
@@ -2723,19 +3298,6 @@
 				
 				// copy compressed into into buffer
 				memcpy(&inMemoryCache[cacheHeader->slideInfoOffset()], slideInfo, slideInfoPageSize);	
-			}
-			
-			// make sure after all optimizations, that whole cache file fits into shared region address range
-			{
-				dyldCacheHeader<E>* cacheHeader = (dyldCacheHeader<E>*)inMemoryCache;
-				dyldCacheFileMapping<E>* mappings = (dyldCacheFileMapping<E>*)&inMemoryCache[cacheHeader->mappingOffset()];
-				for (int i=0; i < cacheHeader->mappingCount(); ++i) {
-					uint64_t endAddr = mappings[i].address() + mappings[i].size();
-					if ( endAddr > (sharedRegionStartAddress() + sharedRegionSize()) ) {
-						throwf("update_dyld_shared_cache[%u] for arch=%s, shared cache will not fit in address space: 0x%llX\n",
-							getpid(), fArchGraph->archName(), endAddr);
-					}
-				}
 			}
 			
 			// append local symbol info in an unmapped region
@@ -2773,7 +3335,7 @@
 				
 				// update state
 				fUnmappedLocalSymbolsSize = pageAlign(stringsOffset + stringsSize);
-				cacheFileSize = localSymbolsOffset + fUnmappedLocalSymbolsSize;
+				cacheFileSize = regionAlign(localSymbolsOffset + fUnmappedLocalSymbolsSize);
 				
 				// update header to show location of slidePointers
 				dyldCacheHeader<E>* cacheHeader = (dyldCacheHeader<E>*)inMemoryCache;
@@ -2782,13 +3344,20 @@
 				cacheHeader->set_codeSignatureOffset(cacheFileSize);
 			}
 			
-			// compute UUID of whole cache
-			uint8_t digest[16];
-			CC_MD5(inMemoryCache, cacheFileSize, digest);
-			// <rdar://problem/6723729> uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats
-			digest[6] = ( digest[6] & 0x0F ) | ( 3 << 4 );
-			digest[8] = ( digest[8] & 0x3F ) | 0x80;
-			((dyldCacheHeader<E>*)inMemoryCache)->set_uuid(digest);
+			// make sure after all optimizations, that whole cache file fits into shared region address range
+			{
+				dyldCacheHeader<E>* cacheHeader = (dyldCacheHeader<E>*)inMemoryCache;
+				dyldCacheFileMapping<E>* mappings = (dyldCacheFileMapping<E>*)&inMemoryCache[cacheHeader->mappingOffset()];
+				// <rdar://problem/16128830> incorporate code signature size into overflow check
+				uint32_t estCodeSigSize = regionAlign(cacheFileSize/200); // guess 0.5% for code signature
+				for (int i=0; i < cacheHeader->mappingCount(); ++i) {
+					uint64_t endAddr = mappings[i].address() + mappings[i].size() + estCodeSigSize;
+					if ( endAddr > (sharedRegionStartAddress() + sharedRegionSize()) ) {
+						throwf("update_dyld_shared_cache[%u] for arch=%s, shared cache will not fit in shared regions address space.  Overflow amount: %lluKB\n",
+							getpid(), fArchGraph->archName(), (endAddr-(sharedRegionStartAddress() + sharedRegionSize()))/1024);
+					}
+				}
+			}
 			
 			if ( fVerify ) {
 				// if no existing cache, say so
@@ -2852,98 +3421,9 @@
 				}
 			}
 			else {
-				// install signal handlers to delete temp file if program is killed 
-				sCleanupFile = tempCachePath;
-				::signal(SIGINT, cleanup);
-				::signal(SIGBUS, cleanup);
-				::signal(SIGSEGV, cleanup);
-				
-				// create var/db/dyld dirs if needed
-				char dyldDirs[1024];
-				strcpy(dyldDirs, fCacheFilePath);
-				char* lastSlash = strrchr(dyldDirs, '/');
-				if ( lastSlash != NULL )
-					lastSlash[1] = '\0';
-				struct stat stat_buf;
-				if ( stat(dyldDirs, &stat_buf) != 0 ) {
-					const char* afterSlash = &dyldDirs[1];
-					char* slash;
-					while ( (slash = strchr(afterSlash, '/')) != NULL ) {
-						*slash = '\0';
-						::mkdir(dyldDirs, S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH);
-						*slash = '/';
-						afterSlash = slash+1;
-					}
-				}
-				
-				// create temp file for cache
-				int fd = ::open(tempCachePath, O_CREAT | O_RDWR | O_TRUNC, 0644);	
-				if ( fd == -1 )
-					throwf("can't create temp file %s, errnor=%d", tempCachePath, errno);
-					
-				// try to allocate whole cache file contiguously
-				fstore_t fcntlSpec = { F_ALLOCATECONTIG|F_ALLOCATEALL, F_PEOFPOSMODE, 0, cacheFileSize, 0 };
-				::fcntl(fd, F_PREALLOCATE, &fcntlSpec);
-
-				// write out cache file
-				if ( verbose )
-					fprintf(stderr, "update_dyld_shared_cache: writing cache to disk: %s\n", tempCachePath);
-				if ( ::pwrite(fd, inMemoryCache, cacheFileSize, 0) != cacheFileSize )
-					throwf("write() failure creating cache file, errno=%d", errno);
-				if ( progress ) {
-					// assuming write takes 35% of time
-					fprintf(stdout, "%3u/%u\n", (archIndex+1)*90, archCount*100);
-				}
-				
-				// flush to disk and close
-				int result = ::fcntl(fd, F_FULLFSYNC, NULL);
-				if ( result == -1 ) 
-					fprintf(stderr, "update_dyld_shared_cache: warning, fcntl(F_FULLFSYNC) failed with errno=%d for %s\n", errno, tempCachePath);
-				result = ::close(fd);
-				if ( result != 0 ) 
-					fprintf(stderr, "update_dyld_shared_cache: warning, close() failed with errno=%d for %s\n", errno, tempCachePath);
-				
-				if ( !iPhoneOS )
-					adhoc_codesign_share_cache(tempCachePath);
-
-				// <rdar://problem/7901042> Make life easier for the kernel at shutdown.
-				// If we just move the new cache file over the old, the old file
-				// may need to exist in the open-unlink state.  But because it
-				// may be mapped into the shared region, it cannot be deleted
-				// until all user processes are terminated.  That leaves are
-				// small to non-existent window for the kernel to delete the
-				// old cache file.
-				if ( fCacheFileInFinalLocation ) {
-					char tmpDirPath[64];
-					const char* pathLastSlash = strrchr(fCacheFilePath, '/');
-					if ( pathLastSlash != NULL ) {
-						sprintf(tmpDirPath, "/var/run%s.old.%u", pathLastSlash, getpid());
-						// move existing cache file to /var/run to be clean up next boot
-						result = ::rename(fCacheFilePath, tmpDirPath);
-						if ( result != 0 ) {
-							if ( errno != ENOENT )
-								fprintf(stderr, "update_dyld_shared_cache: warning, unable to move existing cache to %s errno=%d for %s\n", tmpDirPath, errno, fCacheFilePath);
-						}
-					}
-				}
-				
-				// move new cache file to correct location for use after reboot
-				if ( verbose )
-					fprintf(stderr, "update_dyld_shared_cache: atomically moving cache file into place: %s\n", fCacheFilePath);
-				result = ::rename(tempCachePath, fCacheFilePath);
-				if ( result != 0 ) 
-					throwf("can't swap newly create dyld shared cache file: rename(%s,%s) returned errno=%d", tempCachePath, fCacheFilePath, errno);
-				
-				
-				// flush everything to disk to assure rename() gets recorded
-				sync_volume(fCacheFilePath);
+				((dyldCacheHeader<E>*)inMemoryCache)->set_cacheType(0);
+				writeCacheFile(fCacheFilePath, inMemoryCache, cacheFileSize, fCacheFileInFinalLocation);
 				didUpdate = true;
-				
-				// restore default signal handlers
-				::signal(SIGINT, SIG_DFL);
-				::signal(SIGBUS, SIG_DFL);
-				::signal(SIGSEGV, SIG_DFL);
-
 				// generate human readable "map" file that shows the layout of the cache file
 				if ( verbose )
 					fprintf(stderr, "update_dyld_shared_cache: writing .map file to disk\n");
@@ -2953,7 +3433,7 @@
 				sprintf(tempMapFilePath, "%s.map%u", fCacheFilePath, getpid());
 				FILE* fmap = ::fopen(tempMapFilePath, "w");	
 				if ( fmap == NULL ) {
-					fprintf(stderr, "can't create map file %s, errnor=%d", tempCachePath, errno);
+					fprintf(stderr, "can't create map file %s, errno=%d", tempMapFilePath, errno);
 				}
 				else {
 					for(std::vector<shared_file_mapping_np>::iterator it = fMappings.begin(); it != fMappings.end(); ++it) {
@@ -3016,6 +3496,11 @@
 								fLinkEditsStartAddress+fOffsetOfOldStringPoolInCombinedLinkedit,
 								fLinkEditsStartAddress+fOffsetOfOldStringPoolInCombinedLinkedit+fSizeOfOldStringPoolInCombinedLinkedit);				
 					
+					dyldCacheHeader<E>* cacheHeader = (dyldCacheHeader<E>*)inMemoryCache;
+					if ( cacheHeader->slideInfoSize() != 0 ) {
+						fprintf(fmap, " linkedit   %4lluKB kernel slide info\n", (cacheHeader->slideInfoSize())/1024);				
+					}
+
 					fprintf(fmap, "unmapped -- %4uMB local symbol info\n", fUnmappedLocalSymbolsSize/(1024*1024));					
 					
 					uint64_t endMappingAddr = fMappings[2].sfm_address + fMappings[2].sfm_size;
@@ -3047,7 +3532,7 @@
 						}
 					}
 					fclose(fmap);
-					result = ::rename(tempMapFilePath, mapFilePath);
+					::rename(tempMapFilePath, mapFilePath);
 				}
 			}
 			
@@ -3060,8 +3545,6 @@
 			}
 		}
 		catch (...){
-			// remove temp cache file
-			::unlink(tempCachePath);
 			// remove in memory cache
 			if ( inMemoryCache != NULL ) 
 				vm_deallocate(mach_task_self(), (vm_address_t)inMemoryCache, allocatedCacheSize);
@@ -3126,16 +3609,7 @@
 						*last = '\0';
 						--last;
 					}
-					// <rdar://problem/8305479> images in shared cache are bound against different IOKit than found at runtime
-					// HACK:  Just ignore the known bad IOKit
-					if ( strcmp(symbolStart, "/System/Library/Frameworks/IOKit.framework/IOKit") == 0 ) {
-						// Disable warning because after three years <rdar://problem/7089957> has still not been fixed...
-						//fprintf(stderr, "update_dyld_shared_cache: warning, ignoring /System/Library/Frameworks/IOKit.framework/IOKit\n");
-						//warnings.push_back("update_dyld_shared_cache: warning, ignoring /System/Library/Frameworks/IOKit.framework/IOKit\n");
-					}
-					else {
-						paths.push_back(symbolStart);
-					}
+					paths.push_back(symbolStart);
 					symbolStart = NULL;
 					state = lineStart;
 				}
@@ -3151,11 +3625,11 @@
 
 
 
-static void setSharedDylibs(const char* rootPath, const char* overlayPath, const std::set<ArchPair>& onlyArchs, std::vector<const char*> rootsPaths)
+static void setSharedDylibs(const char* rootPath, const std::vector<const char*>& overlayPaths, const std::set<ArchPair>& onlyArchs, std::vector<const char*> rootsPaths)
 {
 	// set file system root
 	ArchGraph::setFileSystemRoot(rootPath);
-	ArchGraph::setFileSystemOverlay(overlayPath);
+	ArchGraph::setFileSystemOverlay(overlayPaths);
 
 	// initialize all architectures requested
 	for(std::set<ArchPair>::iterator a = onlyArchs.begin(); a != onlyArchs.end(); ++a)
@@ -3171,7 +3645,7 @@
 }
 
 
-static void scanForSharedDylibs(const char* rootPath, const char* overlayPath, const char* dirOfPathFiles, const std::set<ArchPair>& onlyArchs)
+static void scanForSharedDylibs(const char* rootPath, const std::vector<const char*>& overlayPaths, const char* dirOfPathFiles, const std::set<ArchPair>& onlyArchs)
 {
 	char rootDirOfPathFiles[strlen(rootPath)+strlen(dirOfPathFiles)+2];
 	// in -root mode, look for roots in /rootpath/var/db/dyld
@@ -3216,14 +3690,14 @@
 	
 	if ( rootsPaths.size() == 0 )
 		fprintf(stderr, "update_dyld_shared_cache: warning, no entries found in shared_region_roots\n");
-	setSharedDylibs(rootPath, overlayPath, onlyArchs, rootsPaths);
-}
-
-static void setSharedDylibs(const char* rootPath, const char* overlayPath, const char* pathsFile, const std::set<ArchPair>& onlyArchs)
+	setSharedDylibs(rootPath, overlayPaths, onlyArchs, rootsPaths);
+}
+
+static void setSharedDylibs(const char* rootPath, const std::vector<const char*>& overlayPaths, const char* pathsFile, const std::set<ArchPair>& onlyArchs)
 {
 	std::vector<const char*> rootsPaths;
 	parsePathsFile(pathsFile, rootsPaths);
-	setSharedDylibs(rootPath, overlayPath, onlyArchs, rootsPaths);
+	setSharedDylibs(rootPath, overlayPaths, onlyArchs, rootsPaths);
 }
 
 
@@ -3270,7 +3744,7 @@
 
 
 
-static bool updateSharedeCacheFile(const char* rootPath, const char* overlayPath, const char* cacheDir, bool explicitCacheDir, const std::set<ArchPair>& onlyArchs, 
+static bool updateSharedeCacheFile(const char* rootPath, const std::vector<const char*>& overlayPaths, const char* cacheDir, bool explicitCacheDir, const std::set<ArchPair>& onlyArchs, 
 									bool force, bool alphaSort, bool optimize, bool deleteExistingFirst, bool verify, bool keepSignatures, bool dontMapLocalSymbols)
 {
 	bool didUpdate = false;
@@ -3298,19 +3772,25 @@
 		switch ( a->arch ) {
 			case CPU_TYPE_I386:
 				{
-					SharedCache<x86> cache(ArchGraph::graphForArchPair(*a), rootPath, overlayPath, cacheDir, explicitCacheDir, alphaSort, verify, optimize, dyldBaseAddress);
+					SharedCache<x86> cache(ArchGraph::graphForArchPair(*a), rootPath, overlayPaths, cacheDir, explicitCacheDir, alphaSort, verify, optimize, dyldBaseAddress);
 					didUpdate |= cache.update(force, optimize, deleteExistingFirst, index, archCount, keepSignatures, dontMapLocalSymbols);
 				}
 				break;
 			case CPU_TYPE_X86_64:
 				{
-					SharedCache<x86_64> cache(ArchGraph::graphForArchPair(*a), rootPath, overlayPath, cacheDir, explicitCacheDir, alphaSort, verify, optimize, dyldBaseAddress);
+					SharedCache<x86_64> cache(ArchGraph::graphForArchPair(*a), rootPath, overlayPaths, cacheDir, explicitCacheDir, alphaSort, verify, optimize, dyldBaseAddress);
 					didUpdate |= cache.update(force, optimize, deleteExistingFirst, index, archCount, keepSignatures, dontMapLocalSymbols);
 				}
 				break;
 			case CPU_TYPE_ARM:
 				{
-					SharedCache<arm> cache(ArchGraph::graphForArchPair(*a), rootPath, overlayPath, cacheDir, explicitCacheDir, alphaSort, verify, optimize, dyldBaseAddress);
+					SharedCache<arm> cache(ArchGraph::graphForArchPair(*a), rootPath, overlayPaths, cacheDir, explicitCacheDir, alphaSort, verify, optimize, dyldBaseAddress);
+					didUpdate |= cache.update(force, optimize, deleteExistingFirst, index, archCount, keepSignatures, dontMapLocalSymbols);
+				}
+				break;
+			case CPU_TYPE_ARM64:
+				{
+					SharedCache<arm64> cache(ArchGraph::graphForArchPair(*a), rootPath, overlayPaths, cacheDir, explicitCacheDir, alphaSort, verify, optimize, dyldBaseAddress);
 					didUpdate |= cache.update(force, optimize, deleteExistingFirst, index, archCount, keepSignatures, dontMapLocalSymbols);
 				}
 				break;
@@ -3334,7 +3814,7 @@
 {
 	std::set<ArchPair> onlyArchs;
 	const char* rootPath = "";
-	const char* overlayPath = "";
+	std::vector<const char*> overlayPaths;
 	const char* dylibListFile = NULL;
 	bool force = false;
 	bool alphaSort = false;
@@ -3343,6 +3823,7 @@
 	bool keepSignatures = false;
 	bool explicitCacheDir = false;
 	bool dontMapLocalSymbols = false;
+	bool relaunchForHaswell = false;
 	const char* cacheDir = NULL;
 	
 	try {
@@ -3389,9 +3870,10 @@
 						throw "-root missing path argument";
 				}
 				else if ( strcmp(arg, "-overlay") == 0 ) {
-					overlayPath = argv[++i];
-					if ( overlayPath == NULL )
+					const char* path = argv[++i];
+					if ( path == NULL )
 						throw "-overlay missing path argument";
+					overlayPaths.push_back(path);
 				}
 				else if ( strcmp(arg, "-cache_dir") == 0 ) {
 					cacheDir = argv[++i];
@@ -3405,6 +3887,8 @@
 						onlyArchs.insert(ArchPair(CPU_TYPE_I386, CPU_SUBTYPE_I386_ALL));
 					else if ( strcmp(arch, "x86_64") == 0 )
 						onlyArchs.insert(ArchPair(CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_ALL));
+					else if ( strcmp(arch, "x86_64h") == 0 )
+						onlyArchs.insert(ArchPair(CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_H));
 					else if ( strcmp(arch, "armv4t") == 0 )
 						onlyArchs.insert(ArchPair(CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V4T));
 					else if ( strcmp(arch, "armv5") == 0 )
@@ -3419,12 +3903,15 @@
 						onlyArchs.insert(ArchPair(CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7K));
 					else if ( strcmp(arch, "armv7s") == 0 )
 						onlyArchs.insert(ArchPair(CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7S));
+					else if ( strcmp(arch, "arm64") == 0 )
+						onlyArchs.insert(ArchPair(CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_ALL));
 					else 
 						throwf("unknown architecture %s", arch);
 				}
 				else if ( strcmp(arg, "-universal_boot") == 0 ) {
 					onlyArchs.insert(ArchPair(CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_ALL));
 					onlyArchs.insert(ArchPair(CPU_TYPE_I386, CPU_SUBTYPE_I386_ALL));
+					relaunchForHaswell = true;
 				}
 				else {
 					usage();
@@ -3447,11 +3934,11 @@
 		}
 		
 		// strip tailing slashes on -overlay
-		if ( overlayPath[0] != '\0' ) {
+		for (std::vector<const char*>::iterator it=overlayPaths.begin(); it != overlayPaths.end(); ++it) {
 			char realOverlayPath[MAXPATHLEN];
-			if ( realpath(overlayPath, realOverlayPath) == NULL )
-				throwf("realpath() failed on %s\n", overlayPath);
-			overlayPath = strdup(realOverlayPath);
+			if ( realpath(*it, realOverlayPath) == NULL )
+				throwf("realpath() failed on %s\n", *it);
+			*it = strdup(realOverlayPath);
 		}
 
 		// set default location to write cache dir
@@ -3470,8 +3957,20 @@
 			#if __i386__ || __x86_64__
 				onlyArchs.insert(ArchPair(CPU_TYPE_I386, CPU_SUBTYPE_I386_ALL));
 				// check system is capable of running 64-bit programs
-				if ( (sysctlbyname("hw.optional.x86_64", &available, &len, NULL, 0) == 0) && available )
-					onlyArchs.insert(ArchPair(CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_ALL));
+				if ( (sysctlbyname("hw.optional.x86_64", &available, &len, NULL, 0) == 0) && available ) {
+					// check system is capable of running x86_64h code
+					struct host_basic_info info;
+					mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
+					mach_port_t hostPort = mach_host_self();
+					kern_return_t result = host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&info, &count);
+					mach_port_deallocate(mach_task_self(), hostPort);
+					if ( result != KERN_SUCCESS )
+						throw "host_info() failed";
+					if ( info.cpu_subtype == CPU_SUBTYPE_X86_64_H )
+						onlyArchs.insert(ArchPair(CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_H));
+					else
+						onlyArchs.insert(ArchPair(CPU_TYPE_X86_64, CPU_SUBTYPE_X86_ALL));
+				}
 			#else
 				#error unsupported architecture
 			#endif
@@ -3483,10 +3982,10 @@
 		
 		// build list of shared dylibs
 		if ( dylibListFile != NULL )
-			setSharedDylibs(rootPath, overlayPath, dylibListFile, onlyArchs);
+			setSharedDylibs(rootPath, overlayPaths, dylibListFile, onlyArchs);
 		else
-			scanForSharedDylibs(rootPath, overlayPath, "/var/db/dyld/shared_region_roots/", onlyArchs);
-		bool didUpdate = updateSharedeCacheFile(rootPath, overlayPath, cacheDir, explicitCacheDir, onlyArchs, force, alphaSort, optimize,
+			scanForSharedDylibs(rootPath, overlayPaths, "/var/db/dyld/shared_region_roots/", onlyArchs);
+		bool didUpdate = updateSharedeCacheFile(rootPath, overlayPaths, cacheDir, explicitCacheDir, onlyArchs, force, alphaSort, optimize,
 								false, verify, keepSignatures, dontMapLocalSymbols);
 								
 		if ( didUpdate && !iPhoneOS ) {
@@ -3495,14 +3994,32 @@
 				typedef bool (*dscsym_proc_t)(const char *root);
 				dscsym_proc_t proc = (dscsym_proc_t)dlsym(handle, "dscsym_save_nuggets_for_current_caches");
 				const char* nuggetRootPath = "/";
-				if ( overlayPath[0] != '\0' ) 
-					nuggetRootPath = overlayPath;
+				if ( !overlayPaths.empty() ) 
+					nuggetRootPath = overlayPaths[0];
 				else if ( rootPath[0] != '\0' )
 					nuggetRootPath = rootPath;
 				(*proc)(nuggetRootPath);
 			}
 			dlclose(handle);
 		}
+
+		if ( relaunchForHaswell ) {
+			char cmd[2048];
+			strlcpy(cmd, argv[0], 2048);
+			strlcat(cmd, " -arch x86_64h", 2048);
+			if ( force )
+				strlcat(cmd, " -force", 2048);
+			if ( verify )
+				strlcat(cmd, " -verify", 2048);
+			if ( alphaSort )
+				strlcat(cmd, " -sort_by_name", 2048);
+			if ( (rootPath != NULL) && (rootPath[0] != '\0') ) {
+				strlcat(cmd, " -root ", 2048);
+				strlcat(cmd, rootPath, 2048);
+			}
+			return system(cmd);
+		}
+
 	}
 	catch (const char* msg) {
 		fprintf(stderr, "update_dyld_shared_cache failed: %s\n", msg);