Loading...
launch-cache/dsc_extractor.cpp dyld-551.4 dyld-239.4
--- dyld/dyld-551.4/launch-cache/dsc_extractor.cpp
+++ dyld/dyld-239.4/launch-cache/dsc_extractor.cpp
@@ -45,10 +45,8 @@
 
 #include "dsc_iterator.h"
 #include "dsc_extractor.h"
-#include "MachOTrie.hpp"
 
 #include <vector>
-#include <set>
 #include <map>
 #include <unordered_map>
 #include <algorithm>
@@ -78,36 +76,9 @@
 };
 typedef std::unordered_map<const char*, std::vector<seg_info>, CStringHash, CStringEquals> NameToSegments;
 
-// Filter to find individual symbol re-exports in trie
-class NotReExportSymbol {
-public:
-	NotReExportSymbol(const std::set<int> &rd) :_reexportDeps(rd) {}
-	bool operator()(const mach_o::trie::Entry &entry) const {
-		bool result = isSymbolReExport(entry);
-		if (result) {
-			// <rdar://problem/17671438> Xcode 6 leaks in dyld_shared_cache_extract_dylibs
-			::free((void*)entry.name);
-			const_cast<mach_o::trie::Entry*>(&entry)->name = NULL;
-		}
-		return result;
-	}
-private:
-	bool isSymbolReExport(const mach_o::trie::Entry &entry) const {
-		if ( (entry.flags & EXPORT_SYMBOL_FLAGS_KIND_MASK) != EXPORT_SYMBOL_FLAGS_KIND_REGULAR )
-			return true;
-		if ( (entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) == 0 )
-			return true;
-		// If the symbol comes from a dylib that is re-exported, this is not an individual symbol re-export
-		if ( _reexportDeps.count((int)entry.other) != 0 )
-			return true;
-		return false;
-	}
-	const std::set<int> &_reexportDeps;
-};
-
 
 template <typename A>
-int optimize_linkedit(macho_header<typename A::P>* mh, uint64_t textOffsetInCache, const void* mapped_cache, uint64_t* newSize) 
+int optimize_linkedit(macho_header<typename A::P>* mh, uint32_t textOffsetInCache, const void* mapped_cache, uint64_t* newSize) 
 {
 	typedef typename A::P P;
 	typedef typename A::P::E E;
@@ -118,26 +89,16 @@
 	
 	// update load commands
 	uint64_t cumulativeFileSize = 0;
-	const unsigned origLoadCommandsSize = mh->sizeofcmds();
-	unsigned bytesRemaining = origLoadCommandsSize;
-	unsigned removedCount = 0;
 	const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)mh + sizeof(macho_header<P>));
-	const uint32_t cmdCount = mh->ncmds();
+	const uint32_t cmd_count = mh->ncmds();
 	const macho_load_command<P>* cmd = cmds;
 	macho_segment_command<P>* linkEditSegCmd = NULL;
 	macho_symtab_command<P>* symtab = NULL;
 	macho_dysymtab_command<P>*	dynamicSymTab = NULL;
 	macho_linkedit_data_command<P>*	functionStarts = NULL;
 	macho_linkedit_data_command<P>*	dataInCode = NULL;
-	uint32_t exportsTrieOffset = 0;
-	uint32_t exportsTrieSize = 0;
-	std::set<int> reexportDeps;
-	int depIndex = 0;
-	for (uint32_t i = 0; i < cmdCount; ++i) {
-	    bool remove = false;
-		switch ( cmd->cmd() ) {
-		case macho_segment_command<P>::CMD:
-			{
+	for (uint32_t i = 0; i < cmd_count; ++i) {
+		if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
 			// update segment/section file offsets
 			macho_segment_command<P>* segCmd = (macho_segment_command<P>*)cmd;
 			segCmd->set_fileoff(cumulativeFileSize);
@@ -145,20 +106,16 @@
 			macho_section<P>* const sectionsEnd = &sectionsStart[segCmd->nsects()];
 			for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
 				if ( sect->offset() != 0 )
-					sect->set_offset((uint32_t)(cumulativeFileSize+sect->addr()-segCmd->vmaddr()));
+					sect->set_offset(cumulativeFileSize+sect->addr()-segCmd->vmaddr());
 			}
 			if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) {
 				linkEditSegCmd = segCmd;
 			}
 			cumulativeFileSize += segCmd->filesize();
-			}
-			break;
-		case LC_DYLD_INFO_ONLY:
-			{
+		}
+		else if ( cmd->cmd() == LC_DYLD_INFO_ONLY ) {
 			// zero out all dyld info
 			macho_dyld_info_command<P>* dyldInfo = (macho_dyld_info_command<P>*)cmd;
-			exportsTrieOffset = dyldInfo->export_off();
-			exportsTrieSize = dyldInfo->export_size();
 			dyldInfo->set_rebase_off(0);
 			dyldInfo->set_rebase_size(0);
 			dyldInfo->set_bind_off(0);
@@ -169,51 +126,22 @@
 			dyldInfo->set_lazy_bind_size(0);
 			dyldInfo->set_export_off(0);
 			dyldInfo->set_export_size(0);
-			}
-			break;
-		case LC_SYMTAB:
+		}
+		else if ( cmd->cmd() == LC_SYMTAB ) {
 			symtab = (macho_symtab_command<P>*)cmd;
-			break;
-		case LC_DYSYMTAB:
+		}
+		else if ( cmd->cmd() == LC_DYSYMTAB ) {
 			dynamicSymTab = (macho_dysymtab_command<P>*)cmd;
-			break;
-		case LC_FUNCTION_STARTS:
+		}
+		else if ( cmd->cmd() == LC_FUNCTION_STARTS ) {
 			functionStarts = (macho_linkedit_data_command<P>*)cmd;
-			break;
-		case LC_DATA_IN_CODE:
+		}
+		else if ( cmd->cmd() == LC_DATA_IN_CODE ) {
 			dataInCode = (macho_linkedit_data_command<P>*)cmd;
-			break;
-		case LC_LOAD_DYLIB:
-		case LC_LOAD_WEAK_DYLIB:
-		case LC_REEXPORT_DYLIB:
-		case LC_LOAD_UPWARD_DYLIB:
-			++depIndex;
-			if ( cmd->cmd() == LC_REEXPORT_DYLIB ) {
-				reexportDeps.insert(depIndex);
-			}
-			break;
-		case LC_SEGMENT_SPLIT_INFO:
-			// <rdar://problem/23212513> dylibs iOS 9 dyld caches have bogus LC_SEGMENT_SPLIT_INFO
-			remove = true;
-			break;
-		}
-		uint32_t cmdSize = cmd->cmdsize();
-		macho_load_command<P>* nextCmd = (macho_load_command<P>*)(((uint8_t*)cmd)+cmdSize);
-		if ( remove ) {
-			::memmove((void*)cmd, (void*)nextCmd, bytesRemaining);
-			++removedCount;
-		}
-		else {
-			bytesRemaining -= cmdSize;
-			cmd = nextCmd;
-		}
-	}
-	// zero out stuff removed
-	::bzero((void*)cmd, bytesRemaining);
-	// update header
-	mh->set_ncmds(cmdCount - removedCount);
-	mh->set_sizeofcmds(origLoadCommandsSize - bytesRemaining);
-
+		}
+		cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
+	}
+	
 	// rebuild symbol table
 	if ( linkEditSegCmd == NULL ) {
 		fprintf(stderr, "__LINKEDIT not found\n");
@@ -228,35 +156,26 @@
 		return -1;
 	}
 
-	const uint64_t newFunctionStartsOffset = linkEditSegCmd->fileoff();
+	const uint32_t newFunctionStartsOffset = linkEditSegCmd->fileoff();
 	uint32_t functionStartsSize = 0;
 	if ( functionStarts != NULL ) {
 		// copy function starts from original cache file to new mapped dylib file
 		functionStartsSize = functionStarts->datasize();
 		memcpy((char*)mh + newFunctionStartsOffset, (char*)mapped_cache + functionStarts->dataoff(), functionStartsSize);
 	}
-	const uint64_t newDataInCodeOffset = (newFunctionStartsOffset + functionStartsSize + sizeof(pint_t) - 1) & (-sizeof(pint_t)); // pointer align
+	const uint32_t newDataInCodeOffset = (newFunctionStartsOffset + functionStartsSize + sizeof(pint_t) - 1) & (-sizeof(pint_t)); // pointer align
 	uint32_t dataInCodeSize = 0;
 	if ( dataInCode != NULL ) {
 		// copy data-in-code info from original cache file to new mapped dylib file
 		dataInCodeSize = dataInCode->datasize();
 		memcpy((char*)mh + newDataInCodeOffset, (char*)mapped_cache + dataInCode->dataoff(), dataInCodeSize);
 	}
-
-	std::vector<mach_o::trie::Entry> exports;
-	if ( exportsTrieSize != 0 ) {
-		const uint8_t* exportsStart = ((uint8_t*)mapped_cache) + exportsTrieOffset; 
-		const uint8_t* exportsEnd = &exportsStart[exportsTrieSize];
-		mach_o::trie::parseTrie(exportsStart, exportsEnd, exports);
-		exports.erase(std::remove_if(exports.begin(), exports.end(), NotReExportSymbol(reexportDeps)), exports.end());
-	}
-
+	
 	// look for local symbol info in unmapped part of shared cache
 	dyldCacheHeader<E>* header = (dyldCacheHeader<E>*)mapped_cache;
 	macho_nlist<P>* localNlists = NULL;
 	uint32_t localNlistCount = 0;
 	const char* localStrings = NULL;
-	const char* localStringsEnd = NULL;
 	if ( header->mappingOffset() > offsetof(dyld_cache_header,localSymbolsSize) ) {
 		dyldCacheLocalSymbolsInfo<E>* localInfo = (dyldCacheLocalSymbolsInfo<E>*)(((uint8_t*)mapped_cache) + header->localSymbolsOffset());
 		dyldCacheLocalSymbolEntry<E>* entries = (dyldCacheLocalSymbolEntry<E>*)(((uint8_t*)mapped_cache) + header->localSymbolsOffset() + localInfo->entriesOffset());
@@ -268,11 +187,11 @@
 				localNlistCount = entries[i].nlistCount();
 				localNlists = &allLocalNlists[localNlistStart];
 				localStrings = ((char*)localInfo) + localInfo->stringsOffset();
-				localStringsEnd = &localStrings[localInfo->stringsSize()];
 				break;
 			}
 		}
 	}
+	
 	// compute number of symbols in new symbol table
 	const macho_nlist<P>* const mergedSymTabStart = (macho_nlist<P>*)(((uint8_t*)mapped_cache) + symtab->symoff());
 	const macho_nlist<P>* const mergedSymTabend = &mergedSymTabStart[symtab->nsyms()];
@@ -287,18 +206,14 @@
 		}
 	}
 	
-	// add room for N_INDR symbols for re-exported symbols
-	newSymCount += exports.size();
-
 	// copy symbol entries and strings from original cache file to new mapped dylib file
-	const uint64_t newSymTabOffset = (newDataInCodeOffset + dataInCodeSize + sizeof(pint_t) - 1) & (-sizeof(pint_t)); // pointer align
-	const uint64_t newIndSymTabOffset = newSymTabOffset + newSymCount*sizeof(macho_nlist<P>);
-	const uint64_t newStringPoolOffset = newIndSymTabOffset + dynamicSymTab->nindirectsyms()*sizeof(uint32_t);
+	const uint32_t newSymTabOffset = (newDataInCodeOffset + dataInCodeSize + sizeof(pint_t) - 1) & (-sizeof(pint_t)); // pointer align
+	const uint32_t newIndSymTabOffset = newSymTabOffset + newSymCount*sizeof(macho_nlist<P>);
+	const uint32_t newStringPoolOffset = newIndSymTabOffset + dynamicSymTab->nindirectsyms()*sizeof(uint32_t);
 	macho_nlist<P>* const newSymTabStart = (macho_nlist<P>*)(((uint8_t*)mh) + newSymTabOffset);
 	char* const newStringPoolStart = (char*)mh + newStringPoolOffset;
 	const uint32_t* mergedIndSymTab = (uint32_t*)((char*)mapped_cache + dynamicSymTab->indirectsymoff());
 	const char* mergedStringPoolStart = (char*)mapped_cache + symtab->stroff();
-	const char* mergedStringPoolEnd = &mergedStringPoolStart[symtab->strsize()];
 	macho_nlist<P>* t = newSymTabStart;
 	int poolOffset = 0;
 	uint32_t symbolsCopied = 0;
@@ -309,28 +224,8 @@
 			continue;
 		*t = *s;
 		t->set_n_strx(poolOffset);
-		const char* symName = &mergedStringPoolStart[s->n_strx()];
-		if ( symName > mergedStringPoolEnd )
-			symName = "<corrupt symbol name>";
-		strcpy(&newStringPoolStart[poolOffset], symName);
-		poolOffset += (strlen(symName) + 1);
-		++t;
-		++symbolsCopied;
-	}
-	// <rdar://problem/16529213> recreate N_INDR symbols in extracted dylibs for debugger
-	for (std::vector<mach_o::trie::Entry>::iterator it = exports.begin(); it != exports.end(); ++it) {
-		strcpy(&newStringPoolStart[poolOffset], it->name);
-		t->set_n_strx(poolOffset);
-		poolOffset += (strlen(it->name) + 1);
-		t->set_n_type(N_INDR | N_EXT);
-		t->set_n_sect(0);
-		t->set_n_desc(0);
-		const char* importName = it->importName;
-		if ( *importName == '\0' )
-			importName = it->name;
-		strcpy(&newStringPoolStart[poolOffset], importName);
-		t->set_n_value(poolOffset);
-		poolOffset += (strlen(importName) + 1);
+		strcpy(&newStringPoolStart[poolOffset], &mergedStringPoolStart[s->n_strx()]);
+		poolOffset += (strlen(&newStringPoolStart[poolOffset]) + 1);
 		++t;
 		++symbolsCopied;
 	}
@@ -341,8 +236,6 @@
 		// copy local symbols
 		for (uint32_t i=0; i < localNlistCount; ++i) {
 			const char* localName = &localStrings[localNlists[i].n_strx()];
-			if ( localName > localStringsEnd )
-				localName = "<corrupt local symbol name>";
 			*t = localNlists[i];
 			t->set_n_strx(poolOffset);
 			strcpy(&newStringPoolStart[poolOffset], localName);
@@ -366,33 +259,27 @@
 	
 	// update load commands
 	if ( functionStarts != NULL ) {
-		functionStarts->set_dataoff((uint32_t)newFunctionStartsOffset);
+		functionStarts->set_dataoff(newFunctionStartsOffset);
 		functionStarts->set_datasize(functionStartsSize);
 	}
 	if ( dataInCode != NULL ) {
-		dataInCode->set_dataoff((uint32_t)newDataInCodeOffset);
+		dataInCode->set_dataoff(newDataInCodeOffset);
 		dataInCode->set_datasize(dataInCodeSize);
 	}
 	symtab->set_nsyms(symbolsCopied);
-	symtab->set_symoff((uint32_t)newSymTabOffset);
-	symtab->set_stroff((uint32_t)newStringPoolOffset);
+	symtab->set_symoff(newSymTabOffset);
+	symtab->set_stroff(newStringPoolOffset);
 	symtab->set_strsize(poolOffset);
 	dynamicSymTab->set_extreloff(0);
 	dynamicSymTab->set_nextrel(0);
 	dynamicSymTab->set_locreloff(0);
 	dynamicSymTab->set_nlocrel(0);
-	dynamicSymTab->set_indirectsymoff((uint32_t)newIndSymTabOffset);
+	dynamicSymTab->set_indirectsymoff(newIndSymTabOffset);
 	linkEditSegCmd->set_filesize(symtab->stroff()+symtab->strsize() - linkEditSegCmd->fileoff());
 	linkEditSegCmd->set_vmsize( (linkEditSegCmd->filesize()+4095) & (-4096) );
 	
 	// return new size
 	*newSize = (symtab->stroff()+symtab->strsize()+4095) & (-4096);
-	
-	// <rdar://problem/17671438> Xcode 6 leaks in dyld_shared_cache_extract_dylibs
-	for (std::vector<mach_o::trie::Entry>::iterator it = exports.begin(); it != exports.end(); ++it) {
-		::free((void*)(it->name));
-	}
-	
 	
 	return 0;
 }
@@ -461,8 +348,8 @@
     FA->align                                   = OSSwapHostToBigInt32(12);
     
 	// Write regular segments into the buffer
-	uint64_t                totalSize           = 0;
-    uint64_t				textOffsetInCache	= 0;
+	uint32_t                totalSize           = 0;
+    uint32_t				textOffsetInCache	= 0;
 	for( std::vector<seg_info>::const_iterator it=segments.begin(); it != segments.end(); ++it) {
         
         if(strcmp(it->segName, "__TEXT") == 0 ) {
@@ -477,7 +364,7 @@
                 
                 if(   afa->cputype == FA->cputype
                    && afa->cpusubtype == FA->cpusubtype) {
-                    //fprintf(stderr, "arch already exists in fat dylib\n");
+                    fprintf(stderr, "arch already exists in fat dylib\n");
                     dylib_data.resize(offsetInFatFile);
                     return offsetInFatFile;
                 }
@@ -497,7 +384,7 @@
 	optimize_linkedit<A>(((macho_header<P>*)(base_ptr+offsetInFatFile)), textOffsetInCache, mapped_cache, &newSize);
 	
 	// update fat header with new file size
-    dylib_data.resize((size_t)(offsetInFatFile+newSize));
+    dylib_data.resize(offsetInFatFile+newSize);
     base_ptr                                    = &dylib_data.front();
 	FA->size                                    = OSSwapHostToBigInt32(newSize);
 #undef FH
@@ -521,7 +408,7 @@
 		return -1;
 	}
 	
-	void* mapped_cache = mmap(NULL, (size_t)statbuf.st_size, PROT_READ, MAP_PRIVATE, cache_fd, 0);
+	void* mapped_cache = mmap(NULL, statbuf.st_size, PROT_READ, MAP_PRIVATE, cache_fd, 0);
 	if (mapped_cache == MAP_FAILED) {
 		fprintf(stderr, "Error: mmap() for shared cache at %s failed, errno=%d\n", shared_cache_file_path, errno);
 		return -1;
@@ -534,8 +421,6 @@
 	     if ( strcmp((char*)mapped_cache, "dyld_v1    i386") == 0 ) 
 		dylib_create_func = dylib_maker<x86>;
 	else if ( strcmp((char*)mapped_cache, "dyld_v1  x86_64") == 0 ) 
-		dylib_create_func = dylib_maker<x86_64>;
-	else if ( strcmp((char*)mapped_cache, "dyld_v1 x86_64h") == 0 ) 
 		dylib_create_func = dylib_maker<x86_64>;
 	else if ( strcmp((char*)mapped_cache, "dyld_v1   armv5") == 0 ) 
 		dylib_create_func = dylib_maker<arm>;
@@ -545,25 +430,21 @@
 		dylib_create_func = dylib_maker<arm>;
 	else if ( strncmp((char*)mapped_cache, "dyld_v1  armv7", 14) == 0 ) 
 		dylib_create_func = dylib_maker<arm>;
-	else if ( strcmp((char*)mapped_cache, "dyld_v1   arm64") == 0 ) 
-		dylib_create_func = dylib_maker<arm64>;
-	else if ( strcmp((char*)mapped_cache, "dyld_v1  arm64e") == 0 )
-		dylib_create_func = dylib_maker<arm64>;
 	else {
 		fprintf(stderr, "Error: unrecognized dyld shared cache magic.\n");
-        munmap(mapped_cache, (size_t)statbuf.st_size);
+        munmap(mapped_cache, statbuf.st_size);
 		return -1;
 	}
 
 	// iterate through all images in cache and build map of dylibs and segments
 	__block NameToSegments  map;
-	__block int				result              = dyld_shared_cache_iterate(mapped_cache, (uint32_t)statbuf.st_size, ^(const dyld_shared_cache_dylib_info* dylibInfo, const dyld_shared_cache_segment_info* segInfo) {
+	__block int				result              = dyld_shared_cache_iterate(mapped_cache, statbuf.st_size, ^(const dyld_shared_cache_dylib_info* dylibInfo, const dyld_shared_cache_segment_info* segInfo) {
         map[dylibInfo->path].push_back(seg_info(segInfo->name, segInfo->fileOffset, segInfo->fileSize));
     });
 
     if(result != 0) {
 		fprintf(stderr, "Error: dyld_shared_cache_iterate_segments_with_slide failed.\n");
-        munmap(mapped_cache, (size_t)statbuf.st_size);
+        munmap(mapped_cache, statbuf.st_size);
 		return result;
     }
 
@@ -573,10 +454,11 @@
     dispatch_queue_t        process_queue       = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_LOW, 0);
     dispatch_queue_t        writer_queue        = dispatch_queue_create("dyld writer queue", 0);
     
+	__block int             cumulativeResult    = 0;
 	__block unsigned        count               = 0;
     
 	for ( NameToSegments::iterator it = map.begin(); it != map.end(); ++it) {
-		dispatch_semaphore_wait(sema, DISPATCH_TIME_FOREVER);
+        dispatch_semaphore_wait(sema, DISPATCH_TIME_FOREVER);
         dispatch_group_async(group, process_queue, ^{
             
             char    dylib_path[PATH_MAX];
@@ -592,7 +474,7 @@
             int fd = ::open(dylib_path, O_CREAT | O_EXLOCK | O_RDWR, 0644);
             if ( fd == -1 ) {
                 fprintf(stderr, "can't open or create dylib file %s, errnor=%d\n", dylib_path, errno);
-                result    = -1;
+                cumulativeResult    = -1;
                 return;
             }
             
@@ -600,29 +482,29 @@
             if (fstat(fd, &statbuf)) {
                 fprintf(stderr, "Error: stat failed for dyld file %s, errnor=%d\n", dylib_path, errno);
                 close(fd);
-                result    = -1;
+                cumulativeResult    = -1;
                 return;
             }
             
-            std::vector<uint8_t> *vec   = new std::vector<uint8_t>((size_t)statbuf.st_size);
+            std::vector<uint8_t> *vec   = new std::vector<uint8_t>(statbuf.st_size);
             if(pread(fd, &vec->front(), vec->size(), 0) != (long)vec->size()) {
                 fprintf(stderr, "can't read dylib file %s, errnor=%d\n", dylib_path, errno);
                 close(fd);
-                result    = -1;
+                cumulativeResult    = -1;
                 return;
             }
             
             const size_t    offset  = dylib_create_func(mapped_cache, *vec, it->second);
             
             dispatch_group_async(group, writer_queue, ^{
-                progress(count++, (unsigned)map.size());
+                progress(count++, map.size());
                 
                 if(offset != vec->size()) {
                     //Write out the first page, and everything after offset
                     if(   pwrite(fd, &vec->front(), 4096, 0) == -1 
                        || pwrite(fd, &vec->front() + offset, vec->size() - offset, offset) == -1) {
                         fprintf(stderr, "error writing, errnor=%d\n", errno);
-                        result    = -1;
+                        cumulativeResult    = -1;
                     }
                 }
                 
@@ -637,8 +519,8 @@
     dispatch_release(group);
     dispatch_release(writer_queue);
     
-    munmap(mapped_cache, (size_t)statbuf.st_size);
-	return result;
+    munmap(mapped_cache, statbuf.st_size);
+	return cumulativeResult;
 }