Loading...
dyld3/shared-cache/CacheBuilder.cpp dyld-551.4 dyld-655.1
--- dyld/dyld-551.4/dyld3/shared-cache/CacheBuilder.cpp
+++ dyld/dyld-655.1/dyld3/shared-cache/CacheBuilder.cpp
@@ -28,8 +28,11 @@
 #include <sys/errno.h>
 #include <sys/fcntl.h>
 #include <sys/param.h>
+#include <sys/types.h>
+#include <sys/stat.h>
 #include <mach/mach.h>
 #include <mach/mach_time.h>
+#include <mach/mach_vm.h>
 #include <mach-o/loader.h>
 #include <mach-o/fat.h>
 #include <mach/shared_region.h>
@@ -44,34 +47,46 @@
 #include <unordered_map>
 #include <unordered_set>
 
-#include "MachOParser.h"
+#include "MachOFileAbstraction.hpp"
 #include "CodeSigningTypes.h"
 #include "DyldSharedCache.h"
 #include "CacheBuilder.h"
 #include "FileAbstraction.hpp"
-#include "LaunchCacheWriter.h"
 #include "Trie.hpp"
+#include "FileUtils.h"
 #include "Diagnostics.h"
-#include "ImageProxy.h"
+#include "ClosureBuilder.h"
+#include "Closure.h"
+#include "StringUtils.h"
 
 #if __has_include("dyld_cache_config.h")
     #include "dyld_cache_config.h"
 #else
-    #define ARM_SHARED_REGION_START    0x1A000000ULL
-    #define ARM_SHARED_REGION_SIZE     0x26000000ULL
-    #define ARM64_SHARED_REGION_START 0x180000000ULL
-    #define ARM64_SHARED_REGION_SIZE   0x40000000ULL
+    #define ARM_SHARED_REGION_START      0x1A000000ULL
+    #define ARM_SHARED_REGION_SIZE       0x26000000ULL
+    #define ARM64_SHARED_REGION_START   0x180000000ULL
+    #define ARM64_SHARED_REGION_SIZE     0x40000000ULL
 #endif
 
+#ifndef ARM64_32_SHARED_REGION_START
+    #define ARM64_32_SHARED_REGION_START 0x1A000000ULL
+    #define ARM64_32_SHARED_REGION_SIZE  0x26000000ULL
+#endif
+
 const CacheBuilder::ArchLayout CacheBuilder::_s_archLayout[] = {
-    { 0x7FFF20000000ULL,         0xEFE00000ULL,             0x40000000, 0xFFFF000000000000, "x86_64",  0,          0,          0,          12, true,  true  },
-    { 0x7FFF20000000ULL,         0xEFE00000ULL,             0x40000000, 0xFFFF000000000000, "x86_64h", 0,          0,          0,          12, true,  true  },
-    { SHARED_REGION_BASE_I386,   SHARED_REGION_SIZE_I386,   0x00200000,                0x0, "i386",    0,          0,          0,          12, false, false },
-    { ARM64_SHARED_REGION_START, ARM64_SHARED_REGION_SIZE,  0x02000000, 0x00FFFF0000000000, "arm64",   0x0000C000, 0x00100000, 0x07F00000, 14, false, true  },
-    { ARM64_SHARED_REGION_START, ARM64_SHARED_REGION_SIZE,  0x02000000, 0x00FFFF0000000000, "arm64e",  0x0000C000, 0x00100000, 0x07F00000, 14, false, true  },
-    { ARM_SHARED_REGION_START,   ARM_SHARED_REGION_SIZE,    0x02000000,         0xE0000000, "armv7s",  0,          0,          0,          14, false, false },
-    { ARM_SHARED_REGION_START,   ARM_SHARED_REGION_SIZE,    0x00400000,         0xE0000000, "armv7k",  0,          0,          0,          14, false, false },
-    { 0x40000000,                0x40000000,                0x02000000,                0x0, "sim-x86", 0,          0,          0,          14, false, false }
+    { 0x7FFF20000000ULL,            0xEFE00000ULL,              0x40000000, 0xFFFF000000000000, "x86_64",  0,          0,          0,          12, 2, true,  true  },
+    { 0x7FFF20000000ULL,            0xEFE00000ULL,              0x40000000, 0xFFFF000000000000, "x86_64h", 0,          0,          0,          12, 2, true,  true  },
+    { SHARED_REGION_BASE_I386,      SHARED_REGION_SIZE_I386,    0x00200000,                0x0, "i386",    0,          0,          0,          12, 0, false, false },
+    { ARM64_SHARED_REGION_START,    ARM64_SHARED_REGION_SIZE,   0x02000000, 0x00FFFF0000000000, "arm64",   0x0000C000, 0x00100000, 0x07F00000, 14, 2, false, true  },
+#if SUPPORT_ARCH_arm64e
+    { ARM64_SHARED_REGION_START,    ARM64_SHARED_REGION_SIZE,   0x02000000, 0x00FFFF0000000000, "arm64e",  0x0000C000, 0x00100000, 0x07F00000, 14, 2, false, true  },
+#endif
+#if SUPPORT_ARCH_arm64_32
+    { ARM64_32_SHARED_REGION_START, ARM64_32_SHARED_REGION_SIZE,0x02000000,         0xC0000000, "arm64_32",0x0000C000, 0x00100000, 0x07F00000, 14, 6, false, false },
+#endif
+    { ARM_SHARED_REGION_START,      ARM_SHARED_REGION_SIZE,     0x02000000,         0xE0000000, "armv7s",  0,          0,          0,          14, 4, false, false },
+    { ARM_SHARED_REGION_START,      ARM_SHARED_REGION_SIZE,     0x00400000,         0xE0000000, "armv7k",  0,          0,          0,          14, 4, false, false },
+    { 0x40000000,                   0x40000000,                 0x02000000,                0x0, "sim-x86", 0,          0,          0,          14, 0, false, false }
 };
 
 
@@ -82,17 +97,16 @@
 };
 
 
-CacheBuilder::CacheBuilder(const DyldSharedCache::CreateOptions& options)
+CacheBuilder::CacheBuilder(const DyldSharedCache::CreateOptions& options, const dyld3::closure::FileSystem& fileSystem)
     : _options(options)
-    , _buffer(nullptr)
+    , _fileSystem(fileSystem)
+    , _fullAllocatedBuffer(0)
     , _diagnostics(options.loggingPrefix, options.verbose)
     , _archLayout(nullptr)
     , _aliasCount(0)
     , _slideInfoFileOffset(0)
     , _slideInfoBufferSizeAllocated(0)
     , _allocatedBufferSize(0)
-    , _currentFileSize(0)
-    , _vmSize(0)
     , _branchPoolsLinkEditStartAddr(0)
 {
 
@@ -106,6 +120,10 @@
             break;
         }
     }
+
+    if (!_archLayout) {
+        _diagnostics.error("Tool was built without support for: '%s'", targetArch.c_str());
+    }
 }
 
 
@@ -119,26 +137,28 @@
     return _diagnostics.warnings();
 }
 
-const std::set<const mach_header*> CacheBuilder::evictions()
+const std::set<const dyld3::MachOAnalyzer*> CacheBuilder::evictions()
 {
     return _evictions;
 }
 
 void CacheBuilder::deleteBuffer()
 {
-    vm_deallocate(mach_task_self(), (vm_address_t)_buffer, _allocatedBufferSize);
-    _buffer = nullptr;
+    vm_deallocate(mach_task_self(), _fullAllocatedBuffer, _archLayout->sharedMemorySize);
+    _fullAllocatedBuffer = 0;
     _allocatedBufferSize = 0;
 }
 
-std::vector<DyldSharedCache::MappedMachO>
-CacheBuilder::makeSortedDylibs(const std::vector<DyldSharedCache::MappedMachO>& dylibs, const std::unordered_map<std::string, unsigned> sortOrder)
-{
-    std::vector<DyldSharedCache::MappedMachO> sortedDylibs = dylibs;
-
-    std::sort(sortedDylibs.begin(), sortedDylibs.end(), [&](const DyldSharedCache::MappedMachO& a, const DyldSharedCache::MappedMachO& b) {
-        const auto& orderA = sortOrder.find(a.runtimePath);
-        const auto& orderB = sortOrder.find(b.runtimePath);
+
+void CacheBuilder::makeSortedDylibs(const std::vector<LoadedMachO>& dylibs, const std::unordered_map<std::string, unsigned> sortOrder)
+{
+    for (const LoadedMachO& dylib : dylibs) {
+        _sortedDylibs.push_back({ &dylib, dylib.mappedFile.runtimePath, {} });
+    }
+
+    std::sort(_sortedDylibs.begin(), _sortedDylibs.end(), [&](const DylibInfo& a, const DylibInfo& b) {
+        const auto& orderA = sortOrder.find(a.input->mappedFile.runtimePath);
+        const auto& orderB = sortOrder.find(b.input->mappedFile.runtimePath);
         bool foundA = (orderA != sortOrder.end());
         bool foundB = (orderB != sortOrder.end());
 
@@ -152,10 +172,8 @@
         else if ( foundB )
              return false;
         else
-             return a.runtimePath < b.runtimePath;
+             return a.input->mappedFile.runtimePath < b.input->mappedFile.runtimePath;
     });
-
-    return sortedDylibs;
 }
 
 
@@ -166,24 +184,565 @@
 
 struct DylibAndSize
 {
-    const char*     installName;
-    uint64_t        size;
+    const CacheBuilder::LoadedMachO*    input;
+    const char*                         installName;
+    uint64_t                            size;
 };
 
-bool CacheBuilder::cacheOverflow(const dyld_cache_mapping_info regions[3])
+uint64_t CacheBuilder::cacheOverflowAmount()
 {
     if ( _archLayout->sharedRegionsAreDiscontiguous ) {
         // for macOS x86_64 cache, need to check each region for overflow
-        return ( (regions[0].size > 0x60000000) || (regions[1].size > 0x40000000) || (regions[2].size > 0x3FE00000) );
+        if ( _readExecuteRegion.sizeInUse > 0x60000000 )
+            return (_readExecuteRegion.sizeInUse - 0x60000000);
+
+        if ( _readWriteRegion.sizeInUse > 0x40000000 )
+            return (_readWriteRegion.sizeInUse - 0x40000000);
+
+        if ( _readOnlyRegion.sizeInUse > 0x3FE00000 )
+            return (_readOnlyRegion.sizeInUse - 0x3FE00000);
     }
     else {
-        return (_vmSize > _archLayout->sharedMemorySize);
-    }
+        bool alreadyOptimized = (_readOnlyRegion.sizeInUse != _readOnlyRegion.bufferSize);
+        uint64_t vmSize = _readOnlyRegion.unslidLoadAddress - _readExecuteRegion.unslidLoadAddress;
+        if ( alreadyOptimized )
+            vmSize += _readOnlyRegion.sizeInUse;
+        else if ( _options.excludeLocalSymbols )
+            vmSize += (_readOnlyRegion.sizeInUse * 37/100); // assume locals removal and LINKEDIT optimzation reduces LINKEDITs %25 of original size
+        else
+            vmSize += (_readOnlyRegion.sizeInUse * 80/100); // assume LINKEDIT optimzation reduces LINKEDITs to %80 of original size
+        if ( vmSize > _archLayout->sharedMemorySize )
+            return vmSize - _archLayout->sharedMemorySize;
+    }
+    // fits in shared region
+    return 0;
+}
+
+size_t CacheBuilder::evictLeafDylibs(uint64_t reductionTarget, std::vector<const LoadedMachO*>& overflowDylibs)
+{
+    // build count of how many references there are to each dylib
+    __block std::map<std::string, unsigned int> referenceCount;
+    for (const DylibInfo& dylib : _sortedDylibs) {
+        dylib.input->mappedFile.mh->forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool &stop) {
+            referenceCount[loadPath] += 1;
+        });
+    }
+
+    // find all dylibs not referenced
+    std::vector<DylibAndSize> unreferencedDylibs;
+    for (const DylibInfo& dylib : _sortedDylibs) {
+        const char* installName = dylib.input->mappedFile.mh->installName();
+        if ( referenceCount.count(installName) == 0 ) {
+            // conservative: sum up all segments except LINKEDIT
+            __block uint64_t segsSize = 0;
+            dylib.input->mappedFile.mh->forEachSegment(^(const dyld3::MachOFile::SegmentInfo& info, bool& stop) {
+                if ( strcmp(info.segName, "__LINKEDIT") != 0 )
+                    segsSize += info.vmSize;
+            });
+            unreferencedDylibs.push_back({ dylib.input, installName, segsSize });
+        }
+    }
+    // sort leaf dylibs by size
+    std::sort(unreferencedDylibs.begin(), unreferencedDylibs.end(), [&](const DylibAndSize& a, const DylibAndSize& b) {
+        return ( a.size > b.size );
+    });
+
+     // build set of dylibs that if removed will allow cache to build
+    for (DylibAndSize& dylib : unreferencedDylibs) {
+        if ( _options.verbose )
+            _diagnostics.warning("to prevent cache overflow, not caching %s", dylib.installName);
+        _evictions.insert(dylib.input->mappedFile.mh);
+        // Track the evicted dylibs so we can try build "other" dlopen closures for them.
+        overflowDylibs.push_back(dylib.input);
+        if ( dylib.size > reductionTarget )
+            break;
+        reductionTarget -= dylib.size;
+    }
+
+    // prune _sortedDylibs
+    _sortedDylibs.erase(std::remove_if(_sortedDylibs.begin(), _sortedDylibs.end(), [&](const DylibInfo& dylib) {
+        return (_evictions.count(dylib.input->mappedFile.mh) != 0);
+    }),_sortedDylibs.end());
+
+    return _evictions.size();
+}
+
+// Handles building a list of input files to the CacheBuilder itself.
+class CacheInputBuilder {
+public:
+    CacheInputBuilder(const dyld3::closure::FileSystem& fileSystem,
+                      std::string reqArchitecture, dyld3::Platform reqPlatform)
+    : fileSystem(fileSystem), reqArchitecture(reqArchitecture), reqPlatform(reqPlatform) { }
+
+    // Loads and maps any MachOs in the given list of files.
+    void loadMachOs(std::vector<CacheBuilder::InputFile>& inputFiles,
+                    std::vector<CacheBuilder::LoadedMachO>& dylibsToCache,
+                    std::vector<CacheBuilder::LoadedMachO>& otherDylibs,
+                    std::vector<CacheBuilder::LoadedMachO>& executables,
+                    std::vector<CacheBuilder::LoadedMachO>& couldNotLoadFiles) {
+
+        std::map<std::string, uint64_t> dylibInstallNameMap;
+        for (CacheBuilder::InputFile& inputFile : inputFiles) {
+            dyld3::closure::LoadedFileInfo loadedFileInfo = dyld3::MachOAnalyzer::load(inputFile.diag, fileSystem, inputFile.path, reqArchitecture.c_str(), reqPlatform);
+            const dyld3::MachOAnalyzer* ma = (const dyld3::MachOAnalyzer*)loadedFileInfo.fileContent;
+            if (ma == nullptr) {
+                couldNotLoadFiles.emplace_back((CacheBuilder::LoadedMachO){ DyldSharedCache::MappedMachO(), loadedFileInfo, &inputFile });
+                continue;
+            }
+
+            DyldSharedCache::MappedMachO mappedFile(inputFile.path, ma, loadedFileInfo.sliceLen, false, false,
+                                                    loadedFileInfo.sliceOffset, loadedFileInfo.mtime, loadedFileInfo.inode);
+
+            // The file can be loaded with the given slice, but we may still want to exlude it from the cache.
+            if (ma->isDylib()) {
+                std::string installName = ma->installName();
+
+                // Let the platform exclude the file before we do anything else.
+                if (platformExcludesInstallName(installName)) {
+                    inputFile.diag.verbose("Platform excluded file\n");
+                    fileSystem.unloadFile(loadedFileInfo);
+                    continue;
+                }
+
+                if (!ma->canBePlacedInDyldCache(inputFile.path, ^(const char* msg) {
+                    inputFile.diag.warning("Dylib located at '%s' cannot be placed in cache because: %s", inputFile.path, msg);
+                })) {
+                    // TODO: Add exclusion lists here?
+                    // Probably not as we already applied the dylib exclusion list.
+                    otherDylibs.emplace_back((CacheBuilder::LoadedMachO){ mappedFile, loadedFileInfo, &inputFile });
+                    continue;
+                }
+
+                // Otherwise see if we have another file with this install name
+                auto iteratorAndInserted = dylibInstallNameMap.insert(std::make_pair(installName, dylibsToCache.size()));
+                if (iteratorAndInserted.second) {
+                    // We inserted the dylib so we haven't seen another with this name.
+                    if (installName[0] != '@' && installName != inputFile.path) {
+                        inputFile.diag.warning("Dylib located at '%s' has installname '%s'", inputFile.path, installName.c_str());
+                    }
+
+                    dylibsToCache.emplace_back((CacheBuilder::LoadedMachO){ mappedFile, loadedFileInfo, &inputFile });
+                } else {
+                    // We didn't insert this one so we've seen it before.
+                    CacheBuilder::LoadedMachO& previousLoadedMachO = dylibsToCache[iteratorAndInserted.first->second];
+                    inputFile.diag.warning("Multiple dylibs claim installname '%s' ('%s' and '%s')", installName.c_str(), inputFile.path, previousLoadedMachO.mappedFile.runtimePath.c_str());
+
+                    // This is the "Good" one, overwrite
+                    if (inputFile.path == installName) {
+                        // Unload the old one
+                        fileSystem.unloadFile(previousLoadedMachO.loadedFileInfo);
+
+                        // And replace with this one.
+                        previousLoadedMachO.mappedFile = mappedFile;
+                        previousLoadedMachO.loadedFileInfo = loadedFileInfo;
+                    }
+                }
+            } else if (ma->isBundle()) {
+                // TODO: Add exclusion lists here?
+                otherDylibs.emplace_back((CacheBuilder::LoadedMachO){ mappedFile, loadedFileInfo, &inputFile });
+            } else if (ma->isDynamicExecutable()) {
+                if (platformExcludesExecutablePath_macOS(inputFile.path)) {
+                    inputFile.diag.verbose("Platform excluded file\n");
+                    fileSystem.unloadFile(loadedFileInfo);
+                    continue;
+                }
+                executables.emplace_back((CacheBuilder::LoadedMachO){ mappedFile, loadedFileInfo, &inputFile });
+            } else {
+                inputFile.diag.verbose("Unsupported mach file type\n");
+                fileSystem.unloadFile(loadedFileInfo);
+            }
+        }
+    }
+
+private:
+
+
+
+    static bool platformExcludesInstallName_macOS(const std::string& installName) {
+        return false;
+    }
+
+    static bool platformExcludesInstallName_iOS(const std::string& installName) {
+        if ( installName == "/System/Library/Caches/com.apple.xpc/sdk.dylib" )
+            return true;
+        if ( installName == "/System/Library/Caches/com.apple.xpcd/xpcd_cache.dylib" )
+            return true;
+        return false;
+    }
+
+    static bool platformExcludesInstallName_tvOS(const std::string& installName) {
+        return platformExcludesInstallName_iOS(installName);
+    }
+
+    static bool platformExcludesInstallName_watchOS(const std::string& installName) {
+        return platformExcludesInstallName_iOS(installName);
+    }
+
+    static bool platformExcludesInstallName_bridgeOS(const std::string& installName) {
+        return platformExcludesInstallName_iOS(installName);
+    }
+
+    // Returns true if the current platform requires that this install name be excluded from the shared cache
+    // Note that this overrides any exclusion from anywhere else.
+    bool platformExcludesInstallName(const std::string& installName) {
+        switch (reqPlatform) {
+            case dyld3::Platform::unknown:
+                return false;
+            case dyld3::Platform::macOS:
+                return platformExcludesInstallName_macOS(installName);
+            case dyld3::Platform::iOS:
+                return platformExcludesInstallName_iOS(installName);
+            case dyld3::Platform::tvOS:
+                return platformExcludesInstallName_tvOS(installName);
+            case dyld3::Platform::watchOS:
+                return platformExcludesInstallName_watchOS(installName);
+            case dyld3::Platform::bridgeOS:
+                return platformExcludesInstallName_bridgeOS(installName);
+            case dyld3::Platform::iOSMac:
+                return false;
+            case dyld3::Platform::iOS_simulator:
+                return false;
+            case dyld3::Platform::tvOS_simulator:
+                return false;
+            case dyld3::Platform::watchOS_simulator:
+                return false;
+        }
+    }
+
+
+
+
+    static bool platformExcludesExecutablePath_macOS(const std::string& path) {
+        return false;
+    }
+
+    static bool platformExcludesExecutablePath_iOS(const std::string& path) {
+        //HACK exclude all launchd and installd variants until we can do something about xpcd_cache.dylib and friends
+        if (path == "/sbin/launchd"
+            || path == "/usr/local/sbin/launchd.debug"
+            || path == "/usr/local/sbin/launchd.development"
+            || path == "/usr/libexec/installd") {
+            return true;
+        }
+        return false;
+    }
+
+    static bool platformExcludesExecutablePath_tvOS(const std::string& path) {
+        return platformExcludesExecutablePath_iOS(path);
+    }
+
+    static bool platformExcludesExecutablePath_watchOS(const std::string& path) {
+        return platformExcludesExecutablePath_iOS(path);
+    }
+
+    static bool platformExcludesExecutablePath_bridgeOS(const std::string& path) {
+        return platformExcludesExecutablePath_iOS(path);
+    }
+
+    // Returns true if the current platform requires that this path be excluded from the shared cache
+    // Note that this overrides any exclusion from anywhere else.
+    bool platformExcludesExecutablePath(const std::string& path) {
+        switch (reqPlatform) {
+            case dyld3::Platform::unknown:
+                return false;
+            case dyld3::Platform::macOS:
+                return platformExcludesExecutablePath_macOS(path);
+            case dyld3::Platform::iOS:
+                return platformExcludesExecutablePath_iOS(path);
+            case dyld3::Platform::tvOS:
+                return platformExcludesExecutablePath_tvOS(path);
+            case dyld3::Platform::watchOS:
+                return platformExcludesExecutablePath_watchOS(path);
+            case dyld3::Platform::bridgeOS:
+                return platformExcludesExecutablePath_bridgeOS(path);
+            case dyld3::Platform::iOSMac:
+                return false;
+            case dyld3::Platform::iOS_simulator:
+                return false;
+            case dyld3::Platform::tvOS_simulator:
+                return false;
+            case dyld3::Platform::watchOS_simulator:
+                return false;
+        }
+    }
+
+    const dyld3::closure::FileSystem&                   fileSystem;
+    std::string                                         reqArchitecture;
+    dyld3::Platform                                     reqPlatform;
+};
+
+static void verifySelfContained(std::vector<CacheBuilder::LoadedMachO>& dylibsToCache,
+                                std::vector<CacheBuilder::LoadedMachO>& otherDylibs,
+                                std::vector<CacheBuilder::LoadedMachO>& couldNotLoadFiles)
+{
+    // build map of dylibs
+    __block std::map<std::string, const CacheBuilder::LoadedMachO*> knownDylibs;
+    __block std::map<std::string, const CacheBuilder::LoadedMachO*> allDylibs;
+    for (const CacheBuilder::LoadedMachO& dylib : dylibsToCache) {
+        knownDylibs.insert({ dylib.mappedFile.runtimePath, &dylib });
+        allDylibs.insert({ dylib.mappedFile.runtimePath, &dylib });
+        if (const char* installName = dylib.mappedFile.mh->installName()) {
+            knownDylibs.insert({ installName, &dylib });
+            allDylibs.insert({ installName, &dylib });
+        }
+    }
+
+    for (const CacheBuilder::LoadedMachO& dylib : otherDylibs) {
+        allDylibs.insert({ dylib.mappedFile.runtimePath, &dylib });
+        if (const char* installName = dylib.mappedFile.mh->installName())
+            allDylibs.insert({ installName, &dylib });
+    }
+
+    for (const CacheBuilder::LoadedMachO& dylib : couldNotLoadFiles) {
+        allDylibs.insert({ dylib.inputFile->path, &dylib });
+    }
+
+    // check all dependencies to assure every dylib in cache only depends on other dylibs in cache
+    __block std::map<std::string, std::set<std::string>> badDylibs;
+    __block bool doAgain = true;
+    while ( doAgain ) {
+        doAgain = false;
+        // scan dylib list making sure all dependents are in dylib list
+        for (const CacheBuilder::LoadedMachO& dylib : dylibsToCache) {
+            if ( badDylibs.count(dylib.mappedFile.runtimePath) != 0 )
+                continue;
+            dylib.mappedFile.mh->forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
+                if (isWeak)
+                    return;
+                if ( knownDylibs.count(loadPath) == 0 ) {
+                    badDylibs[dylib.mappedFile.runtimePath].insert(std::string("Could not find dependency '") + loadPath + "'");
+                    knownDylibs.erase(dylib.mappedFile.runtimePath);
+                    knownDylibs.erase(dylib.mappedFile.mh->installName());
+                    doAgain = true;
+                }
+            });
+        }
+    }
+
+    // Now walk the dylibs which depend on missing dylibs and see if any of them are required binaries.
+    for (auto badDylibsIterator : badDylibs) {
+        const std::string& dylibRuntimePath = badDylibsIterator.first;
+        auto requiredDylibIterator = allDylibs.find(dylibRuntimePath);
+        if (requiredDylibIterator == allDylibs.end())
+            continue;
+        if (!requiredDylibIterator->second->inputFile->mustBeIncluded())
+            continue;
+        // This dylib is required so mark all dependencies as requried too
+        __block std::vector<const CacheBuilder::LoadedMachO*> worklist;
+        worklist.push_back(requiredDylibIterator->second);
+        while (!worklist.empty()) {
+            const CacheBuilder::LoadedMachO* dylib = worklist.back();
+            worklist.pop_back();
+            if (!dylib->mappedFile.mh)
+                continue;
+            dylib->mappedFile.mh->forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
+                if (isWeak)
+                    return;
+                auto dylibIterator = allDylibs.find(loadPath);
+                if (dylibIterator != allDylibs.end()) {
+                    if (dylibIterator->second->inputFile->state == CacheBuilder::InputFile::Unset) {
+                        dylibIterator->second->inputFile->state = CacheBuilder::InputFile::MustBeIncludedForDependent;
+                        worklist.push_back(dylibIterator->second);
+                    }
+                }
+            });
+        }
+    }
+
+    // FIXME: Make this an option we can pass in
+    const bool evictLeafDylibs = true;
+    if (evictLeafDylibs) {
+        doAgain = true;
+        while ( doAgain ) {
+            doAgain = false;
+
+            // build count of how many references there are to each dylib
+            __block std::set<std::string> referencedDylibs;
+            for (const CacheBuilder::LoadedMachO& dylib : dylibsToCache) {
+                if ( badDylibs.count(dylib.mappedFile.runtimePath) != 0 )
+                    continue;
+                dylib.mappedFile.mh->forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool &stop) {
+                    referencedDylibs.insert(loadPath);
+                });
+            }
+
+            // find all dylibs not referenced
+            std::vector<DylibAndSize> unreferencedDylibs;
+            for (const CacheBuilder::LoadedMachO& dylib : dylibsToCache) {
+                if ( badDylibs.count(dylib.mappedFile.runtimePath) != 0 )
+                    continue;
+                const char* installName = dylib.mappedFile.mh->installName();
+                if ( (referencedDylibs.count(installName) == 0) && (dylib.inputFile->state == CacheBuilder::InputFile::MustBeExcludedIfUnused) ) {
+                    badDylibs[dylib.mappedFile.runtimePath].insert(std::string("It has been explicitly excluded as it is unused"));
+                    doAgain = true;
+                }
+            }
+        }
+    }
+
+    // Move bad dylibs from dylibs to cache to other dylibs.
+    for (const CacheBuilder::LoadedMachO& dylib : dylibsToCache) {
+        auto i = badDylibs.find(dylib.mappedFile.runtimePath);
+        if ( i != badDylibs.end()) {
+            otherDylibs.push_back(dylib);
+            for (const std::string& reason : i->second )
+                otherDylibs.back().inputFile->diag.warning("Dylib located at '%s' not placed in shared cache because: %s", dylib.mappedFile.runtimePath.c_str(), reason.c_str());
+        }
+    }
+
+    const auto& badDylibsLambdaRef = badDylibs;
+    dylibsToCache.erase(std::remove_if(dylibsToCache.begin(), dylibsToCache.end(), [&](const CacheBuilder::LoadedMachO& dylib) {
+        if (badDylibsLambdaRef.find(dylib.mappedFile.runtimePath) != badDylibsLambdaRef.end())
+            return true;
+        return false;
+    }), dylibsToCache.end());
+}
+
+// This is the new build API which takes the raw files (which could be FAT) and tries to build a cache from them.
+// We should remove the other build() method, or make it private so that this can wrap it.
+void CacheBuilder::build(std::vector<CacheBuilder::InputFile>& inputFiles,
+                         std::vector<DyldSharedCache::FileAlias>& aliases) {
+    // First filter down to files which are actually MachO's
+    CacheInputBuilder cacheInputBuilder(_fileSystem, _archLayout->archName, _options.platform);
+
+    std::vector<LoadedMachO> dylibsToCache;
+    std::vector<LoadedMachO> otherDylibs;
+    std::vector<LoadedMachO> executables;
+    std::vector<LoadedMachO> couldNotLoadFiles;
+    cacheInputBuilder.loadMachOs(inputFiles, dylibsToCache, otherDylibs, executables, couldNotLoadFiles);
+
+    verifySelfContained(dylibsToCache, otherDylibs, couldNotLoadFiles);
+
+    // Check for required binaries before we try to build the cache
+    if (!_diagnostics.hasError()) {
+        // If we succeeded in building, then now see if there was a missing required file, and if so why its missing.
+        std::string errorString;
+        for (const LoadedMachO& dylib : otherDylibs) {
+            if (dylib.inputFile->mustBeIncluded()) {
+                // An error loading a required file must be propagated up to the top level diagnostic handler.
+                bool gotWarning = false;
+                for (const std::string& warning : dylib.inputFile->diag.warnings()) {
+                    gotWarning = true;
+                    std::string message = warning;
+                    if (message.back() == '\n')
+                        message.pop_back();
+                    if (!errorString.empty())
+                        errorString += "ERROR: ";
+                    errorString += "Required binary was not included in the shared cache '" + std::string(dylib.inputFile->path) + "' because: " + message + "\n";
+                }
+                if (!gotWarning) {
+                    if (!errorString.empty())
+                        errorString += "ERROR: ";
+                    errorString += "Required binary was not included in the shared cache '" + std::string(dylib.inputFile->path) + "' because: 'unknown error.  Please report to dyld'\n";
+                }
+            }
+        }
+        for (const LoadedMachO& dylib : couldNotLoadFiles) {
+            if (dylib.inputFile->mustBeIncluded()) {
+                if (dylib.inputFile->diag.hasError()) {
+                    if (!errorString.empty())
+                        errorString += "ERROR: ";
+                    errorString += "Required binary was not included in the shared cache '" + std::string(dylib.inputFile->path) + "' because: " + dylib.inputFile->diag.errorMessage() + "\n";
+                } else {
+                    if (!errorString.empty())
+                        errorString += "ERROR: ";
+                    errorString += "Required binary was not included in the shared cache '" + std::string(dylib.inputFile->path) + "' because: 'unknown error.  Please report to dyld'\n";
+
+                }
+            }
+        }
+        if (!errorString.empty()) {
+            _diagnostics.error("%s", errorString.c_str());
+        }
+    }
+
+    if (!_diagnostics.hasError())
+        build(dylibsToCache, otherDylibs, executables, aliases);
+
+    if (!_diagnostics.hasError()) {
+        // If we succeeded in building, then now see if there was a missing required file, and if so why its missing.
+        std::string errorString;
+        for (CacheBuilder::InputFile& inputFile : inputFiles) {
+            if (inputFile.mustBeIncluded() && inputFile.diag.hasError()) {
+                // An error loading a required file must be propagated up to the top level diagnostic handler.
+                std::string message = inputFile.diag.errorMessage();
+                if (message.back() == '\n')
+                    message.pop_back();
+                errorString += "Required binary was not included in the shared cache '" + std::string(inputFile.path) + "' because: " + message + "\n";
+            }
+        }
+        if (!errorString.empty()) {
+            _diagnostics.error("%s", errorString.c_str());
+        }
+    }
+
+    // Add all the warnings from the input files to the top level warnings on the main diagnostics object.
+    for (CacheBuilder::InputFile& inputFile : inputFiles) {
+        for (const std::string& warning : inputFile.diag.warnings())
+            _diagnostics.warning("%s", warning.c_str());
+    }
+
+    // Clean up the loaded files
+    for (LoadedMachO& loadedMachO : dylibsToCache)
+        _fileSystem.unloadFile(loadedMachO.loadedFileInfo);
+    for (LoadedMachO& loadedMachO : otherDylibs)
+        _fileSystem.unloadFile(loadedMachO.loadedFileInfo);
+    for (LoadedMachO& loadedMachO : executables)
+        _fileSystem.unloadFile(loadedMachO.loadedFileInfo);
 }
 
 void CacheBuilder::build(const std::vector<DyldSharedCache::MappedMachO>& dylibs,
                          const std::vector<DyldSharedCache::MappedMachO>& otherOsDylibsInput,
-                         const std::vector<DyldSharedCache::MappedMachO>& osExecutables)
+                         const std::vector<DyldSharedCache::MappedMachO>& osExecutables,
+                         std::vector<DyldSharedCache::FileAlias>& aliases) {
+
+    std::vector<LoadedMachO> dylibsToCache;
+    std::vector<LoadedMachO> otherDylibs;
+    std::vector<LoadedMachO> executables;
+
+    for (const DyldSharedCache::MappedMachO& mappedMachO : dylibs) {
+        dyld3::closure::LoadedFileInfo loadedFileInfo;
+        loadedFileInfo.fileContent      = mappedMachO.mh;
+        loadedFileInfo.fileContentLen   = mappedMachO.length;
+        loadedFileInfo.sliceOffset      = mappedMachO.sliceFileOffset;
+        loadedFileInfo.sliceLen         = mappedMachO.length;
+        loadedFileInfo.inode            = mappedMachO.inode;
+        loadedFileInfo.mtime            = mappedMachO.modTime;
+        loadedFileInfo.path             = mappedMachO.runtimePath.c_str();
+        dylibsToCache.emplace_back((LoadedMachO){ mappedMachO, loadedFileInfo, nullptr });
+    }
+
+    for (const DyldSharedCache::MappedMachO& mappedMachO : otherOsDylibsInput) {
+        dyld3::closure::LoadedFileInfo loadedFileInfo;
+        loadedFileInfo.fileContent      = mappedMachO.mh;
+        loadedFileInfo.fileContentLen   = mappedMachO.length;
+        loadedFileInfo.sliceOffset      = mappedMachO.sliceFileOffset;
+        loadedFileInfo.sliceLen         = mappedMachO.length;
+        loadedFileInfo.inode            = mappedMachO.inode;
+        loadedFileInfo.mtime            = mappedMachO.modTime;
+        loadedFileInfo.path             = mappedMachO.runtimePath.c_str();
+        otherDylibs.emplace_back((LoadedMachO){ mappedMachO, loadedFileInfo, nullptr });
+    }
+
+    for (const DyldSharedCache::MappedMachO& mappedMachO : osExecutables) {
+        dyld3::closure::LoadedFileInfo loadedFileInfo;
+        loadedFileInfo.fileContent      = mappedMachO.mh;
+        loadedFileInfo.fileContentLen   = mappedMachO.length;
+        loadedFileInfo.sliceOffset      = mappedMachO.sliceFileOffset;
+        loadedFileInfo.sliceLen         = mappedMachO.length;
+        loadedFileInfo.inode            = mappedMachO.inode;
+        loadedFileInfo.mtime            = mappedMachO.modTime;
+        loadedFileInfo.path             = mappedMachO.runtimePath.c_str();
+        executables.emplace_back((LoadedMachO){ mappedMachO, loadedFileInfo, nullptr });
+    }
+
+    build(dylibsToCache, otherDylibs, executables, aliases);
+}
+
+void CacheBuilder::build(const std::vector<LoadedMachO>& dylibs,
+                         const std::vector<LoadedMachO>& otherOsDylibsInput,
+                         const std::vector<LoadedMachO>& osExecutables,
+                         std::vector<DyldSharedCache::FileAlias>& aliases)
 {
     // <rdar://problem/21317611> error out instead of crash if cache has no dylibs
     // FIXME: plist should specify required vs optional dylibs
@@ -193,274 +752,144 @@
     }
     uint64_t t1 = mach_absolute_time();
 
-
     // make copy of dylib list and sort
-    std::vector<DyldSharedCache::MappedMachO> sortedDylibs = makeSortedDylibs(dylibs, _options.dylibOrdering);
-    std::vector<DyldSharedCache::MappedMachO> otherOsDylibs = otherOsDylibsInput;
-
-    // assign addresses for each segment of each dylib in new cache
-    dyld_cache_mapping_info regions[3];
-    SegmentMapping segmentMapping = assignSegmentAddresses(sortedDylibs, regions);
-    while ( cacheOverflow(regions) ) {
-        if ( !_options.evictLeafDylibsOnOverflow ) {
-            _diagnostics.error("cache overflow: %lluMB (max %lluMB)", _vmSize / 1024 / 1024, (_archLayout->sharedMemorySize) / 1024 / 1024);
-            return;
-        }
-        // find all leaf (not referenced by anything else in cache) dylibs
-
-        // build count of how many references there are to each dylib
-        __block std::map<std::string, unsigned int> referenceCount;
-        for (const DyldSharedCache::MappedMachO& dylib : sortedDylibs) {
-            dyld3::MachOParser parser(dylib.mh);
-            parser.forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool &stop) {
-                referenceCount[loadPath] += 1;
-            });
-        }
-
-        // find all dylibs not referenced
-        std::vector<DylibAndSize> unreferencedDylibs;
-        for (const DyldSharedCache::MappedMachO& dylib : sortedDylibs) {
-            dyld3::MachOParser parser(dylib.mh);
-            const char* installName = parser.installName();
-            if ( referenceCount.count(installName) == 0 ) {
-                // conservative: sum up all segments except LINKEDIT
-                __block uint64_t segsSize = 0;
-                parser.forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool &stop) {
-                    if ( strcmp(segName, "__LINKEDIT") != 0 )
-                        segsSize += vmSize;
-                });
-                unreferencedDylibs.push_back({installName, segsSize});
-            }
-        }
-        // sort leaf dylibs by size
-        std::sort(unreferencedDylibs.begin(), unreferencedDylibs.end(), [&](const DylibAndSize& a, const DylibAndSize& b) {
-            return ( a.size > b.size );
-        });
-
-        // build set of dylibs that if removed will allow cache to build
-        uint64_t reductionTarget = _vmSize - _archLayout->sharedMemorySize;
-        std::set<std::string> toRemove;
-        for (DylibAndSize& dylib : unreferencedDylibs) {
-            if ( _options.verbose )
-                _diagnostics.warning("to prevent cache overflow, not caching %s", dylib.installName);
-            toRemove.insert(dylib.installName);
-            if ( dylib.size > reductionTarget )
-                break;
-            reductionTarget -= dylib.size;
-        }
-        // transfer overflow dylibs from cached vector to other vector
-        for (const std::string& installName : toRemove) {
-            for (std::vector<DyldSharedCache::MappedMachO>::iterator it=sortedDylibs.begin(); it != sortedDylibs.end(); ++it) {
-                dyld3::MachOParser parser(it->mh);
-                if ( installName == parser.installName() ) {
-                    _evictions.insert(parser.header());
-                    otherOsDylibs.push_back(*it);
-                    sortedDylibs.erase(it);
-                    break;
-                }
-            }
-        }
-        // re-layout cache
-        segmentMapping = assignSegmentAddresses(sortedDylibs, regions);
-        if ( unreferencedDylibs.size() == 0 && cacheOverflow(regions) ) {
-            _diagnostics.error("cache overflow, tried evicting %ld leaf daylibs, but still too big: %lluMB (max %lluMB)",
-                               toRemove.size(), _vmSize / 1024 / 1024, (_archLayout->sharedMemorySize) / 1024 / 1024);
-            return;
-        }
-    }
-
-    // allocate buffer for new cache
-    _allocatedBufferSize = std::max(_currentFileSize, (uint64_t)0x100000)*1.1; // add 10% to allocation to support large closures
-    if ( vm_allocate(mach_task_self(), (vm_address_t*)&_buffer, _allocatedBufferSize, VM_FLAGS_ANYWHERE) != 0 ) {
+    makeSortedDylibs(dylibs, _options.dylibOrdering);
+
+    // allocate space used by largest possible cache plus room for LINKEDITS before optimization
+    _allocatedBufferSize = _archLayout->sharedMemorySize * 1.50;
+    if ( vm_allocate(mach_task_self(), &_fullAllocatedBuffer, _allocatedBufferSize, VM_FLAGS_ANYWHERE) != 0 ) {
         _diagnostics.error("could not allocate buffer");
         return;
     }
-    _currentFileSize = _allocatedBufferSize;
-
-    // write unoptimized cache
-    writeCacheHeader(regions, sortedDylibs, segmentMapping);
-    copyRawSegments(sortedDylibs, segmentMapping);
-    adjustAllImagesForNewSegmentLocations(sortedDylibs, segmentMapping);
+
+    // assign addresses for each segment of each dylib in new cache
+    assignSegmentAddresses();
+    std::vector<const LoadedMachO*> overflowDylibs;
+    while ( cacheOverflowAmount() != 0 ) {
+        if ( !_options.evictLeafDylibsOnOverflow ) {
+            _diagnostics.error("cache overflow by %lluMB", cacheOverflowAmount() / 1024 / 1024);
+            return;
+        }
+        size_t evictionCount = evictLeafDylibs(cacheOverflowAmount(), overflowDylibs);
+        // re-layout cache
+        for (DylibInfo& dylib : _sortedDylibs)
+            dylib.cacheLocation.clear();
+        assignSegmentAddresses();
+
+        _diagnostics.verbose("cache overflow, evicted %lu leaf dylibs\n", evictionCount);
+    }
+    markPaddingInaccessible();
+
+     // copy all segments into cache
+    uint64_t t2 = mach_absolute_time();
+    writeCacheHeader();
+    copyRawSegments();
+
+    // rebase all dylibs for new location in cache
+    uint64_t t3 = mach_absolute_time();
+    _aslrTracker.setDataRegion(_readWriteRegion.buffer, _readWriteRegion.sizeInUse);
+    adjustAllImagesForNewSegmentLocations();
     if ( _diagnostics.hasError() )
         return;
 
-    bindAllImagesInCacheFile(regions);
+    // build ImageArray for dyld3, which has side effect of binding all cached dylibs
+    uint64_t t4 = mach_absolute_time();
+    buildImageArray(aliases);
     if ( _diagnostics.hasError() )
         return;
 
     // optimize ObjC
+    uint64_t t5 = mach_absolute_time();
+    DyldSharedCache* dyldCache = (DyldSharedCache*)_readExecuteRegion.buffer;
     if ( _options.optimizeObjC )
-        optimizeObjC(_buffer, _archLayout->is64, _options.optimizeStubs, _pointersForASLR, _diagnostics);
+        optimizeObjC();
     if ( _diagnostics.hasError() )
         return;
 
+
     // optimize away stubs
+    uint64_t t6 = mach_absolute_time();
     std::vector<uint64_t> branchPoolOffsets;
     uint64_t cacheStartAddress = _archLayout->sharedMemoryStart;
     if ( _options.optimizeStubs ) {
         std::vector<uint64_t> branchPoolStartAddrs;
-        const uint64_t* p = (uint64_t*)((uint8_t*)_buffer + _buffer->header.branchPoolsOffset);
-        for (int i=0; i < _buffer->header.branchPoolsCount; ++i) {
+        const uint64_t* p = (uint64_t*)((uint8_t*)dyldCache + dyldCache->header.branchPoolsOffset);
+        for (uint32_t i=0; i < dyldCache->header.branchPoolsCount; ++i) {
             uint64_t poolAddr = p[i];
             branchPoolStartAddrs.push_back(poolAddr);
             branchPoolOffsets.push_back(poolAddr - cacheStartAddress);
         }
-        bypassStubs(_buffer, branchPoolStartAddrs, _s_neverStubEliminate, _diagnostics);
-    }
-    uint64_t t2 = mach_absolute_time();
-
-    // FIPS seal corecrypto, This must be done after stub elimination (so that
-    // __TEXT,__text is not changed after sealing), but before LINKEDIT
-    // optimization  (so that we still have access to local symbols)
+        optimizeAwayStubs(branchPoolStartAddrs, _branchPoolsLinkEditStartAddr);
+    }
+
+
+    // FIPS seal corecrypto, This must be done after stub elimination (so that __TEXT,__text is not changed after sealing)
     fipsSign();
 
     // merge and compact LINKEDIT segments
-    dyld_cache_local_symbols_info* localsInfo = nullptr;
-    if ( dylibs.size() == 0 )
-        _currentFileSize = 0x1000;
-    else
-        _currentFileSize = optimizeLinkedit(_buffer, _archLayout->is64, _options.excludeLocalSymbols, _options.optimizeStubs, branchPoolOffsets, _diagnostics, &localsInfo);
-
-    uint64_t t3 = mach_absolute_time();
-
-    // add ImageGroup for all dylibs in cache
-    __block std::vector<DyldSharedCache::MappedMachO> cachedDylibs;
-    std::unordered_map<std::string, const DyldSharedCache::MappedMachO*> mapIntoSortedDylibs;
-    for (const DyldSharedCache::MappedMachO& entry : sortedDylibs) {
-        mapIntoSortedDylibs[entry.runtimePath] = &entry;
-    }
-    _buffer->forEachImage(^(const mach_header* mh, const char* installName) {
-        auto pos = mapIntoSortedDylibs.find(installName);
-        if ( pos != mapIntoSortedDylibs.end() ) {
-            DyldSharedCache::MappedMachO newEntry = *(pos->second);
-            newEntry.mh = mh;
-            cachedDylibs.push_back(newEntry);
-        }
-        else {
-            bool found = false;
-            for (const std::string& prefix :  _options.pathPrefixes) {
-                std::string fullPath = prefix + installName;
-                char resolvedPath[PATH_MAX];
-                if ( realpath(fullPath.c_str(), resolvedPath) != nullptr ) {
-                    std::string resolvedUnPrefixed = &resolvedPath[prefix.size()];
-                    pos = mapIntoSortedDylibs.find(resolvedUnPrefixed);
-                    if ( pos != mapIntoSortedDylibs.end() ) {
-                        DyldSharedCache::MappedMachO newEntry = *(pos->second);
-                        newEntry.mh = mh;
-                        cachedDylibs.push_back(newEntry);
-                        found = true;
-                   }
-                }
-            }
-            if ( !found )
-                fprintf(stderr, "missing mapping for %s\n", installName);
-        }
-    });
-    dyld3::DyldCacheParser dyldCacheParser(_buffer, true);
-    dyld3::ImageProxyGroup* dylibGroup = dyld3::ImageProxyGroup::makeDyldCacheDylibsGroup(_diagnostics, dyldCacheParser, cachedDylibs,
-                                                                                          _options.pathPrefixes, _patchTable,
-                                                                                          _options.optimizeStubs, !_options.dylibsRemovedDuringMastering);
+    uint64_t t7 = mach_absolute_time();
+    optimizeLinkedit(branchPoolOffsets);
+
+    // copy ImageArray to end of read-only region
+    addImageArray();
     if ( _diagnostics.hasError() )
         return;
-    addCachedDylibsImageGroup(dylibGroup);
+
+    // compute and add dlopen closures for all other dylibs
+    addOtherImageArray(otherOsDylibsInput, overflowDylibs);
     if ( _diagnostics.hasError() )
         return;
 
-    uint64_t t4 = mach_absolute_time();
-
-    // add ImageGroup for other OS dylibs and bundles
-    dyld3::ImageProxyGroup* otherGroup = dyld3::ImageProxyGroup::makeOtherOsGroup(_diagnostics, dyldCacheParser, dylibGroup, otherOsDylibs,
-                                                                                  _options.inodesAreSameAsRuntime, _options.pathPrefixes);
+    // compute and add launch closures to end of read-only region
+    uint64_t t8 = mach_absolute_time();
+    addClosures(osExecutables);
     if ( _diagnostics.hasError() )
         return;
-    addCachedOtherDylibsImageGroup(otherGroup);
-    if ( _diagnostics.hasError() )
-        return;
-
-    uint64_t t5 = mach_absolute_time();
-
-    // compute and add launch closures
-    std::map<std::string, const dyld3::launch_cache::binary_format::Closure*> closures;
-    for (const DyldSharedCache::MappedMachO& mainProg : osExecutables) {
-        Diagnostics clsDiag;
-        const dyld3::launch_cache::binary_format::Closure* cls = dyld3::ImageProxyGroup::makeClosure(clsDiag, dyldCacheParser, dylibGroup, otherGroup, mainProg,
-                                                                                                     _options.inodesAreSameAsRuntime, _options.pathPrefixes);
-        if ( clsDiag.hasError() ) {
-            // if closure cannot be built, silently skip it, unless in verbose mode
-            if ( _options.verbose ) {
-                _diagnostics.warning("building closure for '%s': %s", mainProg.runtimePath.c_str(), clsDiag.errorMessage().c_str());
-                for (const std::string& warn : clsDiag.warnings() )
-                    _diagnostics.warning("%s", warn.c_str());
-            }
-        }
-        else {
-            closures[mainProg.runtimePath] = cls;
-       }
-    }
-    addClosures(closures);
-    if ( _diagnostics.hasError() )
-        return;
-
-    uint64_t t6 = mach_absolute_time();
-
+
+    // update final readOnly region size
+    dyld_cache_mapping_info* mappings = (dyld_cache_mapping_info*)(_readExecuteRegion.buffer + dyldCache->header.mappingOffset);
+    mappings[2].size = _readOnlyRegion.sizeInUse;
+    if ( _options.excludeLocalSymbols )
+        dyldCache->header.localSymbolsOffset = _readOnlyRegion.cacheFileOffset + _readOnlyRegion.sizeInUse;
+
+    // record max slide now that final size is established
+    if ( _archLayout->sharedRegionsAreDiscontiguous ) {
+        // special case x86_64 which has three non-contiguous chunks each in their own 1GB regions
+        uint64_t maxSlide0 = 0x60000000 - _readExecuteRegion.sizeInUse; // TEXT region has 1.5GB region
+        uint64_t maxSlide1 = 0x40000000 - _readWriteRegion.sizeInUse;
+        uint64_t maxSlide2 = 0x3FE00000 - _readOnlyRegion.sizeInUse;
+        dyldCache->header.maxSlide = std::min(std::min(maxSlide0, maxSlide1), maxSlide2);
+    }
+    else {
+        dyldCache->header.maxSlide = (_archLayout->sharedMemoryStart + _archLayout->sharedMemorySize) - (_readOnlyRegion.unslidLoadAddress + _readOnlyRegion.sizeInUse);
+    }
+
+    uint64_t t9 = mach_absolute_time();
+    
     // fill in slide info at start of region[2]
     // do this last because it modifies pointers in DATA segments
     if ( _options.cacheSupportsASLR ) {
+#if SUPPORT_ARCH_arm64e
+        if ( strcmp(_archLayout->archName, "arm64e") == 0 )
+            writeSlideInfoV3(_aslrTracker.bitmap(), _aslrTracker.dataPageCount());
+        else
+#endif
         if ( _archLayout->is64 )
-            writeSlideInfoV2<Pointer64<LittleEndian>>();
+            writeSlideInfoV2<Pointer64<LittleEndian>>(_aslrTracker.bitmap(), _aslrTracker.dataPageCount());
         else
-            writeSlideInfoV2<Pointer32<LittleEndian>>();
-    }
-
-    uint64_t t7 = mach_absolute_time();
-
-    // update last region size
-    dyld_cache_mapping_info* mappings = (dyld_cache_mapping_info*)((char*)_buffer + _buffer->header.mappingOffset);
-    _currentFileSize = align(_currentFileSize, _archLayout->sharedRegionAlignP2);
-    mappings[2].size = _currentFileSize - mappings[2].fileOffset;
-
-    // record cache bounds
-    _buffer->header.sharedRegionStart = _archLayout->sharedMemoryStart;
-    _buffer->header.sharedRegionSize  = _archLayout->sharedMemorySize;
-    if ( _archLayout->sharedRegionsAreDiscontiguous ) {
-        // special case x86_64 which has three non-contiguous chunks each in their own 1GB regions
-        uint64_t maxSlide0 = 0x60000000 - mappings[0].size; // TEXT region has 1.5GB region
-        uint64_t maxSlide1 = 0x40000000 - mappings[1].size;
-        uint64_t maxSlide2 = 0x3FE00000 - mappings[2].size;
-        _buffer->header.maxSlide = std::min(std::min(maxSlide0, maxSlide1), maxSlide2);
-    }
-    else {
-        _buffer->header.maxSlide = (_archLayout->sharedMemoryStart + _archLayout->sharedMemorySize) - (mappings[2].address + mappings[2].size);
-    }
-
-    // append "unmapped" local symbols region
-    if ( _options.excludeLocalSymbols ) {
-        size_t localsInfoSize = align(localsInfo->stringsOffset + localsInfo->stringsSize, _archLayout->sharedRegionAlignP2);
-        if ( _currentFileSize + localsInfoSize > _allocatedBufferSize ) {
-            _diagnostics.warning("local symbols omitted because cache buffer overflow");
-        }
-        else {
-            memcpy((char*)_buffer+_currentFileSize, localsInfo, localsInfoSize);
-            _buffer->header.localSymbolsOffset = _currentFileSize;
-            _buffer->header.localSymbolsSize   = localsInfoSize;
-            _currentFileSize += localsInfoSize;
-        }
-        free((void*)localsInfo);
-    }
-
-    recomputeCacheUUID();
-
-    // Calculate the VMSize of the resulting cache
-    __block uint64_t endAddr = 0;
-    _buffer->forEachRegion(^(const void* content, uint64_t vmAddr, uint64_t size, uint32_t permissions) {
-        if (vmAddr+size > endAddr)
-            endAddr = vmAddr+size;
-    });
-    _vmSize = endAddr - cacheStartAddress;
+#if SUPPORT_ARCH_arm64_32
+        if ( strcmp(_archLayout->archName, "arm64_32") == 0 )
+            writeSlideInfoV4<Pointer32<LittleEndian>>(_aslrTracker.bitmap(), _aslrTracker.dataPageCount());
+        else
+#endif
+            writeSlideInfoV2<Pointer32<LittleEndian>>(_aslrTracker.bitmap(), _aslrTracker.dataPageCount());
+    }
+
+    uint64_t t10 = mach_absolute_time();
 
     // last sanity check on size
-    if ( _vmSize > _archLayout->sharedMemorySize ) {
-        _diagnostics.error("cache overflow after optimizations.  %lluMB (max %lluMB)", _vmSize / 1024 / 1024, (_archLayout->sharedMemorySize) / 1024 / 1024);
+    if ( cacheOverflowAmount() != 0 ) {
+        _diagnostics.error("cache overflow after optimizations 0x%llX -> 0x%llX", _readExecuteRegion.unslidLoadAddress, _readOnlyRegion.unslidLoadAddress + _readOnlyRegion.sizeInUse);
         return;
     }
 
@@ -469,31 +898,26 @@
     if ( _diagnostics.hasError() )
         return;
 
-    uint64_t t8 = mach_absolute_time();
+    uint64_t t11 = mach_absolute_time();
 
     if ( _options.verbose ) {
-        fprintf(stderr, "time to copy and bind cached dylibs: %ums\n", absolutetime_to_milliseconds(t2-t1));
-        fprintf(stderr, "time to optimize LINKEDITs: %ums\n", absolutetime_to_milliseconds(t3-t2));
-        fprintf(stderr, "time to build ImageGroup of %lu cached dylibs: %ums\n", sortedDylibs.size(), absolutetime_to_milliseconds(t4-t3));
-        fprintf(stderr, "time to build ImageGroup of %lu other dylibs: %ums\n", otherOsDylibs.size(), absolutetime_to_milliseconds(t5-t4));
-        fprintf(stderr, "time to build %lu closures: %ums\n", osExecutables.size(), absolutetime_to_milliseconds(t6-t5));
-        fprintf(stderr, "time to compute slide info: %ums\n", absolutetime_to_milliseconds(t7-t6));
-        fprintf(stderr, "time to compute UUID and codesign cache file: %ums\n", absolutetime_to_milliseconds(t8-t7));
-    }
-
-    // trim over allocated buffer
-    if ( _allocatedBufferSize > _currentFileSize ) {
-        uint8_t* startOfUnused  = (uint8_t*)_buffer+_currentFileSize;
-        size_t unusedLen = _allocatedBufferSize-_currentFileSize;
-        vm_deallocate(mach_task_self(), (vm_address_t)startOfUnused, unusedLen);
-        _allocatedBufferSize = _currentFileSize;
+        fprintf(stderr, "time to layout cache: %ums\n", absolutetime_to_milliseconds(t2-t1));
+        fprintf(stderr, "time to copy cached dylibs into buffer: %ums\n", absolutetime_to_milliseconds(t3-t2));
+        fprintf(stderr, "time to adjust segments for new split locations: %ums\n", absolutetime_to_milliseconds(t4-t3));
+        fprintf(stderr, "time to bind all images: %ums\n", absolutetime_to_milliseconds(t5-t4));
+        fprintf(stderr, "time to optimize Objective-C: %ums\n", absolutetime_to_milliseconds(t6-t5));
+        fprintf(stderr, "time to do stub elimination: %ums\n", absolutetime_to_milliseconds(t7-t6));
+        fprintf(stderr, "time to optimize LINKEDITs: %ums\n", absolutetime_to_milliseconds(t8-t7));
+        fprintf(stderr, "time to build %lu closures: %ums\n", osExecutables.size(), absolutetime_to_milliseconds(t9-t8));
+        fprintf(stderr, "time to compute slide info: %ums\n", absolutetime_to_milliseconds(t10-t9));
+        fprintf(stderr, "time to compute UUID and codesign cache file: %ums\n", absolutetime_to_milliseconds(t11-t10));
     }
 
     return;
 }
 
 
-void CacheBuilder::writeCacheHeader(const dyld_cache_mapping_info regions[3], const std::vector<DyldSharedCache::MappedMachO>& dylibs, const SegmentMapping& segmentMappings)
+void CacheBuilder::writeCacheHeader()
 {
     // "dyld_v1" + spaces + archName(), with enough spaces to pad to 15 bytes
     std::string magic = "dyld_v1";
@@ -502,60 +926,83 @@
     assert(magic.length() == 15);
 
     // fill in header
-    memcpy(_buffer->header.magic, magic.c_str(), 16);
-    _buffer->header.mappingOffset      = sizeof(dyld_cache_header);
-    _buffer->header.mappingCount       = 3;
-    _buffer->header.imagesOffset       = (uint32_t)(_buffer->header.mappingOffset + 3*sizeof(dyld_cache_mapping_info) + sizeof(uint64_t)*_branchPoolStarts.size());
-    _buffer->header.imagesCount        = (uint32_t)dylibs.size() + _aliasCount;
-    _buffer->header.dyldBaseAddress    = 0;
-    _buffer->header.codeSignatureOffset= 0;
-    _buffer->header.codeSignatureSize  = 0;
-    _buffer->header.slideInfoOffset    = _slideInfoFileOffset;
-    _buffer->header.slideInfoSize      = _slideInfoBufferSizeAllocated;
-    _buffer->header.localSymbolsOffset = 0;
-    _buffer->header.localSymbolsSize   = 0;
-    _buffer->header.cacheType          = _options.optimizeStubs ? kDyldSharedCacheTypeProduction : kDyldSharedCacheTypeDevelopment;
-    _buffer->header.accelerateInfoAddr = 0;
-    _buffer->header.accelerateInfoSize = 0;
-    bzero(_buffer->header.uuid, 16);    // overwritten later by recomputeCacheUUID()
-    _buffer->header.branchPoolsOffset    = _buffer->header.mappingOffset + 3*sizeof(dyld_cache_mapping_info);
-    _buffer->header.branchPoolsCount     = (uint32_t)_branchPoolStarts.size();
-    _buffer->header.imagesTextOffset     = _buffer->header.imagesOffset + sizeof(dyld_cache_image_info)*_buffer->header.imagesCount;
-    _buffer->header.imagesTextCount      = dylibs.size();
-    _buffer->header.platform             = (uint8_t)_options.platform;
-    _buffer->header.formatVersion        = dyld3::launch_cache::binary_format::kFormatVersion;
-    _buffer->header.dylibsExpectedOnDisk = !_options.dylibsRemovedDuringMastering;
-    _buffer->header.simulator            = _options.forSimulator;
-
-    // fill in mappings
-    dyld_cache_mapping_info* mappings = (dyld_cache_mapping_info*)((char*)_buffer + _buffer->header.mappingOffset);
-    mappings[0] = regions[0];
-    mappings[1] = regions[1];
-    mappings[2] = regions[2];
+    dyld_cache_header* dyldCacheHeader = (dyld_cache_header*)_readExecuteRegion.buffer;
+    memcpy(dyldCacheHeader->magic, magic.c_str(), 16);
+    dyldCacheHeader->mappingOffset        = sizeof(dyld_cache_header);
+    dyldCacheHeader->mappingCount         = 3;
+    dyldCacheHeader->imagesOffset         = (uint32_t)(dyldCacheHeader->mappingOffset + 3*sizeof(dyld_cache_mapping_info) + sizeof(uint64_t)*_branchPoolStarts.size());
+    dyldCacheHeader->imagesCount          = (uint32_t)_sortedDylibs.size() + _aliasCount;
+    dyldCacheHeader->dyldBaseAddress      = 0;
+    dyldCacheHeader->codeSignatureOffset  = 0;
+    dyldCacheHeader->codeSignatureSize    = 0;
+    dyldCacheHeader->slideInfoOffset      = _slideInfoFileOffset;
+    dyldCacheHeader->slideInfoSize        = _slideInfoBufferSizeAllocated;
+    dyldCacheHeader->localSymbolsOffset   = 0;
+    dyldCacheHeader->localSymbolsSize     = 0;
+    dyldCacheHeader->cacheType            = _options.optimizeStubs ? kDyldSharedCacheTypeProduction : kDyldSharedCacheTypeDevelopment;
+    dyldCacheHeader->accelerateInfoAddr   = 0;
+    dyldCacheHeader->accelerateInfoSize   = 0;
+    bzero(dyldCacheHeader->uuid, 16);// overwritten later by recomputeCacheUUID()
+    dyldCacheHeader->branchPoolsOffset    = dyldCacheHeader->mappingOffset + 3*sizeof(dyld_cache_mapping_info);
+    dyldCacheHeader->branchPoolsCount     = (uint32_t)_branchPoolStarts.size();
+    dyldCacheHeader->imagesTextOffset     = dyldCacheHeader->imagesOffset + sizeof(dyld_cache_image_info)*dyldCacheHeader->imagesCount;
+    dyldCacheHeader->imagesTextCount      = _sortedDylibs.size();
+    dyldCacheHeader->dylibsImageGroupAddr = 0;
+    dyldCacheHeader->dylibsImageGroupSize = 0;
+    dyldCacheHeader->otherImageGroupAddr  = 0;
+    dyldCacheHeader->otherImageGroupSize  = 0;
+    dyldCacheHeader->progClosuresAddr     = 0;
+    dyldCacheHeader->progClosuresSize     = 0;
+    dyldCacheHeader->progClosuresTrieAddr = 0;
+    dyldCacheHeader->progClosuresTrieSize = 0;
+    dyldCacheHeader->platform             = (uint8_t)_options.platform;
+    dyldCacheHeader->formatVersion        = dyld3::closure::kFormatVersion;
+    dyldCacheHeader->dylibsExpectedOnDisk = !_options.dylibsRemovedDuringMastering;
+    dyldCacheHeader->simulator            = _options.forSimulator;
+    dyldCacheHeader->locallyBuiltCache    = _options.isLocallyBuiltCache;
+    dyldCacheHeader->formatVersion        = dyld3::closure::kFormatVersion;
+    dyldCacheHeader->sharedRegionStart    = _archLayout->sharedMemoryStart;
+    dyldCacheHeader->sharedRegionSize     = _archLayout->sharedMemorySize;
+
+   // fill in mappings
+    dyld_cache_mapping_info* mappings = (dyld_cache_mapping_info*)(_readExecuteRegion.buffer + dyldCacheHeader->mappingOffset);
+    mappings[0].address    = _readExecuteRegion.unslidLoadAddress;
+    mappings[0].fileOffset = 0;
+    mappings[0].size       = _readExecuteRegion.sizeInUse;
+    mappings[0].maxProt    = VM_PROT_READ | VM_PROT_EXECUTE;
+    mappings[0].initProt   = VM_PROT_READ | VM_PROT_EXECUTE;
+    mappings[1].address    = _readWriteRegion.unslidLoadAddress;
+    mappings[1].fileOffset = _readExecuteRegion.sizeInUse;
+    mappings[1].size       = _readWriteRegion.sizeInUse;
+    mappings[1].maxProt    = VM_PROT_READ | VM_PROT_WRITE;
+    mappings[1].initProt   = VM_PROT_READ | VM_PROT_WRITE;
+    mappings[2].address    = _readOnlyRegion.unslidLoadAddress;
+    mappings[2].fileOffset = _readExecuteRegion.sizeInUse + _readWriteRegion.sizeInUse;
+    mappings[2].size       = _readOnlyRegion.sizeInUse;
+    mappings[2].maxProt    = VM_PROT_READ;
+    mappings[2].initProt   = VM_PROT_READ;
 
     // fill in branch pool addresses
-    uint64_t* p = (uint64_t*)((char*)_buffer + _buffer->header.branchPoolsOffset);
+    uint64_t* p = (uint64_t*)(_readExecuteRegion.buffer + dyldCacheHeader->branchPoolsOffset);
     for (uint64_t pool : _branchPoolStarts) {
         *p++ = pool;
     }
 
     // fill in image table
-    dyld_cache_image_info* images = (dyld_cache_image_info*)((char*)_buffer + _buffer->header.imagesOffset);
-    for (const DyldSharedCache::MappedMachO& dylib : dylibs) {
-        const std::vector<SegmentMappingInfo>& segs = segmentMappings.at(dylib.mh);
-        dyld3::MachOParser parser(dylib.mh);
-        const char* installName = parser.installName();
-        images->address = segs[0].dstCacheAddress;
+    dyld_cache_image_info* images = (dyld_cache_image_info*)(_readExecuteRegion.buffer + dyldCacheHeader->imagesOffset);
+    for (const DylibInfo& dylib : _sortedDylibs) {
+        const char* installName = dylib.input->mappedFile.mh->installName();
+        images->address = dylib.cacheLocation[0].dstCacheUnslidAddress;
         if ( _options.dylibsRemovedDuringMastering ) {
             images->modTime = 0;
             images->inode   = pathHash(installName);
         }
         else {
-            images->modTime = dylib.modTime;
-            images->inode   = dylib.inode;
-        }
-        uint32_t installNameOffsetInTEXT =  (uint32_t)(installName - (char*)dylib.mh);
-        images->pathFileOffset = (uint32_t)segs[0].dstCacheOffset + installNameOffsetInTEXT;
+            images->modTime = dylib.input->mappedFile.modTime;
+            images->inode   = dylib.input->mappedFile.inode;
+        }
+        uint32_t installNameOffsetInTEXT =  (uint32_t)(installName - (char*)dylib.input->mappedFile.mh);
+        images->pathFileOffset = (uint32_t)dylib.cacheLocation[0].dstCacheFileOffset + installNameOffsetInTEXT;
         ++images;
     }
     // append aliases image records and strings
@@ -582,488 +1029,337 @@
     }
 */
     // calculate start of text image array and trailing string pool
-    dyld_cache_image_text_info* textImages = (dyld_cache_image_text_info*)((char*)_buffer + _buffer->header.imagesTextOffset);
-    uint32_t stringOffset = (uint32_t)(_buffer->header.imagesTextOffset + sizeof(dyld_cache_image_text_info) * dylibs.size());
+    dyld_cache_image_text_info* textImages = (dyld_cache_image_text_info*)(_readExecuteRegion.buffer + dyldCacheHeader->imagesTextOffset);
+    uint32_t stringOffset = (uint32_t)(dyldCacheHeader->imagesTextOffset + sizeof(dyld_cache_image_text_info) * _sortedDylibs.size());
 
     // write text image array and image names pool at same time
-    for (const DyldSharedCache::MappedMachO& dylib : dylibs) {
-        const std::vector<SegmentMappingInfo>& segs = segmentMappings.at(dylib.mh);
-        dyld3::MachOParser parser(dylib.mh);
-        parser.getUuid(textImages->uuid);
-        textImages->loadAddress     = segs[0].dstCacheAddress;
-        textImages->textSegmentSize = (uint32_t)segs[0].dstCacheSegmentSize;
+    for (const DylibInfo& dylib : _sortedDylibs) {
+        dylib.input->mappedFile.mh->getUuid(textImages->uuid);
+        textImages->loadAddress     = dylib.cacheLocation[0].dstCacheUnslidAddress;
+        textImages->textSegmentSize = (uint32_t)dylib.cacheLocation[0].dstCacheSegmentSize;
         textImages->pathOffset      = stringOffset;
-        const char* installName = parser.installName();
-        ::strcpy((char*)_buffer + stringOffset, installName);
+        const char* installName = dylib.input->mappedFile.mh->installName();
+        ::strcpy((char*)_readExecuteRegion.buffer + stringOffset, installName);
         stringOffset += (uint32_t)strlen(installName)+1;
         ++textImages;
     }
 
     // make sure header did not overflow into first mapped image
-    const dyld_cache_image_info* firstImage = (dyld_cache_image_info*)((char*)_buffer + _buffer->header.imagesOffset);
+    const dyld_cache_image_info* firstImage = (dyld_cache_image_info*)(_readExecuteRegion.buffer + dyldCacheHeader->imagesOffset);
     assert(stringOffset <= (firstImage->address - mappings[0].address));
 }
 
-
-void CacheBuilder::copyRawSegments(const std::vector<DyldSharedCache::MappedMachO>& dylibs, const SegmentMapping& mapping)
-{
-    uint8_t* cacheBytes = (uint8_t*)_buffer;
-    for (const DyldSharedCache::MappedMachO& dylib : dylibs) {
-        auto pos = mapping.find(dylib.mh);
-        assert(pos != mapping.end());
-        for (const SegmentMappingInfo& info : pos->second) {
-            //fprintf(stderr, "copy %s segment %s (0x%08X bytes) from %p to %p (logical addr 0x%llX) for %s\n", _options.archName.c_str(), info.segName, info.copySegmentSize, info.srcSegment, &cacheBytes[info.dstCacheOffset], info.dstCacheAddress, dylib.runtimePath.c_str());
-            ::memcpy(&cacheBytes[info.dstCacheOffset], info.srcSegment, info.copySegmentSize);
-        }
-    }
-}
-
-void CacheBuilder::adjustAllImagesForNewSegmentLocations(const std::vector<DyldSharedCache::MappedMachO>& dylibs, const SegmentMapping& mapping)
-{
-    uint8_t* cacheBytes = (uint8_t*)_buffer;
-    for (const DyldSharedCache::MappedMachO& dylib : dylibs) {
-        auto pos = mapping.find(dylib.mh);
-        assert(pos != mapping.end());
-        mach_header* mhInCache = (mach_header*)&cacheBytes[pos->second[0].dstCacheOffset];
-        adjustDylibSegments(_buffer, _archLayout->is64, mhInCache, pos->second, _pointersForASLR, _diagnostics);
-        if ( _diagnostics.hasError() )
+void CacheBuilder::copyRawSegments()
+{
+    const bool log = false;
+    dispatch_apply(_sortedDylibs.size(), DISPATCH_APPLY_AUTO, ^(size_t index) {
+        const DylibInfo& dylib = _sortedDylibs[index];
+        for (const SegmentMappingInfo& info : dylib.cacheLocation) {
+            if (log) fprintf(stderr, "copy %s segment %s (0x%08X bytes) from %p to %p (logical addr 0x%llX) for %s\n",
+                             _options.archName.c_str(), info.segName, info.copySegmentSize, info.srcSegment, info.dstSegment, info.dstCacheUnslidAddress, dylib.input->mappedFile.runtimePath.c_str());
+            ::memcpy(info.dstSegment, info.srcSegment, info.copySegmentSize);
+            if (uint64_t paddingSize = info.dstCacheSegmentSize - info.copySegmentSize) {
+                ::memset((char*)info.dstSegment + info.copySegmentSize, 0, paddingSize);
+            }
+        }
+    });
+}
+
+void CacheBuilder::adjustAllImagesForNewSegmentLocations()
+{
+    __block std::vector<Diagnostics> diags;
+    diags.resize(_sortedDylibs.size());
+
+    if (_options.platform == dyld3::Platform::macOS) {
+        dispatch_apply(_sortedDylibs.size(), DISPATCH_APPLY_AUTO, ^(size_t index) {
+            const DylibInfo& dylib = _sortedDylibs[index];
+            adjustDylibSegments(dylib, diags[index]);
+        });
+    } else {
+        // Note this has to be done in serial because the LOH Tracker isn't thread safe
+        for (size_t index = 0; index != _sortedDylibs.size(); ++index) {
+            const DylibInfo& dylib = _sortedDylibs[index];
+            adjustDylibSegments(dylib, diags[index]);
+        }
+    }
+
+    for (const Diagnostics& diag : diags) {
+        if ( diag.hasError() ) {
+            _diagnostics.error("%s", diag.errorMessage().c_str());
             break;
-    }
-}
-
-struct Counts {
-    unsigned long lazyCount    = 0;
-    unsigned long nonLazyCount = 0;
-};
-
-void CacheBuilder::bindAllImagesInCacheFile(const dyld_cache_mapping_info regions[3])
-{
-    const bool log = false;
-    __block std::unordered_map<std::string, Counts> useCounts;
-
-    // build map of install names to mach_headers
-    __block std::unordered_map<std::string, const mach_header*> installNameToMH;
-    __block std::vector<const mach_header*> dylibMHs;
-    _buffer->forEachImage(^(const mach_header* mh, const char* installName) {
-        installNameToMH[installName] = mh;
-        dylibMHs.push_back(mh);
-    });
-
-    __block Diagnostics parsingDiag;
-    bool (^dylibFinder)(uint32_t, const char*, void* , const mach_header**, void**) = ^(uint32_t depIndex, const char* depLoadPath, void* extra, const mach_header** foundMH, void** foundExtra) {
-        auto pos = installNameToMH.find(depLoadPath);
-        if ( pos != installNameToMH.end() ) {
-            *foundMH = pos->second;
-            *foundExtra = nullptr;
-            return true;
-        }
-        parsingDiag.error("dependent dylib %s not found", depLoadPath);
-        return false;
-    };
-    if ( parsingDiag.hasError() ) {
-        _diagnostics.error("%s", parsingDiag.errorMessage().c_str());
-        return;
-    }
-
-    // bind every dylib in cache
-    for (const mach_header* mh : dylibMHs) {
-        dyld3::MachOParser parser(mh, true);
-        bool is64 = parser.is64();
-        const char* depPaths[256];
-        const char** depPathsArray = depPaths;
-        __block int depIndex = 1;
-        parser.forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
-            depPathsArray[depIndex++] = loadPath;
-        });
-        uint8_t*  segCacheStarts[10];
-        uint64_t  segCacheAddrs[10];
-        uint8_t** segCacheStartsArray = segCacheStarts;
-        uint64_t* segCacheAddrsArray  = segCacheAddrs;
-        __block int segIndex = 0;
-        parser.forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
-             segCacheStartsArray[segIndex] = (segIndex == 0) ? (uint8_t*)mh : (uint8_t*)_buffer + fileOffset;
-             segCacheAddrsArray[segIndex] = vmAddr;
-             ++segIndex;
-        });
-        __block Diagnostics bindingDiag;
-        parser.forEachBind(bindingDiag, ^(uint32_t dataSegIndex, uint64_t dataSegOffset, uint8_t type, int libOrdinal, uint64_t addend, const char* symbolName, bool weakImport, bool lazy, bool& stop) {
-            if ( log ) {
-                if ( lazy )
-                    useCounts[symbolName].lazyCount += 1;
-                else
-                    useCounts[symbolName].nonLazyCount += 1;
-            }
-            const mach_header* targetMH = nullptr;
-            if ( libOrdinal == BIND_SPECIAL_DYLIB_SELF ) {
-                targetMH = mh;
-            }
-            else if ( libOrdinal == BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE ) {
-                parsingDiag.error("bind ordinal BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE not supported in dylibs in dyld shared cache (found in %s)", parser.installName());
-                stop = true;
-                return;
-            }
-            else if ( libOrdinal == BIND_SPECIAL_DYLIB_FLAT_LOOKUP ) {
-                parsingDiag.error("bind ordinal BIND_SPECIAL_DYLIB_FLAT_LOOKUP not supported in dylibs in dyld shared cache (found in %s)", parser.installName());
-                stop = true;
-                return;
-            }
-            else {
-                const char* fromPath = depPathsArray[libOrdinal];
-                auto pos = installNameToMH.find(fromPath);
-                if (pos == installNameToMH.end()) {
-                    if (!weakImport) {
-                        _diagnostics.error("dependent dylib %s not found", fromPath);
-                    }
-                    return;
-                }
-                targetMH = pos->second;
-            }
-            dyld3::MachOParser targetParser(targetMH, true);
-            dyld3::MachOParser::FoundSymbol foundInfo;
-            uint64_t targetValue = 0;
-            uint8_t* fixupLoc = segCacheStartsArray[dataSegIndex] + dataSegOffset;
-            if ( targetParser.findExportedSymbol(parsingDiag, symbolName, nullptr, foundInfo, dylibFinder) ) {
-                const mach_header* foundInMH = foundInfo.foundInDylib;
-                dyld3::MachOParser foundInParser(foundInMH, true);
-                uint64_t foundInBaseAddress = foundInParser.preferredLoadAddress();
-                switch ( foundInfo.kind ) {
-                    case dyld3::MachOParser::FoundSymbol::Kind::resolverOffset:
-                        // Bind to the target stub for resolver based functions.
-                        // There may be a later optimization to alter the client
-                        // stubs to directly to the target stub's lazy pointer.
-                    case dyld3::MachOParser::FoundSymbol::Kind::headerOffset:
-                        targetValue = foundInBaseAddress + foundInfo.value + addend;
-                        _pointersForASLR.push_back((void*)fixupLoc);
-                        if ( foundInMH != mh ) {
-                            uint32_t mhVmOffset                 = (uint32_t)((uint8_t*)foundInMH - (uint8_t*)_buffer);
-                            uint32_t definitionCacheVmOffset    = (uint32_t)(mhVmOffset + foundInfo.value);
-                            uint32_t referenceCacheDataVmOffset = (uint32_t)(segCacheAddrsArray[dataSegIndex] + dataSegOffset - regions[1].address);
-                            assert(referenceCacheDataVmOffset < (1<<30));
-                            dyld3::launch_cache::binary_format::PatchOffset entry;
-                            entry.last              = false;
-                            entry.hasAddend         = (addend != 0);
-                            entry.dataRegionOffset  = referenceCacheDataVmOffset;
-                            _patchTable[foundInMH][definitionCacheVmOffset].insert(*((uint32_t*)&entry));
-                        }
-                       break;
-                    case dyld3::MachOParser::FoundSymbol::Kind::absolute:
-                        // pointers set to absolute values are not slid
-                        targetValue = foundInfo.value + addend;
-                        break;
-                }
-            }
-            else if ( weakImport ) {
-                // weak pointers set to zero are not slid
-                targetValue = 0;
-            }
-            else {
-                parsingDiag.error("cannot find symbol %s, needed in dylib %s", symbolName, parser.installName());
-                stop = true;
-            }
-            switch ( type ) {
-                case BIND_TYPE_POINTER:
-                    if ( is64 )
-                        *((uint64_t*)fixupLoc) = targetValue;
-                    else
-                        *((uint32_t*)fixupLoc) = (uint32_t)targetValue;
-                    break;
-                case BIND_TYPE_TEXT_ABSOLUTE32:
-                case BIND_TYPE_TEXT_PCREL32:
-                    parsingDiag.error("text relocs not supported for shared cache binding in %s", parser.installName());
-                    stop = true;
-                    break;
-                default:
-                    parsingDiag.error("bad bind type (%d) in %s", type, parser.installName());
-                    stop = true;
-                    break;
-
-            }
-        });
-        if ( bindingDiag.hasError() ) {
-            parsingDiag.error("%s in dylib %s", bindingDiag.errorMessage().c_str(), parser.installName());
-        }
-        if ( parsingDiag.hasError() )
-            break;
-        // also need to add patch locations for weak-binds that point within same image, since they are not captured by binds above
-        parser.forEachWeakDef(bindingDiag, ^(bool strongDef, uint32_t dataSegIndex, uint64_t dataSegOffset, uint64_t addend, const char* symbolName, bool &stop) {
-            if ( strongDef )
-                return;
-            uint8_t* fixupLoc = segCacheStartsArray[dataSegIndex] + dataSegOffset;
-            dyld3::MachOParser::FoundSymbol weakFoundInfo;
-            Diagnostics weakLookupDiag;
-            if ( parser.findExportedSymbol(weakLookupDiag, symbolName, nullptr, weakFoundInfo, nullptr) ) {
-                // this is an interior pointing (rebased) pointer
-                uint64_t targetValue;
-                if ( is64 )
-                    targetValue = *((uint64_t*)fixupLoc);
-                else
-                    targetValue = *((uint32_t*)fixupLoc);
-                uint32_t definitionCacheVmOffset    = (uint32_t)(targetValue - regions[0].address);
-                uint32_t referenceCacheDataVmOffset = (uint32_t)(segCacheAddrsArray[dataSegIndex] + dataSegOffset - regions[1].address);
-                assert(referenceCacheDataVmOffset < (1<<30));
-                dyld3::launch_cache::binary_format::PatchOffset entry;
-                entry.last              = false;
-                entry.hasAddend         = (addend != 0);
-                entry.dataRegionOffset  = referenceCacheDataVmOffset;
-                _patchTable[mh][definitionCacheVmOffset].insert(*((uint32_t*)&entry));
-            }
-        });
-        if ( bindingDiag.hasError() ) {
-            parsingDiag.error("%s in dylib %s", bindingDiag.errorMessage().c_str(), parser.installName());
-        }
-        if ( parsingDiag.hasError() )
-            break;
-    }
-
-    if ( log ) {
-        unsigned lazyCount = 0;
-        unsigned nonLazyCount = 0;
-        std::unordered_set<std::string> lazyTargets;
-        for (auto entry : useCounts) {
-            fprintf(stderr, "% 3ld      % 3ld     %s\n", entry.second.lazyCount, entry.second.nonLazyCount, entry.first.c_str());
-            lazyCount += entry.second.lazyCount;
-            nonLazyCount += entry.second.nonLazyCount;
-            if ( entry.second.lazyCount != 0 )
-                lazyTargets.insert(entry.first);
-        }
-        fprintf(stderr, "lazyCount = %d\n", lazyCount);
-        fprintf(stderr, "nonLazyCount = %d\n", nonLazyCount);
-        fprintf(stderr, "unique lazys = %ld\n", lazyTargets.size());
-    }
-    
-    if ( parsingDiag.hasError() )
-        _diagnostics.error("%s", parsingDiag.errorMessage().c_str());
-}
-
-
-void CacheBuilder::recomputeCacheUUID(void)
-{
-    // Clear existing UUID, then MD5 whole cache buffer.
-    uint8_t* uuidLoc = _buffer->header.uuid;
-    bzero(uuidLoc, 16);
-    CC_MD5(_buffer, (unsigned)_currentFileSize, uuidLoc);
-    // <rdar://problem/6723729> uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats
-    uuidLoc[6] = ( uuidLoc[6] & 0x0F ) | ( 3 << 4 );
-    uuidLoc[8] = ( uuidLoc[8] & 0x3F ) | 0x80;
-}
-
-
-CacheBuilder::SegmentMapping CacheBuilder::assignSegmentAddresses(const std::vector<DyldSharedCache::MappedMachO>& dylibs, dyld_cache_mapping_info regions[3])
+        }
+    }
+}
+
+void CacheBuilder::assignSegmentAddresses()
 {
     // calculate size of header info and where first dylib's mach_header should start
     size_t startOffset = sizeof(dyld_cache_header) + 3*sizeof(dyld_cache_mapping_info);
     size_t maxPoolCount = 0;
-    if (  _archLayout->branchReach != 0 )
+    if ( _archLayout->branchReach != 0 )
         maxPoolCount = (_archLayout->sharedMemorySize / _archLayout->branchReach);
     startOffset += maxPoolCount * sizeof(uint64_t);
-    startOffset += sizeof(dyld_cache_image_info) * dylibs.size();
-    startOffset += sizeof(dyld_cache_image_text_info) * dylibs.size();
-    for (const DyldSharedCache::MappedMachO& dylib : dylibs) {
-        dyld3::MachOParser parser(dylib.mh);
-        startOffset += (strlen(parser.installName()) + 1);
+    startOffset += sizeof(dyld_cache_image_info) * _sortedDylibs.size();
+    startOffset += sizeof(dyld_cache_image_text_info) * _sortedDylibs.size();
+    for (const DylibInfo& dylib : _sortedDylibs) {
+        startOffset += (strlen(dylib.input->mappedFile.mh->installName()) + 1);
     }
     //fprintf(stderr, "%s total header size = 0x%08lX\n", _options.archName.c_str(), startOffset);
     startOffset = align(startOffset, 12);
 
     _branchPoolStarts.clear();
-    __block uint64_t addr = _archLayout->sharedMemoryStart;
-    __block SegmentMapping result;
 
     // assign TEXT segment addresses
-    regions[0].address      = addr;
-    regions[0].fileOffset   = 0;
-    regions[0].initProt     = VM_PROT_READ | VM_PROT_EXECUTE;
-    regions[0].maxProt      = VM_PROT_READ | VM_PROT_EXECUTE;
-    addr += startOffset; // header
-
+    _readExecuteRegion.buffer               = (uint8_t*)_fullAllocatedBuffer;
+    _readExecuteRegion.bufferSize           = 0;
+    _readExecuteRegion.sizeInUse            = 0;
+    _readExecuteRegion.unslidLoadAddress    = _archLayout->sharedMemoryStart;
+    _readExecuteRegion.cacheFileOffset      = 0;
+    __block uint64_t addr = _readExecuteRegion.unslidLoadAddress + startOffset; // header
     __block uint64_t lastPoolAddress = addr;
-    for (const DyldSharedCache::MappedMachO& dylib : dylibs) {
-        dyld3::MachOParser parser(dylib.mh, true);
-        parser.forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, uint32_t segIndex, uint64_t sizeOfSections, uint8_t p2align, bool& stop) {
-             if ( protections != (VM_PROT_READ | VM_PROT_EXECUTE) )
+    for (DylibInfo& dylib : _sortedDylibs) {
+        __block uint64_t textSegVmAddr = 0;
+        dylib.input->mappedFile.mh->forEachSegment(^(const dyld3::MachOFile::SegmentInfo& segInfo, bool& stop) {
+            if ( strcmp(segInfo.segName, "__TEXT") == 0 )
+                textSegVmAddr = segInfo.vmAddr;
+            if ( segInfo.protections != (VM_PROT_READ | VM_PROT_EXECUTE) )
                 return;
             // Insert branch island pools every 128MB for arm64
-            if ( (_archLayout->branchPoolTextSize != 0) && ((addr + vmSize - lastPoolAddress) > _archLayout->branchReach) ) {
+            if ( (_archLayout->branchPoolTextSize != 0) && ((addr + segInfo.vmSize - lastPoolAddress) > _archLayout->branchReach) ) {
                 _branchPoolStarts.push_back(addr);
                 _diagnostics.verbose("adding branch pool at 0x%llX\n", addr);
                 lastPoolAddress = addr;
                 addr += _archLayout->branchPoolTextSize;
             }
             // Keep __TEXT segments 4K or more aligned
-            addr = align(addr, std::max(p2align, (uint8_t)12));
-            SegmentMappingInfo info;
-            info.srcSegment          = (uint8_t*)dylib.mh + fileOffset;
-            info.segName             = segName;
-            info.dstCacheAddress     = addr;
-            info.dstCacheOffset      = (uint32_t)(addr - regions[0].address + regions[0].fileOffset);
-            info.dstCacheSegmentSize = (uint32_t)align(sizeOfSections, 12);
-            info.copySegmentSize     = (uint32_t)align(sizeOfSections, 12);
-            info.srcSegmentIndex     = segIndex;
-            result[dylib.mh].push_back(info);
-            addr += info.dstCacheSegmentSize;
+            addr = align(addr, std::max((int)segInfo.p2align, (int)12));
+            uint64_t offsetInRegion = addr - _readExecuteRegion.unslidLoadAddress;
+            SegmentMappingInfo loc;
+            loc.srcSegment             = (uint8_t*)dylib.input->mappedFile.mh + segInfo.vmAddr - textSegVmAddr;
+            loc.segName                = segInfo.segName;
+            loc.dstSegment             = _readExecuteRegion.buffer + offsetInRegion;
+            loc.dstCacheUnslidAddress  = addr;
+            loc.dstCacheFileOffset     = (uint32_t)offsetInRegion;
+            loc.dstCacheSegmentSize    = (uint32_t)align(segInfo.sizeOfSections, 12);
+            loc.copySegmentSize        = (uint32_t)align(segInfo.sizeOfSections, 12);
+            loc.srcSegmentIndex        = segInfo.segIndex;
+            dylib.cacheLocation.push_back(loc);
+            addr += loc.dstCacheSegmentSize;
         });
     }
     // align TEXT region end
     uint64_t endTextAddress = align(addr, _archLayout->sharedRegionAlignP2);
-    regions[0].size         = endTextAddress - regions[0].address;
+    _readExecuteRegion.bufferSize = endTextAddress - _readExecuteRegion.unslidLoadAddress;
+    _readExecuteRegion.sizeInUse  = _readExecuteRegion.bufferSize;
 
     // assign __DATA* addresses
     if ( _archLayout->sharedRegionsAreDiscontiguous )
         addr = _archLayout->sharedMemoryStart + 0x60000000;
     else
         addr = align((addr + _archLayout->sharedRegionPadding), _archLayout->sharedRegionAlignP2);
-    regions[1].address      = addr;
-    regions[1].fileOffset   = regions[0].fileOffset + regions[0].size;
-    regions[1].initProt     = VM_PROT_READ | VM_PROT_WRITE;
-    regions[1].maxProt      = VM_PROT_READ | VM_PROT_WRITE;
+    _readWriteRegion.buffer               = (uint8_t*)_fullAllocatedBuffer + addr - _archLayout->sharedMemoryStart;
+    _readWriteRegion.bufferSize           = 0;
+    _readWriteRegion.sizeInUse            = 0;
+    _readWriteRegion.unslidLoadAddress    = addr;
+    _readWriteRegion.cacheFileOffset      = _readExecuteRegion.sizeInUse;
 
     // layout all __DATA_CONST segments
     __block int dataConstSegmentCount = 0;
-    for (const DyldSharedCache::MappedMachO& dylib : dylibs) {
-        dyld3::MachOParser parser(dylib.mh, true);
-        parser.forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, uint32_t segIndex, uint64_t sizeOfSections, uint8_t p2align, bool& stop) {
-            if ( protections != (VM_PROT_READ | VM_PROT_WRITE) )
+    for (DylibInfo& dylib : _sortedDylibs) {
+        __block uint64_t textSegVmAddr = 0;
+       dylib.input->mappedFile.mh->forEachSegment(^(const dyld3::MachOFile::SegmentInfo& segInfo, bool& stop) {
+            if ( strcmp(segInfo.segName, "__TEXT") == 0 )
+                textSegVmAddr = segInfo.vmAddr;
+            if ( segInfo.protections != (VM_PROT_READ | VM_PROT_WRITE) )
                 return;
-            if ( strcmp(segName, "__DATA_CONST") != 0 )
+            if ( strcmp(segInfo.segName, "__DATA_CONST") != 0 )
                 return;
             ++dataConstSegmentCount;
             // Pack __DATA_CONST segments
-            addr = align(addr, p2align);
-            size_t copySize = std::min((size_t)fileSize, (size_t)sizeOfSections);
-            SegmentMappingInfo info;
-            info.srcSegment          = (uint8_t*)dylib.mh + fileOffset;
-            info.segName             = segName;
-            info.dstCacheAddress     = addr;
-            info.dstCacheOffset      = (uint32_t)(addr - regions[1].address + regions[1].fileOffset);
-            info.dstCacheSegmentSize = (uint32_t)sizeOfSections;
-            info.copySegmentSize     = (uint32_t)copySize;
-            info.srcSegmentIndex     = segIndex;
-            result[dylib.mh].push_back(info);
-            addr += info.dstCacheSegmentSize;
+            addr = align(addr, segInfo.p2align);
+            size_t copySize = std::min((size_t)segInfo.fileSize, (size_t)segInfo.sizeOfSections);
+            uint64_t offsetInRegion = addr - _readWriteRegion.unslidLoadAddress;
+            SegmentMappingInfo loc;
+            loc.srcSegment             = (uint8_t*)dylib.input->mappedFile.mh + segInfo.vmAddr - textSegVmAddr;
+            loc.segName                = segInfo.segName;
+            loc.dstSegment             = _readWriteRegion.buffer + offsetInRegion;
+            loc.dstCacheUnslidAddress  = addr;
+            loc.dstCacheFileOffset     = (uint32_t)(_readWriteRegion.cacheFileOffset + offsetInRegion);
+            loc.dstCacheSegmentSize    = (uint32_t)segInfo.sizeOfSections;
+            loc.copySegmentSize        = (uint32_t)copySize;
+            loc.srcSegmentIndex        = segInfo.segIndex;
+            dylib.cacheLocation.push_back(loc);
+            addr += loc.dstCacheSegmentSize;
         });
     }
 
     // layout all __DATA segments (and other r/w non-dirty, non-const) segments
-    for (const DyldSharedCache::MappedMachO& dylib : dylibs) {
-        dyld3::MachOParser parser(dylib.mh, true);
-        parser.forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, uint32_t segIndex, uint64_t sizeOfSections, uint8_t p2align, bool& stop) {
-            if ( protections != (VM_PROT_READ | VM_PROT_WRITE) )
+    for (DylibInfo& dylib : _sortedDylibs) {
+        __block uint64_t textSegVmAddr = 0;
+       dylib.input->mappedFile.mh->forEachSegment(^(const dyld3::MachOFile::SegmentInfo& segInfo, bool& stop) {
+            if ( strcmp(segInfo.segName, "__TEXT") == 0 )
+                textSegVmAddr = segInfo.vmAddr;
+            if ( segInfo.protections != (VM_PROT_READ | VM_PROT_WRITE) )
                 return;
-            if ( strcmp(segName, "__DATA_CONST") == 0 )
+            if ( strcmp(segInfo.segName, "__DATA_CONST") == 0 )
                 return;
-            if ( strcmp(segName, "__DATA_DIRTY") == 0 )
+            if ( strcmp(segInfo.segName, "__DATA_DIRTY") == 0 )
                 return;
             if ( dataConstSegmentCount > 10 ) {
                 // Pack __DATA segments only if we also have __DATA_CONST segments
-                addr = align(addr, p2align);
+                addr = align(addr, segInfo.p2align);
             }
             else {
                 // Keep __DATA segments 4K or more aligned
-                addr = align(addr, std::max(p2align, (uint8_t)12));
+                addr = align(addr, std::max((int)segInfo.p2align, (int)12));
             }
-            size_t copySize = std::min((size_t)fileSize, (size_t)sizeOfSections);
-            SegmentMappingInfo info;
-            info.srcSegment          = (uint8_t*)dylib.mh + fileOffset;
-            info.segName             = segName;
-            info.dstCacheAddress     = addr;
-            info.dstCacheOffset      = (uint32_t)(addr - regions[1].address + regions[1].fileOffset);
-            info.dstCacheSegmentSize = (uint32_t)sizeOfSections;
-            info.copySegmentSize     = (uint32_t)copySize;
-            info.srcSegmentIndex     = segIndex;
-            result[dylib.mh].push_back(info);
-            addr += info.dstCacheSegmentSize;
+            size_t copySize = std::min((size_t)segInfo.fileSize, (size_t)segInfo.sizeOfSections);
+            uint64_t offsetInRegion = addr - _readWriteRegion.unslidLoadAddress;
+            SegmentMappingInfo loc;
+            loc.srcSegment             = (uint8_t*)dylib.input->mappedFile.mh + segInfo.vmAddr - textSegVmAddr;
+            loc.segName                = segInfo.segName;
+            loc.dstSegment             = _readWriteRegion.buffer + offsetInRegion;
+            loc.dstCacheUnslidAddress  = addr;
+            loc.dstCacheFileOffset     = (uint32_t)(_readWriteRegion.cacheFileOffset + offsetInRegion);
+            loc.dstCacheSegmentSize    = (uint32_t)segInfo.sizeOfSections;
+            loc.copySegmentSize        = (uint32_t)copySize;
+            loc.srcSegmentIndex        = segInfo.segIndex;
+            dylib.cacheLocation.push_back(loc);
+            addr += loc.dstCacheSegmentSize;
         });
     }
 
-    // layout all __DATA_DIRTY segments, sorted
+    // layout all __DATA_DIRTY segments, sorted (FIXME)
+	const size_t dylibCount = _sortedDylibs.size();
+    uint32_t dirtyDataSortIndexes[dylibCount];
+    for (size_t i=0; i < dylibCount; ++i)
+        dirtyDataSortIndexes[i] = (uint32_t)i;
+    std::sort(&dirtyDataSortIndexes[0], &dirtyDataSortIndexes[dylibCount], [&](const uint32_t& a, const uint32_t& b) {
+        const auto& orderA = _options.dirtyDataSegmentOrdering.find(_sortedDylibs[a].input->mappedFile.runtimePath);
+        const auto& orderB = _options.dirtyDataSegmentOrdering.find(_sortedDylibs[b].input->mappedFile.runtimePath);
+        bool foundA = (orderA != _options.dirtyDataSegmentOrdering.end());
+        bool foundB = (orderB != _options.dirtyDataSegmentOrdering.end());
+
+        // Order all __DATA_DIRTY segments specified in the order file first, in the order specified in the file,
+        // followed by any other __DATA_DIRTY segments in lexicographic order.
+        if ( foundA && foundB )
+            return orderA->second < orderB->second;
+        else if ( foundA )
+            return true;
+        else if ( foundB )
+             return false;
+        else
+             return _sortedDylibs[a].input->mappedFile.runtimePath < _sortedDylibs[b].input->mappedFile.runtimePath;
+    });
     addr = align(addr, 12);
-    std::vector<DyldSharedCache::MappedMachO> dirtyDataDylibs = makeSortedDylibs(dylibs, _options.dirtyDataSegmentOrdering);
-    for (const DyldSharedCache::MappedMachO& dylib : dirtyDataDylibs) {
-        dyld3::MachOParser parser(dylib.mh, true);
-        parser.forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, uint32_t segIndex, uint64_t sizeOfSections, uint8_t p2align, bool& stop) {
-            if ( protections != (VM_PROT_READ | VM_PROT_WRITE) )
+    for (size_t i=0; i < dylibCount; ++i) {
+        DylibInfo& dylib  = _sortedDylibs[dirtyDataSortIndexes[i]];
+        __block uint64_t textSegVmAddr = 0;
+        dylib.input->mappedFile.mh->forEachSegment(^(const dyld3::MachOFile::SegmentInfo& segInfo, bool& stop) {
+            if ( strcmp(segInfo.segName, "__TEXT") == 0 )
+                textSegVmAddr = segInfo.vmAddr;
+            if ( segInfo.protections != (VM_PROT_READ | VM_PROT_WRITE) )
                 return;
-            if ( strcmp(segName, "__DATA_DIRTY") != 0 )
+            if ( strcmp(segInfo.segName, "__DATA_DIRTY") != 0 )
                 return;
             // Pack __DATA_DIRTY segments
-            addr = align(addr, p2align);
-            size_t copySize = std::min((size_t)fileSize, (size_t)sizeOfSections);
-            SegmentMappingInfo info;
-            info.srcSegment          = (uint8_t*)dylib.mh + fileOffset;
-            info.segName             = segName;
-            info.dstCacheAddress     = addr;
-            info.dstCacheOffset      = (uint32_t)(addr - regions[1].address + regions[1].fileOffset);
-            info.dstCacheSegmentSize = (uint32_t)sizeOfSections;
-            info.copySegmentSize     = (uint32_t)copySize;
-            info.srcSegmentIndex     = segIndex;
-            result[dylib.mh].push_back(info);
-            addr += info.dstCacheSegmentSize;
+            addr = align(addr, segInfo.p2align);
+            size_t copySize = std::min((size_t)segInfo.fileSize, (size_t)segInfo.sizeOfSections);
+            uint64_t offsetInRegion = addr - _readWriteRegion.unslidLoadAddress;
+            SegmentMappingInfo loc;
+            loc.srcSegment             = (uint8_t*)dylib.input->mappedFile.mh + segInfo.vmAddr - textSegVmAddr;
+            loc.segName                = segInfo.segName;
+            loc.dstSegment             = _readWriteRegion.buffer + offsetInRegion;
+            loc.dstCacheUnslidAddress  = addr;
+            loc.dstCacheFileOffset     = (uint32_t)(_readWriteRegion.cacheFileOffset + offsetInRegion);
+            loc.dstCacheSegmentSize    = (uint32_t)segInfo.sizeOfSections;
+            loc.copySegmentSize        = (uint32_t)copySize;
+            loc.srcSegmentIndex        = segInfo.segIndex;
+            dylib.cacheLocation.push_back(loc);
+            addr += loc.dstCacheSegmentSize;
         });
     }
 
     // align DATA region end
     uint64_t endDataAddress = align(addr, _archLayout->sharedRegionAlignP2);
-    regions[1].size         = endDataAddress - regions[1].address;
+    _readWriteRegion.bufferSize   = endDataAddress - _readWriteRegion.unslidLoadAddress;
+    _readWriteRegion.sizeInUse    = _readWriteRegion.bufferSize;
 
     // start read-only region
     if ( _archLayout->sharedRegionsAreDiscontiguous )
         addr = _archLayout->sharedMemoryStart + 0xA0000000;
     else
         addr = align((addr + _archLayout->sharedRegionPadding), _archLayout->sharedRegionAlignP2);
-    regions[2].address    = addr;
-    regions[2].fileOffset = regions[1].fileOffset + regions[1].size;
-    regions[2].maxProt    = VM_PROT_READ;
-    regions[2].initProt   = VM_PROT_READ;
+    _readOnlyRegion.buffer               = (uint8_t*)_fullAllocatedBuffer + addr - _archLayout->sharedMemoryStart;
+    _readOnlyRegion.bufferSize           = 0;
+    _readOnlyRegion.sizeInUse            = 0;
+    _readOnlyRegion.unslidLoadAddress    = addr;
+    _readOnlyRegion.cacheFileOffset      = _readWriteRegion.cacheFileOffset + _readWriteRegion.sizeInUse;
 
     // reserve space for kernel ASLR slide info at start of r/o region
     if ( _options.cacheSupportsASLR ) {
-        _slideInfoBufferSizeAllocated = align((regions[1].size/4096) * 4, _archLayout->sharedRegionAlignP2); // only need 2 bytes per page
-        _slideInfoFileOffset = regions[2].fileOffset;
+        size_t slideInfoSize = sizeof(dyld_cache_slide_info);
+        slideInfoSize = std::max(slideInfoSize, sizeof(dyld_cache_slide_info2));
+        slideInfoSize = std::max(slideInfoSize, sizeof(dyld_cache_slide_info3));
+        slideInfoSize = std::max(slideInfoSize, sizeof(dyld_cache_slide_info4));
+        _slideInfoBufferSizeAllocated = align(slideInfoSize + (_readWriteRegion.sizeInUse/4096) * _archLayout->slideInfoBytesPerPage, _archLayout->sharedRegionAlignP2);
+        _slideInfoFileOffset = _readOnlyRegion.cacheFileOffset;
         addr += _slideInfoBufferSizeAllocated;
     }
 
     // layout all read-only (but not LINKEDIT) segments
-    for (const DyldSharedCache::MappedMachO& dylib : dylibs) {
-        dyld3::MachOParser parser(dylib.mh, true);
-        parser.forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, uint32_t segIndex, uint64_t sizeOfSections, uint8_t p2align, bool& stop) {
-            if ( protections != VM_PROT_READ )
+    for (DylibInfo& dylib : _sortedDylibs) {
+        __block uint64_t textSegVmAddr = 0;
+        dylib.input->mappedFile.mh->forEachSegment(^(const dyld3::MachOFile::SegmentInfo& segInfo, bool& stop) {
+            if ( strcmp(segInfo.segName, "__TEXT") == 0 )
+                textSegVmAddr = segInfo.vmAddr;
+            if ( segInfo.protections != VM_PROT_READ )
                 return;
-            if ( strcmp(segName, "__LINKEDIT") == 0 )
+            if ( strcmp(segInfo.segName, "__LINKEDIT") == 0 )
                 return;
             // Keep segments segments 4K or more aligned
-            addr = align(addr, std::max(p2align, (uint8_t)12));
-            SegmentMappingInfo info;
-            info.srcSegment          = (uint8_t*)dylib.mh + fileOffset;
-            info.segName             = segName;
-            info.dstCacheAddress     = addr;
-            info.dstCacheOffset      = (uint32_t)(addr - regions[2].address + regions[2].fileOffset);
-            info.dstCacheSegmentSize = (uint32_t)align(sizeOfSections, 12);
-            info.copySegmentSize     = (uint32_t)sizeOfSections;
-            info.srcSegmentIndex     = segIndex;
-            result[dylib.mh].push_back(info);
-            addr += info.dstCacheSegmentSize;
+            addr = align(addr, std::max((int)segInfo.p2align, (int)12));
+            uint64_t offsetInRegion = addr - _readOnlyRegion.unslidLoadAddress;
+            SegmentMappingInfo loc;
+            loc.srcSegment             = (uint8_t*)dylib.input->mappedFile.mh + segInfo.vmAddr - textSegVmAddr;
+            loc.segName                = segInfo.segName;
+            loc.dstSegment             = _readOnlyRegion.buffer + offsetInRegion;
+            loc.dstCacheUnslidAddress  = addr;
+            loc.dstCacheFileOffset     = (uint32_t)(_readOnlyRegion.cacheFileOffset + offsetInRegion);
+            loc.dstCacheSegmentSize    = (uint32_t)align(segInfo.sizeOfSections, 12);
+            loc.copySegmentSize        = (uint32_t)segInfo.sizeOfSections;
+            loc.srcSegmentIndex        = segInfo.segIndex;
+            dylib.cacheLocation.push_back(loc);
+            addr += loc.dstCacheSegmentSize;
         });
     }
-    // layout all LINKEDIT segments (after other read-only segments)
-    for (const DyldSharedCache::MappedMachO& dylib : dylibs) {
-        dyld3::MachOParser parser(dylib.mh, true);
-        parser.forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, uint32_t segIndex, uint64_t sizeOfSections, uint8_t p2align, bool& stop) {
-            if ( protections != VM_PROT_READ )
+    // layout all LINKEDIT segments (after other read-only segments), aligned to 16KB
+    addr = align(addr, 14);
+    _nonLinkEditReadOnlySize =  addr - _readOnlyRegion.unslidLoadAddress;
+    for (DylibInfo& dylib : _sortedDylibs) {
+        __block uint64_t textSegVmAddr = 0;
+        dylib.input->mappedFile.mh->forEachSegment(^(const dyld3::MachOFile::SegmentInfo& segInfo, bool& stop) {
+            if ( strcmp(segInfo.segName, "__TEXT") == 0 )
+                textSegVmAddr = segInfo.vmAddr;
+            if ( segInfo.protections != VM_PROT_READ )
                 return;
-            if ( strcmp(segName, "__LINKEDIT") != 0 )
+            if ( strcmp(segInfo.segName, "__LINKEDIT") != 0 )
                 return;
             // Keep segments segments 4K or more aligned
-            addr = align(addr, std::max(p2align, (uint8_t)12));
-            SegmentMappingInfo info;
-            info.srcSegment          = (uint8_t*)dylib.mh + fileOffset;
-            info.segName             = segName;
-            info.dstCacheAddress     = addr;
-            info.dstCacheOffset      = (uint32_t)(addr - regions[2].address + regions[2].fileOffset);
-            info.dstCacheSegmentSize = (uint32_t)align(sizeOfSections, 12);
-            info.copySegmentSize     = (uint32_t)align(fileSize, 12);
-            info.srcSegmentIndex     = segIndex;
-            result[dylib.mh].push_back(info);
-            addr += info.dstCacheSegmentSize;
+            addr = align(addr, std::max((int)segInfo.p2align, (int)12));
+            size_t copySize = std::min((size_t)segInfo.fileSize, (size_t)segInfo.sizeOfSections);
+            uint64_t offsetInRegion = addr - _readOnlyRegion.unslidLoadAddress;
+            SegmentMappingInfo loc;
+            loc.srcSegment             = (uint8_t*)dylib.input->mappedFile.mh + segInfo.vmAddr - textSegVmAddr;
+            loc.segName                = segInfo.segName;
+            loc.dstSegment             = _readOnlyRegion.buffer + offsetInRegion;
+            loc.dstCacheUnslidAddress  = addr;
+            loc.dstCacheFileOffset     = (uint32_t)(_readOnlyRegion.cacheFileOffset + offsetInRegion);
+            loc.dstCacheSegmentSize    = (uint32_t)align(segInfo.sizeOfSections, 12);
+            loc.copySegmentSize        = (uint32_t)copySize;
+            loc.srcSegmentIndex        = segInfo.segIndex;
+            dylib.cacheLocation.push_back(loc);
+            addr += loc.dstCacheSegmentSize;
         });
     }
     // add room for branch pool linkedits
@@ -1072,28 +1368,34 @@
 
     // align r/o region end
     uint64_t endReadOnlyAddress = align(addr, _archLayout->sharedRegionAlignP2);
-    regions[2].size = endReadOnlyAddress - regions[2].address;
-    _currentFileSize = regions[2].fileOffset + regions[2].size;
-
-    // FIXME: Confirm these numbers for all platform/arch combos
-    // assume LINKEDIT optimzation reduces LINKEDITs to %40 of original size
-    if ( _options.excludeLocalSymbols ) {
-        _vmSize = regions[2].address + (regions[2].size * 2 / 5) - regions[0].address;
-    }
-    else {
-        _vmSize = regions[2].address + (regions[2].size * 9 / 10) - regions[0].address;
-    }
+    _readOnlyRegion.bufferSize  = endReadOnlyAddress - _readOnlyRegion.unslidLoadAddress;
+    _readOnlyRegion.sizeInUse   = _readOnlyRegion.bufferSize;
+
+    //fprintf(stderr, "RX region=%p -> %p, logical addr=0x%llX\n", _readExecuteRegion.buffer, _readExecuteRegion.buffer+_readExecuteRegion.bufferSize, _readExecuteRegion.unslidLoadAddress);
+    //fprintf(stderr, "RW region=%p -> %p, logical addr=0x%llX\n", _readWriteRegion.buffer,   _readWriteRegion.buffer+_readWriteRegion.bufferSize, _readWriteRegion.unslidLoadAddress);
+    //fprintf(stderr, "RO region=%p -> %p, logical addr=0x%llX\n", _readOnlyRegion.buffer,    _readOnlyRegion.buffer+_readOnlyRegion.bufferSize, _readOnlyRegion.unslidLoadAddress);
 
     // sort SegmentMappingInfo for each image to be in the same order as original segments
-    for (auto& entry : result) {
-        std::vector<SegmentMappingInfo>& infos = entry.second;
-        std::sort(infos.begin(), infos.end(), [&](const SegmentMappingInfo& a, const SegmentMappingInfo& b) {
+    for (DylibInfo& dylib : _sortedDylibs) {
+        std::sort(dylib.cacheLocation.begin(), dylib.cacheLocation.end(), [&](const SegmentMappingInfo& a, const SegmentMappingInfo& b) {
             return a.srcSegmentIndex < b.srcSegmentIndex;
         });
     }
-
-    return result;
-}
+}
+
+void CacheBuilder::markPaddingInaccessible()
+{
+    // region between RX and RW
+    uint8_t* startPad1 = _readExecuteRegion.buffer+_readExecuteRegion.sizeInUse;
+    uint8_t* endPad1   = _readWriteRegion.buffer;
+    ::vm_protect(mach_task_self(), (vm_address_t)startPad1, endPad1-startPad1, false, 0);
+
+    // region between RW and RO
+    uint8_t* startPad2 = _readWriteRegion.buffer+_readWriteRegion.sizeInUse;
+    uint8_t* endPad2   = _readOnlyRegion.buffer;
+    ::vm_protect(mach_task_self(), (vm_address_t)startPad2, endPad2-startPad2, false, 0);
+}
+
 
 uint64_t CacheBuilder::pathHash(const char* path)
 {
@@ -1108,21 +1410,22 @@
 {
     foundDylibName = "???";
     foundSegName   = "???";
-    uint32_t cacheOffset = (uint32_t)((uint8_t*)contentPtr - (uint8_t*)_buffer);
-    _buffer->forEachImage(^(const mach_header* mh, const char* installName) {
-        dyld3::MachOParser parser(mh, true);
-        parser.forEachSegment(^(const char* segName, uint32_t fileOffset, uint32_t fileSize, uint64_t vmAddr, uint64_t vmSize, uint8_t protections, bool& stop) {
-            if ( (cacheOffset > fileOffset) && (cacheOffset < (fileOffset+vmSize)) ) {
+    uint64_t unslidVmAddr = ((uint8_t*)contentPtr - _readExecuteRegion.buffer) + _readExecuteRegion.unslidLoadAddress;
+    const DyldSharedCache* cache = (DyldSharedCache*)_readExecuteRegion.buffer;
+    cache->forEachImage(^(const mach_header* mh, const char* installName) {
+        ((dyld3::MachOLoaded*)mh)->forEachSegment(^(const dyld3::MachOFile::SegmentInfo& info, bool &stop) {
+            if ( (unslidVmAddr >= info.vmAddr) && (unslidVmAddr < (info.vmAddr+info.vmSize)) ) {
                 foundDylibName = installName;
-                foundSegName = segName;
+                foundSegName   = info.segName;
+                stop           = true;
             }
         });
     });
- }
+}
 
 
 template <typename P>
-bool CacheBuilder::makeRebaseChain(uint8_t* pageContent, uint16_t lastLocationOffset, uint16_t offset, const dyld_cache_slide_info2* info)
+bool CacheBuilder::makeRebaseChainV2(uint8_t* pageContent, uint16_t lastLocationOffset, uint16_t offset, const dyld_cache_slide_info2* info)
 {
     typedef typename P::uint_t     pint_t;
 
@@ -1152,7 +1455,7 @@
         P::setP(*lastLoc, newLastValue);
         return true;
     }
-    //warning("  too big delta = %d, lastOffset=0x%03X, offset=0x%03X", offset - lastLocationOffset, lastLocationOffset, offset);
+    //fprintf(stderr, "  too big delta = %d, lastOffset=0x%03X, offset=0x%03X\n", offset - lastLocationOffset, lastLocationOffset, offset);
 
     // distance between rebase locations is too far
     // see if we can make a chain from non-rebase locations
@@ -1182,7 +1485,7 @@
     // we can make chain. go back and add each non-rebase location to chain
     uint16_t prevOffset = lastLocationOffset;
     pint_t* prevLoc = (pint_t*)&pageContent[prevOffset];
-    for (int n=0; n < nrIndex; ++n) {
+    for (unsigned n=0; n < nrIndex; ++n) {
         uint16_t nOffset = nonRebaseLocationOffsets[n];
         assert(nOffset != 0);
         pint_t* nLoc = (pint_t*)&pageContent[nOffset];
@@ -1213,7 +1516,7 @@
 
 
 template <typename P>
-void CacheBuilder::addPageStarts(uint8_t* pageContent, const bool bitmap[], const dyld_cache_slide_info2* info,
+void CacheBuilder::addPageStartsV2(uint8_t* pageContent, const bool bitmap[], const dyld_cache_slide_info2* info,
                                 std::vector<uint16_t>& pageStarts, std::vector<uint16_t>& pageExtras)
 {
     typedef typename P::uint_t     pint_t;
@@ -1225,20 +1528,20 @@
 
     uint16_t startValue = DYLD_CACHE_SLIDE_PAGE_ATTR_NO_REBASE;
     uint16_t lastLocationOffset = 0xFFFF;
-    for(int i=0; i < pageSize/4; ++i) {
+    for(uint32_t i=0; i < pageSize/4; ++i) {
         unsigned offset = i*4;
         if ( bitmap[i] ) {
             if ( startValue == DYLD_CACHE_SLIDE_PAGE_ATTR_NO_REBASE ) {
                 // found first rebase location in page
                 startValue = i;
             }
-            else if ( !makeRebaseChain<P>(pageContent, lastLocationOffset, offset, info) ) {
+            else if ( !makeRebaseChainV2<P>(pageContent, lastLocationOffset, offset, info) ) {
                 // can't record all rebasings in one chain
                 if ( (startValue & DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA) == 0 ) {
                     // switch page_start to "extras" which is a list of chain starts
                     unsigned indexInExtras = (unsigned)pageExtras.size();
                     if ( indexInExtras > 0x3FFF ) {
-                        _diagnostics.error("rebase overflow in page extras");
+                        _diagnostics.error("rebase overflow in v2 page extras");
                         return;
                     }
                     pageExtras.push_back(startValue);
@@ -1264,46 +1567,15 @@
 }
 
 template <typename P>
-void CacheBuilder::writeSlideInfoV2()
+void CacheBuilder::writeSlideInfoV2(const bool bitmap[], unsigned dataPageCount)
 {
     typedef typename P::uint_t    pint_t;
     typedef typename P::E         E;
     const uint32_t pageSize = 4096;
 
-    // build one 1024/4096 bool bitmap per page (4KB/16KB) of DATA
-    const dyld_cache_mapping_info* mappings = (dyld_cache_mapping_info*)((char*)_buffer + _buffer->header.mappingOffset);
-    uint8_t* const dataStart = (uint8_t*)_buffer + mappings[1].fileOffset;
-    uint8_t* const dataEnd   = dataStart + mappings[1].size;
-    unsigned pageCount = (unsigned)(mappings[1].size+pageSize-1)/pageSize;
-    const long bitmapSize = pageCount*(pageSize/4)*sizeof(bool);
-    bool* bitmap = (bool*)calloc(bitmapSize, 1);
-    for (void* p : _pointersForASLR) {
-        if ( (p < dataStart) || ( p > dataEnd) ) {
-            _diagnostics.error("DATA pointer for sliding, out of range\n");
-            free(bitmap);
-            return;
-        }
-        long byteOffset = (long)((uint8_t*)p - dataStart);
-        if ( (byteOffset % 4) != 0 ) {
-            _diagnostics.error("pointer not 4-byte aligned in DATA offset 0x%08lX\n", byteOffset);
-            free(bitmap);
-            return;
-        }
-        long boolIndex = byteOffset / 4;
-        // work around <rdar://24941083> by ignoring pointers to be slid that are NULL on disk
-        if ( *((pint_t*)p) == 0 ) {
-            std::string dylibName;
-            std::string segName;
-            findDylibAndSegment(p, dylibName, segName);
-            _diagnostics.warning("NULL pointer asked to be slid in %s at DATA region offset 0x%04lX of %s", segName.c_str(), byteOffset, dylibName.c_str());
-            continue;
-        }
-        bitmap[boolIndex] = true;
-    }
-
     // fill in fixed info
     assert(_slideInfoFileOffset != 0);
-    dyld_cache_slide_info2* info = (dyld_cache_slide_info2*)((uint8_t*)_buffer + _slideInfoFileOffset);
+    dyld_cache_slide_info2* info = (dyld_cache_slide_info2*)_readOnlyRegion.buffer;
     info->version    = 2;
     info->page_size  = pageSize;
     info->delta_mask = _archLayout->pointerDeltaMask;
@@ -1312,20 +1584,18 @@
     // set page starts and extras for each page
     std::vector<uint16_t> pageStarts;
     std::vector<uint16_t> pageExtras;
-    pageStarts.reserve(pageCount);
-    uint8_t* pageContent = dataStart;;
+    pageStarts.reserve(dataPageCount);
+    uint8_t* pageContent = _readWriteRegion.buffer;
     const bool* bitmapForPage = bitmap;
-    for (unsigned i=0; i < pageCount; ++i) {
+    for (unsigned i=0; i < dataPageCount; ++i) {
         //warning("page[%d]", i);
-        addPageStarts<P>(pageContent, bitmapForPage, info, pageStarts, pageExtras);
+        addPageStartsV2<P>(pageContent, bitmapForPage, info, pageStarts, pageExtras);
         if ( _diagnostics.hasError() ) {
-            free(bitmap);
             return;
         }
         pageContent += pageSize;
         bitmapForPage += (sizeof(bool)*(pageSize/4));
     }
-    free((void*)bitmap);
 
     // fill in computed info
     info->page_starts_offset = sizeof(dyld_cache_slide_info2);
@@ -1339,11 +1609,213 @@
     for (unsigned i=0; i < pageExtras.size(); ++i)
         pageExtrasBuffer[i] = pageExtras[i];
     // update header with final size
-    _buffer->header.slideInfoSize = align(info->page_extras_offset + pageExtras.size()*sizeof(uint16_t), _archLayout->sharedRegionAlignP2);
-    if ( _buffer->header.slideInfoSize > _slideInfoBufferSizeAllocated ) {
+    uint64_t slideInfoSize = align(info->page_extras_offset + pageExtras.size()*sizeof(uint16_t), _archLayout->sharedRegionAlignP2);
+    if ( slideInfoSize > _slideInfoBufferSizeAllocated ) {
         _diagnostics.error("kernel slide info overflow buffer");
     }
-    //warning("pageCount=%u, page_starts_count=%lu, page_extras_count=%lu", pageCount, pageStarts.size(), pageExtras.size());
+    ((dyld_cache_header*)_readExecuteRegion.buffer)->slideInfoSize = slideInfoSize;
+    //fprintf(stderr, "pageCount=%u, page_starts_count=%lu, page_extras_count=%lu\n", dataPageCount, pageStarts.size(), pageExtras.size());
+}
+
+// fits in to int16_t
+static bool smallValue(uint64_t value)
+{
+    uint32_t high = (value & 0xFFFF8000);
+    return (high == 0) || (high == 0xFFFF8000);
+}
+
+template <typename P>
+bool CacheBuilder::makeRebaseChainV4(uint8_t* pageContent, uint16_t lastLocationOffset, uint16_t offset, const dyld_cache_slide_info4* info)
+{
+    typedef typename P::uint_t     pint_t;
+
+    const pint_t   deltaMask    = (pint_t)(info->delta_mask);
+    const pint_t   valueMask    = ~deltaMask;
+    const pint_t   valueAdd     = (pint_t)(info->value_add);
+    const unsigned deltaShift   = __builtin_ctzll(deltaMask) - 2;
+    const uint32_t maxDelta     = (uint32_t)(deltaMask >> deltaShift);
+
+    pint_t* lastLoc = (pint_t*)&pageContent[lastLocationOffset+0];
+    pint_t lastValue = (pint_t)P::getP(*lastLoc);
+    if ( (lastValue - valueAdd) & deltaMask ) {
+        std::string dylibName;
+        std::string segName;
+        findDylibAndSegment((void*)pageContent, dylibName, segName);
+        _diagnostics.error("rebase pointer does not point within cache. lastOffset=0x%04X, seg=%s, dylib=%s\n",
+                            lastLocationOffset, segName.c_str(), dylibName.c_str());
+        return false;
+    }
+    if ( offset <= (lastLocationOffset+maxDelta) ) {
+        // previous location in range, make link from it
+        // encode this location into last value
+        pint_t delta = offset - lastLocationOffset;
+        pint_t newLastValue = ((lastValue - valueAdd) & valueMask) | (delta << deltaShift);
+        //warning("  add chain: delta = %d, lastOffset=0x%03X, offset=0x%03X, org value=0x%08lX, new value=0x%08lX",
+        //                    offset - lastLocationOffset, lastLocationOffset, offset, (long)lastValue, (long)newLastValue);
+        P::setP(*lastLoc, newLastValue);
+        return true;
+    }
+    //fprintf(stderr, "  too big delta = %d, lastOffset=0x%03X, offset=0x%03X\n", offset - lastLocationOffset, lastLocationOffset, offset);
+
+    // distance between rebase locations is too far
+    // see if we can make a chain from non-rebase locations
+    uint16_t nonRebaseLocationOffsets[1024];
+    unsigned nrIndex = 0;
+    for (uint16_t i = lastLocationOffset; i < offset-maxDelta; ) {
+        nonRebaseLocationOffsets[nrIndex] = 0;
+        for (int j=maxDelta; j > 0; j -= 4) {
+            pint_t value = (pint_t)P::getP(*(pint_t*)&pageContent[i+j]);
+            if ( smallValue(value) ) {
+                // Steal values of 0 to be used in the rebase chain
+                nonRebaseLocationOffsets[nrIndex] = i+j;
+                break;
+            }
+        }
+        if ( nonRebaseLocationOffsets[nrIndex] == 0 ) {
+            lastValue = (pint_t)P::getP(*lastLoc);
+            pint_t newValue = ((lastValue - valueAdd) & valueMask);
+            //fprintf(stderr, "   no way to make non-rebase delta chain, terminate off=0x%03X, old value=0x%08lX, new value=0x%08lX\n",
+            //                lastLocationOffset, (long)lastValue, (long)newValue);
+            P::setP(*lastLoc, newValue);
+            return false;
+        }
+        i = nonRebaseLocationOffsets[nrIndex];
+        ++nrIndex;
+    }
+
+    // we can make chain. go back and add each non-rebase location to chain
+    uint16_t prevOffset = lastLocationOffset;
+    pint_t* prevLoc = (pint_t*)&pageContent[prevOffset];
+    for (unsigned n=0; n < nrIndex; ++n) {
+        uint16_t nOffset = nonRebaseLocationOffsets[n];
+        assert(nOffset != 0);
+        pint_t* nLoc = (pint_t*)&pageContent[nOffset];
+        uint32_t delta2 = nOffset - prevOffset;
+        pint_t value = (pint_t)P::getP(*prevLoc);
+        pint_t newValue;
+        if ( smallValue(value) )
+            newValue = (value & valueMask) | (delta2 << deltaShift);
+        else
+            newValue = ((value - valueAdd) & valueMask) | (delta2 << deltaShift);
+        //warning("    non-rebase delta = %d, to off=0x%03X, old value=0x%08lX, new value=0x%08lX", delta2, nOffset, (long)value, (long)newValue);
+        P::setP(*prevLoc, newValue);
+        prevOffset = nOffset;
+        prevLoc = nLoc;
+    }
+    uint32_t delta3 = offset - prevOffset;
+    pint_t value = (pint_t)P::getP(*prevLoc);
+    pint_t newValue;
+    if ( smallValue(value) )
+        newValue = (value & valueMask) | (delta3 << deltaShift);
+    else
+        newValue = ((value - valueAdd) & valueMask) | (delta3 << deltaShift);
+    //warning("    non-rebase delta = %d, to off=0x%03X, old value=0x%08lX, new value=0x%08lX", delta3, offset, (long)value, (long)newValue);
+    P::setP(*prevLoc, newValue);
+
+    return true;
+}
+
+
+template <typename P>
+void CacheBuilder::addPageStartsV4(uint8_t* pageContent, const bool bitmap[], const dyld_cache_slide_info4* info,
+                                std::vector<uint16_t>& pageStarts, std::vector<uint16_t>& pageExtras)
+{
+    typedef typename P::uint_t     pint_t;
+
+    const pint_t   deltaMask    = (pint_t)(info->delta_mask);
+    const pint_t   valueMask    = ~deltaMask;
+    const uint32_t pageSize     = info->page_size;
+    const pint_t   valueAdd     = (pint_t)(info->value_add);
+
+    uint16_t startValue = DYLD_CACHE_SLIDE4_PAGE_NO_REBASE;
+    uint16_t lastLocationOffset = 0xFFFF;
+    for(uint32_t i=0; i < pageSize/4; ++i) {
+        unsigned offset = i*4;
+        if ( bitmap[i] ) {
+            if ( startValue == DYLD_CACHE_SLIDE4_PAGE_NO_REBASE ) {
+                // found first rebase location in page
+                startValue = i;
+            }
+            else if ( !makeRebaseChainV4<P>(pageContent, lastLocationOffset, offset, info) ) {
+                // can't record all rebasings in one chain
+                if ( (startValue & DYLD_CACHE_SLIDE4_PAGE_USE_EXTRA) == 0 ) {
+                    // switch page_start to "extras" which is a list of chain starts
+                    unsigned indexInExtras = (unsigned)pageExtras.size();
+                    if ( indexInExtras >= DYLD_CACHE_SLIDE4_PAGE_INDEX ) {
+                        _diagnostics.error("rebase overflow in v4 page extras");
+                        return;
+                    }
+                    pageExtras.push_back(startValue);
+                    startValue = indexInExtras | DYLD_CACHE_SLIDE4_PAGE_USE_EXTRA;
+                }
+                pageExtras.push_back(i);
+            }
+            lastLocationOffset = offset;
+        }
+    }
+    if ( lastLocationOffset != 0xFFFF ) {
+        // mark end of chain
+        pint_t* lastLoc = (pint_t*)&pageContent[lastLocationOffset];
+        pint_t lastValue = (pint_t)P::getP(*lastLoc);
+        pint_t newValue = ((lastValue - valueAdd) & valueMask);
+        P::setP(*lastLoc, newValue);
+    }
+    if ( startValue & DYLD_CACHE_SLIDE4_PAGE_USE_EXTRA ) {
+        // add end bit to extras
+        pageExtras.back() |= DYLD_CACHE_SLIDE4_PAGE_EXTRA_END;
+    }
+    pageStarts.push_back(startValue);
+}
+
+
+
+template <typename P>
+void CacheBuilder::writeSlideInfoV4(const bool bitmap[], unsigned dataPageCount)
+{
+    typedef typename P::uint_t    pint_t;
+    typedef typename P::E         E;
+    const uint32_t pageSize = 4096;
+
+    // fill in fixed info
+    assert(_slideInfoFileOffset != 0);
+    dyld_cache_slide_info4* info = (dyld_cache_slide_info4*)_readOnlyRegion.buffer;
+    info->version    = 4;
+    info->page_size  = pageSize;
+    info->delta_mask = _archLayout->pointerDeltaMask;
+    info->value_add  = (sizeof(pint_t) == 8) ? 0 : _archLayout->sharedMemoryStart;  // only value_add for 32-bit archs
+
+    // set page starts and extras for each page
+    std::vector<uint16_t> pageStarts;
+    std::vector<uint16_t> pageExtras;
+    pageStarts.reserve(dataPageCount);
+    uint8_t* pageContent = _readWriteRegion.buffer;
+    const bool* bitmapForPage = bitmap;
+    for (unsigned i=0; i < dataPageCount; ++i) {
+        addPageStartsV4<P>(pageContent, bitmapForPage, info, pageStarts, pageExtras);
+        if ( _diagnostics.hasError() ) {
+            return;
+        }
+        pageContent += pageSize;
+        bitmapForPage += (sizeof(bool)*(pageSize/4));
+    }
+    // fill in computed info
+    info->page_starts_offset = sizeof(dyld_cache_slide_info4);
+    info->page_starts_count  = (unsigned)pageStarts.size();
+    info->page_extras_offset = (unsigned)(sizeof(dyld_cache_slide_info4)+pageStarts.size()*sizeof(uint16_t));
+    info->page_extras_count  = (unsigned)pageExtras.size();
+    uint16_t* pageStartsBuffer = (uint16_t*)((char*)info + info->page_starts_offset);
+    uint16_t* pageExtrasBuffer = (uint16_t*)((char*)info + info->page_extras_offset);
+    for (unsigned i=0; i < pageStarts.size(); ++i)
+        pageStartsBuffer[i] = pageStarts[i];
+    for (unsigned i=0; i < pageExtras.size(); ++i)
+        pageExtrasBuffer[i] = pageExtras[i];
+    // update header with final size
+    uint64_t slideInfoSize = align(info->page_extras_offset + pageExtras.size()*sizeof(uint16_t), _archLayout->sharedRegionAlignP2);
+    if ( slideInfoSize > _slideInfoBufferSizeAllocated ) {
+        _diagnostics.error("kernel slide info v4 overflow buffer");
+    }
+    ((dyld_cache_header*)_readExecuteRegion.buffer)->slideInfoSize = slideInfoSize;
+    //fprintf(stderr, "pageCount=%u, page_starts_count=%lu, page_extras_count=%lu\n", dataPageCount, pageStarts.size(), pageExtras.size());
 }
 
 
@@ -1405,50 +1877,100 @@
 
 */
 
-void CacheBuilder::fipsSign() {
-    __block bool found = false;
-    _buffer->forEachImage(^(const mach_header* mh, const char* installName) {
-        __block void *hash_location = nullptr;
-        // Return if this is not corecrypto
-        if (strcmp(installName, "/usr/lib/system/libcorecrypto.dylib") != 0) {
-            return;
-        }
-        found = true;
-        auto parser = dyld3::MachOParser(mh, true);
-        parser.forEachLocalSymbol(_diagnostics, ^(const char *symbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool &stop) {
-            if (strcmp(symbolName, "_fipspost_precalc_hmac") != 0)
-                return;
-            hash_location = (void *)(n_value - _archLayout->sharedMemoryStart + (uintptr_t)_buffer);
-            stop = true;
-        });
-
-        // Bail out if we did not find the symbol
-        if (hash_location == nullptr) {
-            _diagnostics.warning("Could not find _fipspost_precalc_hmac, skipping FIPS sealing");
-            return;
-        }
-
-        parser.forEachSection(^(const char *segName, const char *sectionName, uint32_t flags, const void *content, size_t size, bool illegalSectionSize, bool &stop) {
-            // FIXME: If we ever implement userspace __TEXT_EXEC this will need to be updated
-            if ( (strcmp(segName, "__TEXT" ) != 0) || (strcmp(sectionName, "__text") != 0) )  {
-                return;
+
+
+uint16_t CacheBuilder::pageStartV3(uint8_t* pageContent, uint32_t pageSize, const bool bitmap[])
+{
+    const int maxPerPage = pageSize / 4;
+    uint16_t result = DYLD_CACHE_SLIDE_V3_PAGE_ATTR_NO_REBASE;
+    dyld3::MachOLoaded::ChainedFixupPointerOnDisk* lastLoc = nullptr;
+    for (int i=0; i < maxPerPage; ++i) {
+        if ( bitmap[i] ) {
+            if ( result == DYLD_CACHE_SLIDE_V3_PAGE_ATTR_NO_REBASE ) {
+                // found first rebase location in page
+                result = i * 4;
             }
-
-            if (illegalSectionSize) {
-                _diagnostics.error("FIPS section %s/%s extends beyond the end of the segment", segName, sectionName);
-                return;
+            dyld3::MachOLoaded::ChainedFixupPointerOnDisk* loc = (dyld3::MachOLoaded::ChainedFixupPointerOnDisk*)(pageContent + i*4);;
+            if ( lastLoc != nullptr ) {
+                // update chain (original chain may be wrong because of segment packing)
+                lastLoc->plainRebase.next = loc - lastLoc;
             }
-
-            //We have _fipspost_precalc_hmac and __TEXT,__text, seal it
-            unsigned char hmac_key = 0;
-            CCHmac(kCCHmacAlgSHA256, &hmac_key, 1, content, size, hash_location);
-            stop = true;
-        });
+            lastLoc = loc;
+        }
+    }
+    if ( lastLoc != nullptr ) {
+        // mark last one as end of chain
+        lastLoc->plainRebase.next = 0;
+	}
+    return result;
+}
+
+
+void CacheBuilder::writeSlideInfoV3(const bool bitmap[], unsigned dataPageCount)
+{
+	const uint32_t pageSize = 4096;
+
+    // fill in fixed info
+    assert(_slideInfoFileOffset != 0);
+    dyld_cache_slide_info3* info = (dyld_cache_slide_info3*)_readOnlyRegion.buffer;
+    info->version           = 3;
+    info->page_size         = pageSize;
+    info->page_starts_count = dataPageCount;
+    info->auth_value_add    = _archLayout->sharedMemoryStart;
+    
+    // fill in per-page starts
+    uint8_t* pageContent = _readWriteRegion.buffer;
+    const bool* bitmapForPage = bitmap;
+    for (unsigned i=0; i < dataPageCount; ++i) {
+        info->page_starts[i] = pageStartV3(pageContent, pageSize, bitmapForPage);
+        pageContent += pageSize;
+        bitmapForPage += (sizeof(bool)*(pageSize/4));
+    }
+
+    // update header with final size
+    dyld_cache_header* dyldCacheHeader = (dyld_cache_header*)_readExecuteRegion.buffer;
+    dyldCacheHeader->slideInfoSize = align(__offsetof(dyld_cache_slide_info3, page_starts[dataPageCount]), _archLayout->sharedRegionAlignP2);
+    if ( dyldCacheHeader->slideInfoSize > _slideInfoBufferSizeAllocated ) {
+        _diagnostics.error("kernel slide info overflow buffer");
+    }
+}
+
+
+void CacheBuilder::fipsSign()
+{
+    // find libcorecrypto.dylib in cache being built
+    DyldSharedCache* dyldCache = (DyldSharedCache*)_readExecuteRegion.buffer;
+    __block const dyld3::MachOLoaded* ml = nullptr;
+    dyldCache->forEachImage(^(const mach_header* mh, const char* installName) {
+        if ( strcmp(installName, "/usr/lib/system/libcorecrypto.dylib") == 0 )
+            ml = (dyld3::MachOLoaded*)mh;
     });
-
-    if (!found) {
-        _diagnostics.warning("Could not find /usr/lib/system/libcorecrypto.dylib, skipping FIPS sealing");
-    }
+    if ( ml == nullptr ) {
+        _diagnostics.warning("Could not find libcorecrypto.dylib, skipping FIPS sealing");
+        return;
+    }
+
+    // find location in libcorecrypto.dylib to store hash of __text section
+    uint64_t hashStoreSize;
+    const void* hashStoreLocation = ml->findSectionContent("__TEXT", "__fips_hmacs", hashStoreSize);
+    if ( hashStoreLocation == nullptr ) {
+        _diagnostics.warning("Could not find __TEXT/__fips_hmacs section in libcorecrypto.dylib, skipping FIPS sealing");
+        return;
+    }
+    if ( hashStoreSize != 32 ) {
+        _diagnostics.warning("__TEXT/__fips_hmacs section in libcorecrypto.dylib is not 32 bytes in size, skipping FIPS sealing");
+        return;
+    }
+
+    // compute hmac hash of __text section
+    uint64_t textSize;
+    const void* textLocation = ml->findSectionContent("__TEXT", "__text", textSize);
+    if ( textLocation == nullptr ) {
+        _diagnostics.warning("Could not find __TEXT/__text section in libcorecrypto.dylib, skipping FIPS sealing");
+        return;
+    }
+    unsigned char hmac_key = 0;
+    CCHmac(kCCHmacAlgSHA256, &hmac_key, 1, textLocation, textSize, (void*)hashStoreLocation); // store hash directly into hashStoreLocation
 }
 
 void CacheBuilder::codeSign()
@@ -1487,9 +2009,7 @@
             cacheIdentifier = "com.apple.dyld.cache." + _options.archName + ".development";
     }
     // get pointers into shared cache buffer
-    size_t          inBbufferSize = _currentFileSize;
-    const uint8_t*  inBuffer = (uint8_t*)_buffer;
-    uint8_t*        csBuffer = (uint8_t*)_buffer+inBbufferSize;
+    size_t          inBbufferSize = _readExecuteRegion.sizeInUse+_readWriteRegion.sizeInUse+_readOnlyRegion.sizeInUse+_localSymbolsRegion.sizeInUse;
 
     // layout code signature contents
     uint32_t blobCount     = agile ? 4 : 3;
@@ -1510,14 +2030,18 @@
     size_t   sbSize        = cmsOffset + cmsSize;
     size_t   sigSize       = align(sbSize, 14);       // keep whole cache 16KB aligned
 
-    if ( _currentFileSize+sigSize > _allocatedBufferSize ) {
-        _diagnostics.error("cache buffer too small to hold code signature (buffer size=%lldMB, signature size=%ldMB, free space=%lldMB)",
-                            _allocatedBufferSize/1024/1024, sigSize/1024/1024, (_allocatedBufferSize-_currentFileSize)/1024/1024);
+    // allocate space for blob
+    vm_address_t codeSigAlloc;
+    if ( vm_allocate(mach_task_self(), &codeSigAlloc, sigSize, VM_FLAGS_ANYWHERE) != 0 ) {
+        _diagnostics.error("could not allocate code signature buffer");
         return;
     }
+    _codeSignatureRegion.buffer     = (uint8_t*)codeSigAlloc;
+    _codeSignatureRegion.bufferSize = sigSize;
+    _codeSignatureRegion.sizeInUse  = sigSize;
 
     // create overall code signature which is a superblob
-    CS_SuperBlob* sb = reinterpret_cast<CS_SuperBlob*>(csBuffer);
+    CS_SuperBlob* sb = reinterpret_cast<CS_SuperBlob*>(_codeSignatureRegion.buffer);
     sb->magic           = htonl(CSMAGIC_EMBEDDED_SIGNATURE);
     sb->length          = htonl(sbSize);
     sb->count           = htonl(blobCount);
@@ -1560,10 +2084,9 @@
     cd->codeLimit64     = 0;                            // falls back to codeLimit
 
     // executable segment info
-    const dyld_cache_mapping_info* mappings = (dyld_cache_mapping_info*)((char*)_buffer + _buffer->header.mappingOffset);
-    cd->execSegBase     = htonll(mappings[0].fileOffset); // base of TEXT segment
-    cd->execSegLimit    = htonll(mappings[0].size);     // size of TEXT segment
-    cd->execSegFlags    = 0;                            // not a main binary
+    cd->execSegBase     = htonll(_readExecuteRegion.cacheFileOffset); // base of TEXT segment
+    cd->execSegLimit    = htonll(_readExecuteRegion.sizeInUse);       // size of TEXT segment
+    cd->execSegFlags    = 0;                                          // not a main binary
 
     // initialize dynamic fields of Code Directory
     strcpy((char*)cd + idOffset, cacheIdentifier.c_str());
@@ -1624,22 +2147,54 @@
     cms->magic  = htonl(CSMAGIC_BLOBWRAPPER);
     cms->length = htonl(sizeof(CS_Blob));
 
+
     // alter header of cache to record size and location of code signature
     // do this *before* hashing each page
-    _buffer->header.codeSignatureOffset = inBbufferSize;
-    _buffer->header.codeSignatureSize   = sigSize;
+    dyld_cache_header* cache = (dyld_cache_header*)_readExecuteRegion.buffer;
+    cache->codeSignatureOffset  = inBbufferSize;
+    cache->codeSignatureSize    = sigSize;
+
+    const uint32_t rwSlotStart = (uint32_t)(_readExecuteRegion.sizeInUse / CS_PAGE_SIZE);
+    const uint32_t roSlotStart = (uint32_t)(rwSlotStart + _readWriteRegion.sizeInUse / CS_PAGE_SIZE);
+    const uint32_t localsSlotStart = (uint32_t)(roSlotStart + _readOnlyRegion.sizeInUse / CS_PAGE_SIZE);
+    auto codeSignPage = ^(size_t i) {
+        const uint8_t* code = nullptr;
+        // move to correct region
+        if ( i < rwSlotStart )
+            code = _readExecuteRegion.buffer + (i * CS_PAGE_SIZE);
+        else if ( i >= rwSlotStart && i < roSlotStart )
+            code = _readWriteRegion.buffer + ((i - rwSlotStart) * CS_PAGE_SIZE);
+        else if ( i >= roSlotStart && i < localsSlotStart )
+            code = _readOnlyRegion.buffer + ((i - roSlotStart) * CS_PAGE_SIZE);
+        else
+            code = _localSymbolsRegion.buffer + ((i - localsSlotStart) * CS_PAGE_SIZE);
+
+        CCDigest(dscDigestFormat, code, CS_PAGE_SIZE, hashSlot + (i * dscHashSize));
+
+        if ( agile ) {
+            CCDigest(kCCDigestSHA256, code, CS_PAGE_SIZE, hash256Slot + (i * CS_HASH_SIZE_SHA256));
+        }
+    };
 
     // compute hashes
-    const uint8_t* code = inBuffer;
-    for (uint32_t i=0; i < slotCount; ++i) {
-        CCDigest(dscDigestFormat, code, CS_PAGE_SIZE, hashSlot);
-        hashSlot += dscHashSize;
-
-        if ( agile ) {
-            CCDigest(kCCDigestSHA256, code, CS_PAGE_SIZE, hash256Slot);
-            hash256Slot += CS_HASH_SIZE_SHA256;
-        }
-        code += CS_PAGE_SIZE;
+    dispatch_apply(slotCount, DISPATCH_APPLY_AUTO, ^(size_t i) {
+        codeSignPage(i);
+    });
+
+    // Now that we have a code signature, compute a UUID from it.
+
+    // Clear existing UUID, then MD5 whole cache buffer.
+    {
+        uint8_t* uuidLoc = cache->uuid;
+        assert(uuid_is_null(uuidLoc));
+        static_assert(offsetof(dyld_cache_header, uuid) / CS_PAGE_SIZE == 0, "uuid is expected in the first page of the cache");
+        CC_MD5((const void*)cd, (unsigned)cdSize, uuidLoc);
+        // <rdar://problem/6723729> uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats
+        uuidLoc[6] = ( uuidLoc[6] & 0x0F ) | ( 3 << 4 );
+        uuidLoc[8] = ( uuidLoc[8] & 0x3F ) | 0x80;
+
+        // Now codesign page 0 again
+        codeSignPage(0);
     }
 
     // hash of entire code directory (cdHash) uses same hash as each page
@@ -1656,9 +2211,6 @@
     else {
         memset(_cdHashSecond, 0, 20);
     }
-
-    // increase file size to include newly append code signature
-    _currentFileSize += sigSize;
 }
 
 const bool CacheBuilder::agileSignature()
@@ -1684,92 +2236,345 @@
     return cdHash(_cdHashSecond);
 }
 
-void CacheBuilder::addCachedDylibsImageGroup(dyld3::ImageProxyGroup* dylibGroup)
-{
-    const dyld3::launch_cache::binary_format::ImageGroup* groupBinary = dylibGroup->makeImageGroupBinary(_diagnostics, _s_neverStubEliminate);
-    if (!groupBinary)
+
+void CacheBuilder::buildImageArray(std::vector<DyldSharedCache::FileAlias>& aliases)
+{
+    typedef dyld3::closure::ClosureBuilder::CachedDylibInfo         CachedDylibInfo;
+    typedef dyld3::closure::Image::PatchableExport::PatchLocation   PatchLocation;
+    typedef uint64_t                                                CacheOffset;
+
+    // convert STL data structures to simple arrays to passe to makeDyldCacheImageArray()
+    __block std::vector<CachedDylibInfo> dylibInfos;
+    __block std::unordered_map<dyld3::closure::ImageNum, const dyld3::MachOLoaded*> imageNumToML;
+    DyldSharedCache* cache = (DyldSharedCache*)_readExecuteRegion.buffer;
+    cache->forEachImage(^(const mach_header* mh, const char* installName) {
+        uint64_t mtime;
+        uint64_t inode;
+        cache->getIndexedImageEntry((uint32_t)dylibInfos.size(), mtime, inode);
+        CachedDylibInfo entry;
+        entry.fileInfo.fileContent  = mh;
+        entry.fileInfo.path         = installName;
+        entry.fileInfo.sliceOffset  = 0;
+        entry.fileInfo.inode        = inode;
+        entry.fileInfo.mtime        = mtime;
+        dylibInfos.push_back(entry);
+        imageNumToML[(dyld3::closure::ImageNum)(dylibInfos.size())] = (dyld3::MachOLoaded*)mh;
+    });
+
+    // Convert symlinks from STL to simple char pointers.
+    std::vector<dyld3::closure::ClosureBuilder::CachedDylibAlias> dylibAliases;
+    dylibAliases.reserve(aliases.size());
+    for (const auto& alias : aliases)
+        dylibAliases.push_back({ alias.realPath.c_str(), alias.aliasPath.c_str() });
+
+
+    __block std::unordered_map<const dyld3::MachOLoaded*, std::set<CacheOffset>>    dylibToItsExports;
+    __block std::unordered_map<CacheOffset, std::vector<PatchLocation>>             exportsToUses;
+    __block std::unordered_map<CacheOffset, const char*>                            exportsToName;
+
+    dyld3::closure::ClosureBuilder::CacheDylibsBindingHandlers handlers;
+
+    handlers.chainedBind = ^(dyld3::closure::ImageNum, const dyld3::MachOLoaded* imageLoadAddress,
+                             const dyld3::Array<uint64_t>& starts,
+                             const dyld3::Array<dyld3::closure::Image::ResolvedSymbolTarget>& targets,
+                             const dyld3::Array<dyld3::closure::ClosureBuilder::ResolvedTargetInfo>& targetInfos) {
+        for (uint64_t start : starts) {
+            dyld3::closure::Image::forEachChainedFixup((void*)imageLoadAddress, start, ^(uint64_t* fixupLoc, dyld3::MachOLoaded::ChainedFixupPointerOnDisk fixupInfo, bool& stop) {
+                 // record location in aslr tracker so kernel can slide this on page-in
+                _aslrTracker.add(fixupLoc);
+
+                // if bind, record info for patch table and convert to rebase
+                if ( fixupInfo.plainBind.bind ) {
+                    dyld3::closure::Image::ResolvedSymbolTarget               target     = targets[fixupInfo.plainBind.ordinal];
+                    const dyld3::closure::ClosureBuilder::ResolvedTargetInfo& targetInfo = targetInfos[fixupInfo.plainBind.ordinal];
+                    dyld3::MachOLoaded::ChainedFixupPointerOnDisk*            loc;
+                    uint64_t                                                  offsetInCache;
+                    switch ( target.sharedCache.kind ) {
+                        case dyld3::closure::Image::ResolvedSymbolTarget::kindSharedCache:
+                            loc = (dyld3::MachOLoaded::ChainedFixupPointerOnDisk*)fixupLoc;
+                            offsetInCache = target.sharedCache.offset - targetInfo.addend;
+                            dylibToItsExports[targetInfo.foundInDylib].insert(offsetInCache);
+                            exportsToName[offsetInCache] = targetInfo.foundSymbolName;
+                            if ( fixupInfo.authBind.auth ) {
+                                // turn this auth bind into an auth rebase into the cache
+                                loc->authRebase.bind = 0;
+                                loc->authRebase.target = target.sharedCache.offset;
+                                exportsToUses[offsetInCache].push_back(PatchLocation((uint8_t*)fixupLoc - _readExecuteRegion.buffer, targetInfo.addend, *loc));
+                            }
+                            else {
+                                // turn this plain bind into an plain rebase into the cache
+                                loc->plainRebase.bind = 0;
+                                loc->plainRebase.target = _archLayout->sharedMemoryStart + target.sharedCache.offset;
+                                exportsToUses[offsetInCache].push_back(PatchLocation((uint8_t*)fixupLoc - _readExecuteRegion.buffer, targetInfo.addend));
+                            }
+                            break;
+                        case dyld3::closure::Image::ResolvedSymbolTarget::kindAbsolute:
+                             if ( _archLayout->is64 )
+                                *((uint64_t*)fixupLoc) = target.absolute.value;
+                            else
+                                *((uint32_t*)fixupLoc) = (uint32_t)(target.absolute.value);
+                            // don't record absolute targets for ASLR
+                            break;
+                        default:
+                            assert(0 && "unsupported ResolvedSymbolTarget kind in dyld cache");
+                    }
+                }
+            });
+        }
+    };
+
+    handlers.rebase = ^(dyld3::closure::ImageNum imageNum, const dyld3::MachOLoaded* imageToFix, uint32_t runtimeOffset) {
+        // record location in aslr tracker so kernel can slide this on page-in
+        uint8_t* fixupLoc = (uint8_t*)imageToFix+runtimeOffset;
+        _aslrTracker.add(fixupLoc);
+    };
+
+    handlers.bind = ^(dyld3::closure::ImageNum imageNum, const dyld3::MachOLoaded* mh,
+                      uint32_t runtimeOffset, dyld3::closure::Image::ResolvedSymbolTarget target,
+                      const dyld3::closure::ClosureBuilder::ResolvedTargetInfo& targetInfo) {
+        uint8_t* fixupLoc = (uint8_t*)mh+runtimeOffset;
+
+        // binder is called a second time for weak_bind info, which we ignore when building cache
+        const bool weakDefUseAlreadySet = targetInfo.weakBindCoalese && _aslrTracker.has(fixupLoc);
+
+        // do actual bind that sets pointer in image to unslid target address
+        uint64_t offsetInCache;
+        switch ( target.sharedCache.kind ) {
+            case dyld3::closure::Image::ResolvedSymbolTarget::kindSharedCache:
+                offsetInCache = target.sharedCache.offset - targetInfo.addend;
+                dylibToItsExports[targetInfo.foundInDylib].insert(offsetInCache);
+                exportsToUses[offsetInCache].push_back(PatchLocation(fixupLoc - _readExecuteRegion.buffer, targetInfo.addend));
+                exportsToName[offsetInCache] = targetInfo.foundSymbolName;
+                if ( !weakDefUseAlreadySet ) {
+                    if ( _archLayout->is64 )
+                        *((uint64_t*)fixupLoc) = _archLayout->sharedMemoryStart + target.sharedCache.offset;
+                    else
+                        *((uint32_t*)fixupLoc) = (uint32_t)(_archLayout->sharedMemoryStart + target.sharedCache.offset);
+                    // record location in aslr tracker so kernel can slide this on page-in
+                    _aslrTracker.add(fixupLoc);
+                }
+                break;
+            case dyld3::closure::Image::ResolvedSymbolTarget::kindAbsolute:
+                 if ( _archLayout->is64 )
+                    *((uint64_t*)fixupLoc) = target.absolute.value;
+                else
+                    *((uint32_t*)fixupLoc) = (uint32_t)(target.absolute.value);
+                // don't record absolute targets for ASLR
+                // HACK: Split seg may have added a target.  Remove it
+                _aslrTracker.remove(fixupLoc);
+                if ( (targetInfo.libOrdinal > 0) && (targetInfo.libOrdinal <= mh->dependentDylibCount()) ) {
+                    _missingWeakImports[fixupLoc] = mh->dependentDylibLoadPath(targetInfo.libOrdinal - 1);
+                }
+                break;
+            default:
+                assert(0 && "unsupported ResolvedSymbolTarget kind in dyld cache");
+        }
+    };
+
+    handlers.forEachExportsPatch = ^(dyld3::closure::ImageNum imageNum, void (^handler)(const dyld3::closure::ClosureBuilder::CacheDylibsBindingHandlers::PatchInfo&)) {
+        const dyld3::MachOLoaded* ml = imageNumToML[imageNum];
+        for (CacheOffset exportCacheOffset : dylibToItsExports[ml]) {
+            dyld3::closure::ClosureBuilder::CacheDylibsBindingHandlers::PatchInfo info;
+            std::vector<PatchLocation>& uses = exportsToUses[exportCacheOffset];
+            uses.erase(std::unique(uses.begin(), uses.end()), uses.end());
+            info.exportCacheOffset = (uint32_t)exportCacheOffset;
+            info.exportSymbolName  = exportsToName[exportCacheOffset];
+            info.usesCount         = (uint32_t)uses.size();
+            info.usesArray         = &uses.front();
+            handler(info);
+        }
+    };
+
+
+    // build ImageArray for all dylibs in dyld cache
+    dyld3::closure::PathOverrides pathOverrides;
+    dyld3::closure::ClosureBuilder cb(dyld3::closure::kFirstDyldCacheImageNum, _fileSystem, cache, false, pathOverrides, dyld3::closure::ClosureBuilder::AtPath::none, nullptr, _archLayout->archName, _options.platform, &handlers);
+    dyld3::Array<CachedDylibInfo> dylibs(&dylibInfos[0], dylibInfos.size(), dylibInfos.size());
+    const dyld3::Array<dyld3::closure::ClosureBuilder::CachedDylibAlias> aliasesArray(dylibAliases.data(), dylibAliases.size(), dylibAliases.size());
+    _imageArray = cb.makeDyldCacheImageArray(_options.optimizeStubs, dylibs, aliasesArray);
+    if ( cb.diagnostics().hasError() ) {
+        _diagnostics.error("%s", cb.diagnostics().errorMessage().c_str());
         return;
-
-    dyld3::launch_cache::ImageGroup group(groupBinary);
-    size_t groupSize = group.size();
-
-    if ( _currentFileSize+groupSize > _allocatedBufferSize ) {
-        _diagnostics.error("cache buffer too small to hold group[0] info (buffer size=%lldMB, group size=%ldMB, free space=%lldMB)",
-                            _allocatedBufferSize/1024/1024, groupSize/1024/1024, (_allocatedBufferSize-_currentFileSize)/1024/1024);
+    }
+}
+
+void CacheBuilder::addImageArray()
+{
+    // build trie of dylib paths
+    __block std::vector<DylibIndexTrie::Entry> dylibEntrys;
+    _imageArray->forEachImage(^(const dyld3::closure::Image* image, bool& stop) {
+        dylibEntrys.push_back(DylibIndexTrie::Entry(image->path(), DylibIndex(image->imageNum()-1)));
+        image->forEachAlias(^(const char *aliasPath, bool &innerStop) {
+            dylibEntrys.push_back(DylibIndexTrie::Entry(aliasPath, DylibIndex(image->imageNum()-1)));
+        });
+    });
+    DylibIndexTrie dylibsTrie(dylibEntrys);
+    std::vector<uint8_t> trieBytes;
+    dylibsTrie.emit(trieBytes);
+    while ( (trieBytes.size() % 4) != 0 )
+        trieBytes.push_back(0);
+
+    // check for fit
+    uint64_t imageArraySize = _imageArray->size();
+    size_t freeSpace = _readOnlyRegion.bufferSize - _readOnlyRegion.sizeInUse;
+    if ( imageArraySize+trieBytes.size() > freeSpace ) {
+        _diagnostics.error("cache buffer too small to hold ImageArray and Trie (buffer size=%lldMB, imageArray size=%lldMB, trie size=%luKB, free space=%ldMB)",
+                            _allocatedBufferSize/1024/1024, imageArraySize/1024/1024, trieBytes.size()/1024, freeSpace/1024/1024);
         return;
     }
 
-    // append ImageGroup data to read-only region of cache
-    uint8_t* loc = (uint8_t*)_buffer + _currentFileSize;
-    memcpy(loc, groupBinary, groupSize);
-    dyld_cache_mapping_info* mappings = (dyld_cache_mapping_info*)((char*)_buffer + _buffer->header.mappingOffset);
-    _buffer->header.dylibsImageGroupAddr = mappings[2].address + (_currentFileSize - mappings[2].fileOffset);
-    _buffer->header.dylibsImageGroupSize = (uint32_t)groupSize;
-    _currentFileSize += groupSize;
-    free((void*)groupBinary);
-}
-
-
-void CacheBuilder::addCachedOtherDylibsImageGroup(dyld3::ImageProxyGroup* otherGroup)
-{
-    const dyld3::launch_cache::binary_format::ImageGroup* groupBinary = otherGroup->makeImageGroupBinary(_diagnostics);
-    if (!groupBinary)
+    // copy into cache and update header
+    DyldSharedCache* dyldCache = (DyldSharedCache*)_readExecuteRegion.buffer;
+    dyldCache->header.dylibsImageArrayAddr = _readOnlyRegion.unslidLoadAddress + _readOnlyRegion.sizeInUse;
+    dyldCache->header.dylibsImageArraySize = imageArraySize;
+    dyldCache->header.dylibsTrieAddr       = dyldCache->header.dylibsImageArrayAddr + imageArraySize;
+    dyldCache->header.dylibsTrieSize       = trieBytes.size();
+    ::memcpy(_readOnlyRegion.buffer + _readOnlyRegion.sizeInUse, _imageArray, imageArraySize);
+    ::memcpy(_readOnlyRegion.buffer + _readOnlyRegion.sizeInUse + imageArraySize, &trieBytes[0], trieBytes.size());
+    _readOnlyRegion.sizeInUse += align(imageArraySize+trieBytes.size(),14);
+}
+
+void CacheBuilder::addOtherImageArray(const std::vector<LoadedMachO>& otherDylibsAndBundles, std::vector<const LoadedMachO*>& overflowDylibs)
+{
+    DyldSharedCache* cache = (DyldSharedCache*)_readExecuteRegion.buffer;
+    dyld3::closure::PathOverrides pathOverrides;
+    dyld3::closure::ClosureBuilder cb(dyld3::closure::kFirstOtherOSImageNum, _fileSystem, cache, false, pathOverrides, dyld3::closure::ClosureBuilder::AtPath::none, nullptr, _archLayout->archName, _options.platform);
+
+    // make ImageArray for other dylibs and bundles
+    STACK_ALLOC_ARRAY(dyld3::closure::LoadedFileInfo, others, otherDylibsAndBundles.size() + overflowDylibs.size());
+    for (const LoadedMachO& other : otherDylibsAndBundles) {
+        if ( !contains(other.loadedFileInfo.path, ".app/") )
+            others.push_back(other.loadedFileInfo);
+    }
+
+    for (const LoadedMachO* dylib : overflowDylibs) {
+        if (dylib->mappedFile.mh->canHavePrecomputedDlopenClosure(dylib->mappedFile.runtimePath.c_str(), ^(const char*) {}) )
+            others.push_back(dylib->loadedFileInfo);
+    }
+
+    // Sort the others array by name so that it is deterministic
+    std::sort(others.begin(), others.end(),
+              [](const dyld3::closure::LoadedFileInfo& a, const dyld3::closure::LoadedFileInfo& b) {
+                  return strcmp(a.path, b.path) < 0;
+              });
+
+    const dyld3::closure::ImageArray* otherImageArray = cb.makeOtherDylibsImageArray(others, (uint32_t)_sortedDylibs.size());
+
+    // build trie of paths
+    __block std::vector<DylibIndexTrie::Entry> otherEntrys;
+    otherImageArray->forEachImage(^(const dyld3::closure::Image* image, bool& stop) {
+        if ( !image->isInvalid() )
+            otherEntrys.push_back(DylibIndexTrie::Entry(image->path(), DylibIndex(image->imageNum())));
+    });
+    DylibIndexTrie dylibsTrie(otherEntrys);
+    std::vector<uint8_t> trieBytes;
+    dylibsTrie.emit(trieBytes);
+    while ( (trieBytes.size() % 4) != 0 )
+        trieBytes.push_back(0);
+
+    // check for fit
+    uint64_t imageArraySize = otherImageArray->size();
+    size_t freeSpace = _readOnlyRegion.bufferSize - _readOnlyRegion.sizeInUse;
+    if ( imageArraySize+trieBytes.size() > freeSpace ) {
+        _diagnostics.error("cache buffer too small to hold ImageArray and Trie (buffer size=%lldMB, imageArray size=%lldMB, trie size=%luKB, free space=%ldMB)",
+                           _allocatedBufferSize/1024/1024, imageArraySize/1024/1024, trieBytes.size()/1024, freeSpace/1024/1024);
         return;
-
-    dyld3::launch_cache::ImageGroup group(groupBinary);
-    size_t groupSize = group.size();
-
-    if ( _currentFileSize+groupSize > _allocatedBufferSize ) {
-        _diagnostics.error("cache buffer too small to hold group[1] info (buffer size=%lldMB, group size=%ldMB, free space=%lldMB)",
-                            _allocatedBufferSize/1024/1024, groupSize/1024/1024, (_allocatedBufferSize-_currentFileSize)/1024/1024);
-        return;
-    }
-
-    // append ImageGroup data to read-only region of cache
-    uint8_t* loc = (uint8_t*)_buffer + _currentFileSize;
-    memcpy(loc, groupBinary, groupSize);
-    dyld_cache_mapping_info* mappings = (dyld_cache_mapping_info*)((char*)_buffer + _buffer->header.mappingOffset);
-    _buffer->header.otherImageGroupAddr = mappings[2].address + (_currentFileSize - mappings[2].fileOffset);
-    _buffer->header.otherImageGroupSize = (uint32_t)groupSize;
-    _currentFileSize += groupSize;
-    free((void*)groupBinary);
-}
-
-void CacheBuilder::addClosures(const std::map<std::string, const dyld3::launch_cache::binary_format::Closure*>& closures)
-{
+    }
+
+    // copy into cache and update header
+    DyldSharedCache* dyldCache = (DyldSharedCache*)_readExecuteRegion.buffer;
+    dyldCache->header.otherImageArrayAddr = _readOnlyRegion.unslidLoadAddress + _readOnlyRegion.sizeInUse;
+    dyldCache->header.otherImageArraySize = imageArraySize;
+    dyldCache->header.otherTrieAddr       = dyldCache->header.otherImageArrayAddr + imageArraySize;
+    dyldCache->header.otherTrieSize       = trieBytes.size();
+    ::memcpy(_readOnlyRegion.buffer + _readOnlyRegion.sizeInUse, otherImageArray, imageArraySize);
+    ::memcpy(_readOnlyRegion.buffer + _readOnlyRegion.sizeInUse + imageArraySize, &trieBytes[0], trieBytes.size());
+    _readOnlyRegion.sizeInUse += align(imageArraySize+trieBytes.size(),14);
+}
+
+
+void CacheBuilder::addClosures(const std::vector<LoadedMachO>& osExecutables)
+{
+    const DyldSharedCache* dyldCache = (DyldSharedCache*)_readExecuteRegion.buffer;
+
+    __block std::vector<Diagnostics> osExecutablesDiags;
+    __block std::vector<const dyld3::closure::LaunchClosure*> osExecutablesClosures;
+    osExecutablesDiags.resize(osExecutables.size());
+    osExecutablesClosures.resize(osExecutables.size());
+
+    dispatch_apply(osExecutables.size(), DISPATCH_APPLY_AUTO, ^(size_t index) {
+        const LoadedMachO& loadedMachO = osExecutables[index];
+        // don't pre-build closures for staged apps into dyld cache, since they won't run from that location
+        if ( startsWith(loadedMachO.mappedFile.runtimePath, "/private/var/staged_system_apps/") ) {
+            return;
+        }
+        dyld3::closure::PathOverrides pathOverrides;
+        dyld3::closure::ClosureBuilder builder(dyld3::closure::kFirstLaunchClosureImageNum, _fileSystem, dyldCache, false, pathOverrides, dyld3::closure::ClosureBuilder::AtPath::all, nullptr, _archLayout->archName, _options.platform, nullptr);
+        bool issetuid = false;
+        if ( this->_options.platform == dyld3::Platform::macOS )
+            _fileSystem.fileExists(loadedMachO.loadedFileInfo.path, nullptr, nullptr, &issetuid);
+        const dyld3::closure::LaunchClosure* mainClosure = builder.makeLaunchClosure(loadedMachO.loadedFileInfo, issetuid);
+        if ( builder.diagnostics().hasError() ) {
+           osExecutablesDiags[index].error("%s", builder.diagnostics().errorMessage().c_str());
+        }
+        else {
+            assert(mainClosure != nullptr);
+            osExecutablesClosures[index] = mainClosure;
+        }
+    });
+
+    std::map<std::string, const dyld3::closure::LaunchClosure*> closures;
+    for (uint64_t i = 0, e = osExecutables.size(); i != e; ++i) {
+        const LoadedMachO& loadedMachO = osExecutables[i];
+        const Diagnostics& diag = osExecutablesDiags[i];
+        if (diag.hasError()) {
+            if ( _options.verbose ) {
+                _diagnostics.warning("building closure for '%s': %s", loadedMachO.mappedFile.runtimePath.c_str(), diag.errorMessage().c_str());
+                for (const std::string& warn : diag.warnings() )
+                    _diagnostics.warning("%s", warn.c_str());
+            }
+            if ( loadedMachO.inputFile && (loadedMachO.inputFile->mustBeIncluded()) ) {
+                loadedMachO.inputFile->diag.error("%s", diag.errorMessage().c_str());
+            }
+        } else {
+            // Note, a closure could be null here if it has a path we skip.
+            if (osExecutablesClosures[i] != nullptr)
+                closures[loadedMachO.mappedFile.runtimePath] = osExecutablesClosures[i];
+        }
+    }
+
+    osExecutablesDiags.clear();
+    osExecutablesClosures.clear();
+
     // preflight space needed
     size_t closuresSpace = 0;
     for (const auto& entry : closures) {
-        dyld3::launch_cache::Closure closure(entry.second);
-        closuresSpace += closure.size();
-    }
-    size_t freeSpace = _allocatedBufferSize - _currentFileSize;
+        closuresSpace += entry.second->size();
+    }
+    size_t freeSpace = _readOnlyRegion.bufferSize - _readOnlyRegion.sizeInUse;
     if ( closuresSpace > freeSpace ) {
         _diagnostics.error("cache buffer too small to hold all closures (buffer size=%lldMB, closures size=%ldMB, free space=%ldMB)",
                             _allocatedBufferSize/1024/1024, closuresSpace/1024/1024, freeSpace/1024/1024);
         return;
     }
-
-    dyld_cache_mapping_info* mappings = (dyld_cache_mapping_info*)((char*)_buffer + _buffer->header.mappingOffset);
-    _buffer->header.progClosuresAddr = mappings[2].address + (_currentFileSize - mappings[2].fileOffset);
-    uint8_t* closuresBase = (uint8_t*)_buffer + _currentFileSize;
+    DyldSharedCache* cache = (DyldSharedCache*)_readExecuteRegion.buffer;
+    cache->header.progClosuresAddr = _readOnlyRegion.unslidLoadAddress + _readOnlyRegion.sizeInUse;
+    uint8_t* closuresBase = _readOnlyRegion.buffer + _readOnlyRegion.sizeInUse;
     std::vector<DylibIndexTrie::Entry> closureEntrys;
     uint32_t currentClosureOffset = 0;
     for (const auto& entry : closures) {
-        const dyld3::launch_cache::binary_format::Closure* closBuf = entry.second;
+        const dyld3::closure::LaunchClosure* closure = entry.second;
         closureEntrys.push_back(DylibIndexTrie::Entry(entry.first, DylibIndex(currentClosureOffset)));
-        dyld3::launch_cache::Closure closure(closBuf);
-        size_t size = closure.size();
+        size_t size = closure->size();
         assert((size % 4) == 0);
-        memcpy(closuresBase+currentClosureOffset, closBuf, size);
+        memcpy(closuresBase+currentClosureOffset, closure, size);
         currentClosureOffset += size;
         freeSpace -= size;
-        free((void*)closBuf);
-    }
-    _buffer->header.progClosuresSize = currentClosureOffset;
-    _currentFileSize += currentClosureOffset;
-    freeSpace = _allocatedBufferSize - _currentFileSize;
-
+        closure->deallocate();
+    }
+    cache->header.progClosuresSize = currentClosureOffset;
+    _readOnlyRegion.sizeInUse += currentClosureOffset;
+    freeSpace = _readOnlyRegion.bufferSize - _readOnlyRegion.sizeInUse;
     // build trie of indexes into closures list
     DylibIndexTrie closureTrie(closureEntrys);
     std::vector<uint8_t> trieBytes;
@@ -1781,10 +2586,147 @@
                             _allocatedBufferSize/1024/1024, trieBytes.size()/1024/1024, freeSpace/1024/1024);
         return;
     }
-    memcpy((uint8_t*)_buffer + _currentFileSize, &trieBytes[0], trieBytes.size());
-    _buffer->header.progClosuresTrieAddr = mappings[2].address + (_currentFileSize - mappings[2].fileOffset);
-    _buffer->header.progClosuresTrieSize = trieBytes.size();
-    _currentFileSize += trieBytes.size();
-}
-
-
+    memcpy(_readOnlyRegion.buffer + _readOnlyRegion.sizeInUse, &trieBytes[0], trieBytes.size());
+    cache->header.progClosuresTrieAddr = _readOnlyRegion.unslidLoadAddress + _readOnlyRegion.sizeInUse;
+    cache->header.progClosuresTrieSize = trieBytes.size();
+    _readOnlyRegion.sizeInUse += trieBytes.size();
+    _readOnlyRegion.sizeInUse = align(_readOnlyRegion.sizeInUse, 14);
+}
+
+
+bool CacheBuilder::writeCache(void (^cacheSizeCallback)(uint64_t size), bool (^copyCallback)(const uint8_t* src, uint64_t size, uint64_t dstOffset))
+{
+    const dyld_cache_header*       cacheHeader = (dyld_cache_header*)_readExecuteRegion.buffer;
+    const dyld_cache_mapping_info* mappings = (dyld_cache_mapping_info*)(_readExecuteRegion.buffer + cacheHeader->mappingOffset);
+    assert(_readExecuteRegion.sizeInUse       == mappings[0].size);
+    assert(_readWriteRegion.sizeInUse         == mappings[1].size);
+    assert(_readOnlyRegion.sizeInUse          == mappings[2].size);
+    assert(_readExecuteRegion.cacheFileOffset == mappings[0].fileOffset);
+    assert(_readWriteRegion.cacheFileOffset   == mappings[1].fileOffset);
+    assert(_readOnlyRegion.cacheFileOffset    == mappings[2].fileOffset);
+    assert(_codeSignatureRegion.sizeInUse     == cacheHeader->codeSignatureSize);
+    assert(cacheHeader->codeSignatureOffset   == mappings[2].fileOffset+_readOnlyRegion.sizeInUse+_localSymbolsRegion.sizeInUse);
+    cacheSizeCallback(_readExecuteRegion.sizeInUse+_readWriteRegion.sizeInUse+_readOnlyRegion.sizeInUse+_localSymbolsRegion.sizeInUse+_codeSignatureRegion.sizeInUse);
+    bool fullyWritten = copyCallback(_readExecuteRegion.buffer, _readExecuteRegion.sizeInUse, mappings[0].fileOffset);
+    fullyWritten &= copyCallback(_readWriteRegion.buffer, _readWriteRegion.sizeInUse, mappings[1].fileOffset);
+    fullyWritten &= copyCallback(_readOnlyRegion.buffer, _readOnlyRegion.sizeInUse, mappings[2].fileOffset);
+    if ( _localSymbolsRegion.sizeInUse != 0 ) {
+        assert(cacheHeader->localSymbolsOffset == mappings[2].fileOffset+_readOnlyRegion.sizeInUse);
+        fullyWritten &= copyCallback(_localSymbolsRegion.buffer, _localSymbolsRegion.sizeInUse, cacheHeader->localSymbolsOffset);
+    }
+    fullyWritten &= copyCallback(_codeSignatureRegion.buffer, _codeSignatureRegion.sizeInUse, cacheHeader->codeSignatureOffset);
+    return fullyWritten;
+}
+
+
+void CacheBuilder::writeFile(const std::string& path)
+{
+    std::string pathTemplate = path + "-XXXXXX";
+    size_t templateLen = strlen(pathTemplate.c_str())+2;
+    char pathTemplateSpace[templateLen];
+    strlcpy(pathTemplateSpace, pathTemplate.c_str(), templateLen);
+    int fd = mkstemp(pathTemplateSpace);
+    if ( fd != -1 ) {
+        auto cacheSizeCallback = ^(uint64_t size) {
+            ::ftruncate(fd, size);
+        };
+        auto copyCallback = ^(const uint8_t* src, uint64_t size, uint64_t dstOffset) {
+            uint64_t writtenSize = pwrite(fd, src, size, dstOffset);
+            return writtenSize == size;
+        };
+        bool fullyWritten = writeCache(cacheSizeCallback, copyCallback);
+        if ( fullyWritten ) {
+            ::fchmod(fd, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); // mkstemp() makes file "rw-------", switch it to "rw-r--r--"
+            if ( ::rename(pathTemplateSpace, path.c_str()) == 0) {
+                ::close(fd);
+                return; // success
+            }
+        }
+        else {
+            _diagnostics.error("could not write file %s", pathTemplateSpace);
+        }
+        ::close(fd);
+        ::unlink(pathTemplateSpace);
+    }
+    else {
+        _diagnostics.error("could not open file %s", pathTemplateSpace);
+    }
+}
+
+void CacheBuilder::writeBuffer(uint8_t*& buffer, uint64_t& bufferSize) {
+    auto cacheSizeCallback = ^(uint64_t size) {
+        buffer = (uint8_t*)malloc(size);
+        bufferSize = size;
+    };
+    auto copyCallback = ^(const uint8_t* src, uint64_t size, uint64_t dstOffset) {
+        memcpy(buffer + dstOffset, src, size);
+        return true;
+    };
+    bool fullyWritten = writeCache(cacheSizeCallback, copyCallback);
+    assert(fullyWritten);
+}
+
+void CacheBuilder::writeMapFile(const std::string& path)
+{
+    const DyldSharedCache* cache = (DyldSharedCache*)_readExecuteRegion.buffer;
+    std::string mapContent = cache->mapFile();
+    safeSave(mapContent.c_str(), mapContent.size(), path);
+}
+
+void CacheBuilder::writeMapFileBuffer(uint8_t*& buffer, uint64_t& bufferSize)
+{
+    const DyldSharedCache* cache = (DyldSharedCache*)_readExecuteRegion.buffer;
+    std::string mapContent = cache->mapFile();
+    buffer = (uint8_t*)malloc(mapContent.size() + 1);
+    bufferSize = mapContent.size() + 1;
+    memcpy(buffer, mapContent.data(), bufferSize);
+}
+
+
+void CacheBuilder::forEachCacheDylib(void (^callback)(const std::string& path)) {
+    for (const DylibInfo& dylibInfo : _sortedDylibs)
+        callback(dylibInfo.runtimePath);
+}
+
+
+CacheBuilder::ASLR_Tracker::~ASLR_Tracker()
+{
+    if ( _bitmap != nullptr )
+        ::free(_bitmap);
+}
+
+void CacheBuilder::ASLR_Tracker::setDataRegion(const void* rwRegionStart, size_t rwRegionSize)
+{
+    _pageCount   = (unsigned)(rwRegionSize+_pageSize-1)/_pageSize;
+    _regionStart = (uint8_t*)rwRegionStart;
+    _endStart    = (uint8_t*)rwRegionStart + rwRegionSize;
+    _bitmap      = (bool*)calloc(_pageCount*(_pageSize/4)*sizeof(bool), 1);
+}
+
+void CacheBuilder::ASLR_Tracker::add(void* loc)
+{
+    uint8_t* p = (uint8_t*)loc;
+    assert(p >= _regionStart);
+    assert(p < _endStart);
+    _bitmap[(p-_regionStart)/4] = true;
+}
+
+void CacheBuilder::ASLR_Tracker::remove(void* loc)
+{
+    uint8_t* p = (uint8_t*)loc;
+    assert(p >= _regionStart);
+    assert(p < _endStart);
+    _bitmap[(p-_regionStart)/4] = false;
+}
+
+bool CacheBuilder::ASLR_Tracker::has(void* loc)
+{
+    uint8_t* p = (uint8_t*)loc;
+    assert(p >= _regionStart);
+    assert(p < _endStart);
+    return _bitmap[(p-_regionStart)/4];
+}
+
+
+
+