Loading...
cache-builder/update_dyld_sim_shared_cache.cpp dyld-1284.13 dyld-955
--- dyld/dyld-1284.13/cache-builder/update_dyld_sim_shared_cache.cpp
+++ dyld/dyld-955/cache-builder/update_dyld_sim_shared_cache.cpp
@@ -22,28 +22,60 @@
  * @APPLE_LICENSE_HEADER_END@
  */
 
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+#include <mach-o/dyld.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <fcntl.h>
+#include <dlfcn.h>
+#include <signal.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#include <sys/resource.h>
+#include <dirent.h>
+#include <rootless.h>
+#include <dscsym.h>
+#include <dispatch/dispatch.h>
+#include <pthread/pthread.h>
+#include <CoreFoundation/CoreFoundation.h>
+
+#include <algorithm>
+#include <vector>
+#include <unordered_map>
+#include <unordered_set>
+#include <iostream>
 #include <fstream>
-#include <string>
-#include <sys/mman.h>
-#include <sys/stat.h>
+#include <sstream>
 
 #include "FileUtils.h"
 #include "StringUtils.h"
+#include "DyldSharedCache.h"
 #include "MachOFile.h"
-#include "mrm_shared_cache_builder.h"
-
-struct MappedFile
-{
-    std::string path;
-    uint64_t    inode       = 0;
-    uint64_t    mtime       = 0;
-    const void* buffer      = nullptr;
-    size_t      bufferSize  = 0;
+#include "MachOAnalyzer.h"
+#include "ClosureFileSystemPhysical.h"
+
+struct MappedMachOsByCategory   // one instance per requested architecture (see addFileSets); groups the mach-o files found for it
+{
+    const dyld3::GradedArchs&                   archs;                  // arch set passed to MachOAnalyzer::load() in addIfMachO()
+    std::vector<DyldSharedCache::MappedMachO>   dylibsForCache;         // filled by addIfMachO(), later filtered by pruneCachedDylibs()
+    std::vector<DyldSharedCache::MappedMachO>   otherDylibsAndBundles;  // receives dylibs rejected by pruneCachedDylibs()
+    std::vector<DyldSharedCache::MappedMachO>   mainExecutables;        // only written inside an "#if 0" region of addIfMachO()
 };
 
 static const char* sAllowedPrefixes[] = {
-    "/usr/lib",
-    "/System/Library",
+    "/usr",
+    "/System",
 // don't look at main executables until simulator supports dyld3
 //    "/bin",
 //    "/sbin",
@@ -52,7 +84,6 @@
 static const char* sDontUsePrefixes[] = {
     "/usr/share",
     "/usr/local",
-    "/usr/lib/system/introspection",
 };
 
 static const char* sMacOSHostLibs[] = {
@@ -61,6 +92,221 @@
     "/usr/lib/system/libsystem_pthread.dylib",
 };
 
+static const char* sMacOSBinaries[] = {   // paths that findAllFiles() skips and addMacOSBinaries() loads with Platform::macOS
+    "/sbin/launchd_sim_trampoline",
+    "/usr/sbin/iokitsimd",
+    "/usr/lib/system/host/liblaunch_sim.dylib",
+};
+
+static bool verbose = false;   // set by the -verbose option in main(); read by existingCacheUpToDate()
+
+
+static bool addIfMachO(const dyld3::closure::FileSystem& fileSystem, const std::string& runtimePath, const struct stat& statBuf, std::vector<MappedMachOsByCategory>& files, dyld3::Platform platform, Diagnostics& diag)   // tries to load runtimePath once per entry of 'files'; returns true if it was added to at least one entry
+{
+    // don't precompute closure info for any debug or profile dylibs
+    if ( endsWith(runtimePath, "_profile.dylib") || endsWith(runtimePath, "_debug.dylib") || endsWith(runtimePath, "_asan.dylib") || endsWith(runtimePath, "_profile") || endsWith(runtimePath, "_debug") )   // the suffix list also covers _asan.dylib
+        return false;
+    if ( startsWith(runtimePath, "/usr/lib/system/introspection/") )
+        return false;
+
+    bool result = false;
+    for (MappedMachOsByCategory& file : files) {   // one attempt per architecture set
+        char realerPath[MAXPATHLEN];   // handed to load() but not read again in this function
+        dyld3::closure::LoadedFileInfo loadedFileInfo = dyld3::MachOAnalyzer::load(diag, fileSystem, runtimePath.c_str(), file.archs, platform, realerPath);
+        const dyld3::MachOAnalyzer* ma = (const dyld3::MachOAnalyzer*)loadedFileInfo.fileContent;   // nullptr means nothing usable was loaded for this arch set
+
+        if ( ma != nullptr ) {
+            bool            sipProtected    = false;   // never changed in this function
+            bool            issetuid        = false;   // only assigned inside the "#if 0" region below, so always false in practice
+            const uint64_t  sliceLen        = loadedFileInfo.sliceLen;
+            if ( ma->isDynamicExecutable() ) {
+        #if 0 // dyld3 not enabled for simulator yet, so don't collect main executables
+                issetuid = (statBuf.st_mode & (S_ISUID|S_ISGID));
+                file.mainExecutables.emplace_back(runtimePath, ma, sliceLen, issetuid, sipProtected, loadedFileInfo.sliceOffset, statBuf.st_mtime, statBuf.st_ino);
+                result = true;
+        #endif
+            }
+            else if ( ma->canBePlacedInDyldCache(runtimePath.c_str(), ^(const char* msg) {
+                diag.error("Dylib located at '%s' cannot be placed in cache because: '%s'", loadedFileInfo.path, msg);   // records the reason in the caller-supplied diag
+            }) ) {
+                file.dylibsForCache.emplace_back(runtimePath, ma, sliceLen, issetuid, sipProtected, loadedFileInfo.sliceOffset, statBuf.st_mtime, statBuf.st_ino);   // mtime and inode come from the caller's stat buffer
+                result = true;
+           }
+        }
+    }
+    
+    return result;
+}
+
+static void findAllFiles(const dyld3::closure::FileSystem& fileSystem, const std::vector<std::string>& pathPrefixes, std::vector<MappedMachOsByCategory>& files, dyld3::Platform platform)   // walks each sAllowedPrefixes dir under every prefix and offers candidate files to addIfMachO()
+{
+    std::unordered_set<std::string> skipDirs;   // copy of sDontUsePrefixes for the directory filter below
+    for (const char* s : sDontUsePrefixes)
+        skipDirs.insert(s);
+    
+    __block std::unordered_set<std::string> alreadyUsed;   // paths accepted so far; only maintained when there are several prefixes
+    bool multiplePrefixes = (pathPrefixes.size() > 1);
+    for (const std::string& prefix : pathPrefixes) {
+        // get all files from overlay for this search dir
+        for (const char* searchDir : sAllowedPrefixes ) {
+            iterateDirectoryTree(prefix, searchDir, ^(const std::string& dirPath) { return (skipDirs.count(dirPath) != 0); }, ^(const std::string& path, const struct stat& statBuf) {   // first block is true for dirs in skipDirs (presumably "skip this dir" - confirm in FileUtils); second block runs per file
+                // ignore files that don't have 'x' bit set (all runnable mach-o files do)
+                const bool hasXBit = ((statBuf.st_mode & S_IXOTH) == S_IXOTH);
+                if ( !hasXBit && !endsWith(path, ".dylib") )   // files named *.dylib are kept even without the x bit
+                    return;
+
+                // ignore files too small (must have at least a page of TEXT and LINKEDIT)
+                if ( statBuf.st_size < 0x2000 )
+                    return;
+
+                // don't add paths already found using previous prefix
+                if ( multiplePrefixes && (alreadyUsed.count(path) != 0) )
+                    return;
+                
+                // don't add binaries built for the host Mac OS platform
+                for (std::string s : sMacOSBinaries) {   // builds a temporary std::string from each table entry on every call
+                    if (s.compare(path) == 0)
+                        return;
+                }
+                // if the file is mach-o, add to list
+                Diagnostics diag;   // NOTE(review): never inspected after the call, so load errors for scanned files are not reported
+                if ( addIfMachO(fileSystem, path, statBuf, files, platform, diag) ) {
+                    if ( multiplePrefixes )
+                        alreadyUsed.insert(path);
+                }
+            });
+        }
+    }
+}
+
+static void addMacOSHostLibs(std::vector<MappedMachOsByCategory>& allFileSets, dyld3::Platform platform)   // offers every sMacOSHostLibs path that stat() can find to addIfMachO(), warning on stderr if it records an error
+{
+    dyld3::closure::FileSystemPhysical fileSystem;   // constructed with no arguments, unlike the one in main() which is given the root path
+    for (const char* path : sMacOSHostLibs) {
+        struct stat statBuf;
+        if ( stat(path, &statBuf) == 0 ) {   // paths that cannot be stat'ed are skipped without a message
+            Diagnostics diag;
+            addIfMachO(fileSystem, path, statBuf, allFileSets, dyld3::Platform::macOS, diag);   // NOTE(review): the 'platform' parameter is unused; Platform::macOS is always passed
+            if ( diag.hasError() )
+                fprintf(stderr, "update_dyld_sim_shared_cache: warning: skipping %s because %s\n", path, diag.errorMessage().c_str());
+        }
+    }
+}
+
+static void addMacOSBinaries(const dyld3::closure::FileSystem& fileSystem, const std::vector<std::string>& pathPrefixes, std::vector<MappedMachOsByCategory>& files)   // for each prefix, offers every sMacOSBinaries entry found on disk to addIfMachO() as Platform::macOS
+{
+    for (const std::string& prefix : pathPrefixes) {
+        for (std::string path : sMacOSBinaries) {
+            std::string fullPath = prefix + path;
+            struct stat statBuf;
+            if ( stat(fullPath.c_str(), &statBuf) == 0 ) {   // existence is checked with the prefix applied
+                Diagnostics diag;
+                addIfMachO(fileSystem, path, statBuf, files, dyld3::Platform::macOS, diag);   // the un-prefixed path is what gets passed on as the runtime path
+                if ( diag.hasError() )
+                    fprintf(stderr, "update_dyld_sim_shared_cache: warning: skipping %s because %s\n", fullPath.c_str(), diag.errorMessage().c_str());
+            }
+        }
+    }
+}
+
+static bool dontCache(const std::string& simRuntimeRootPath, const std::string& archName,   // returns true when aFile must be kept out of the shared cache
+                      const std::unordered_set<std::string>& pathsWithDuplicateInstallName,   // runtime paths whose install name is shared with another file
+                      const DyldSharedCache::MappedMachO& aFile, bool warn,   // warn: print a stderr warning for the rejection reasons that have one
+                      const std::unordered_set<std::string>& skipDylibs)   // runtime paths to reject unconditionally
+{
+    if ( skipDylibs.count(aFile.runtimePath) )
+        return true;
+    if ( startsWith(aFile.runtimePath, "/usr/lib/system/introspection/") )
+        return true;
+    if ( startsWith(aFile.runtimePath, "/usr/local/") )
+        return true;
+    
+    // anything inside a .app bundle is specific to app, so should not be in shared cache
+    if ( aFile.runtimePath.find(".app/") != std::string::npos )
+        return true;
+    
+    if ( aFile.runtimePath.find("//") != std::string::npos ) {   // NOTE(review): the message says "install name" but the value tested and printed is runtimePath
+        if (warn) fprintf(stderr, "update_dyld_sim_shared_cache: warning: %s use of bad install name %s\n", archName.c_str(), aFile.runtimePath.c_str());
+        return true;
+    }
+    
+    const char* installName = aFile.mh->installName();
+    if ( (pathsWithDuplicateInstallName.count(aFile.runtimePath) != 0) && (aFile.runtimePath != installName) ) {   // a duplicate that is not at its own install name is rejected
+        // <rdar://problem/46431467> if a dylib moves and a symlink is installed into its place, bom iterator will see both and issue a warning
+        struct stat statBuf;
+        bool isSymLink = ( (lstat(aFile.runtimePath.c_str(), &statBuf) == 0) && S_ISLNK(statBuf.st_mode) );   // NOTE(review): lstat uses runtimePath without simRuntimeRootPath, unlike the realpath() calls below - confirm intended
+        if (!isSymLink && warn) fprintf(stderr, "update_dyld_sim_shared_cache: warning: %s skipping because of duplicate install name %s\n", archName.c_str(), aFile.runtimePath.c_str());
+        return true;   // rejected whether or not the warning was printed
+    }
+    
+    if ( aFile.runtimePath != installName ) {   // path and install name differ: accept only if one resolves to the other
+        // see if install name is a symlink to actual path
+        std::string fullInstall = simRuntimeRootPath + installName;
+        char resolvedPath[PATH_MAX];
+        if ( realpath(fullInstall.c_str(), resolvedPath) != NULL ) {
+            std::string resolvedSymlink = resolvedPath;
+            if ( !simRuntimeRootPath.empty() ) {
+                resolvedSymlink = resolvedSymlink.substr(simRuntimeRootPath.size());   // drops the root prefix; NOTE(review): substr throws std::out_of_range if the resolved path is shorter than the root
+            }
+            if ( aFile.runtimePath == resolvedSymlink ) {
+                return false;
+            }
+        }
+        // <rdar://problem/38000411> also if runtime path is a symlink to install name
+        std::string fullRuntime = simRuntimeRootPath + aFile.runtimePath;
+        if ( realpath(fullRuntime.c_str(), resolvedPath) != NULL ) {   // resolvedPath buffer is reused from the check above
+            std::string resolvedSymlink = resolvedPath;
+            if ( !simRuntimeRootPath.empty() ) {
+                resolvedSymlink = resolvedSymlink.substr(simRuntimeRootPath.size());   // same prefix stripping (and same substr caveat) as above
+            }
+            if ( resolvedSymlink == installName ) {
+                return false;
+            }
+        }
+        if (warn) fprintf(stderr, "update_dyld_sim_shared_cache: warning: %s skipping because of bad install name %s\n", archName.c_str(), aFile.runtimePath.c_str());
+        return true;
+    }
+    return false;   // runtime path equals install name and no earlier rule rejected it
+}
+
+static void pruneCachedDylibs(const std::string& volumePrefix, const std::unordered_set<std::string>& skipDylibs, MappedMachOsByCategory& fileSet)   // removes from dylibsForCache everything dontCache() rejects; volumePrefix is passed as dontCache()'s root path
+{
+    std::unordered_set<std::string> pathsWithDuplicateInstallName;
+    
+    std::unordered_map<std::string, std::string> installNameToFirstPath;   // install name -> runtime path of the first dylib seen with it
+    for (DyldSharedCache::MappedMachO& aFile : fileSet.dylibsForCache) {
+        const char* installName = aFile.mh->installName();
+        auto pos = installNameToFirstPath.find(installName);
+        if ( pos == installNameToFirstPath.end() ) {
+            installNameToFirstPath[installName] = aFile.runtimePath;
+        }
+        else {
+            pathsWithDuplicateInstallName.insert(aFile.runtimePath);   // the later file...
+            pathsWithDuplicateInstallName.insert(installNameToFirstPath[installName]);   // ...and the first file are both marked as duplicates
+        }
+    }
+    
+    for (DyldSharedCache::MappedMachO& aFile : fileSet.dylibsForCache) {
+        if ( dontCache(volumePrefix, fileSet.archs.name(), pathsWithDuplicateInstallName, aFile, true, skipDylibs) ){   // first pass runs with warn=true and copies rejected dylibs aside
+            // <rdar://problem/46423929> don't build dlopen closures for symlinks to something in the dyld cache
+            if ( pathsWithDuplicateInstallName.count(aFile.runtimePath) == 0 )
+                fileSet.otherDylibsAndBundles.push_back(aFile);
+        }
+    }
+    fileSet.dylibsForCache.erase(std::remove_if(fileSet.dylibsForCache.begin(), fileSet.dylibsForCache.end(),
+                                                [&](const DyldSharedCache::MappedMachO& aFile) { return dontCache(volumePrefix, fileSet.archs.name(), pathsWithDuplicateInstallName, aFile, false, skipDylibs); }),   // second pass re-evaluates the same predicate with warn=false and erases the rejected entries
+                                 fileSet.dylibsForCache.end());
+}
+
+static void pruneOtherDylibs(const std::string& volumePrefix, MappedMachOsByCategory& fileSet)   // NOTE(review): volumePrefix is not used in the body
+{
+    // other OS dylibs should not contain dylibs that are embedded in some .app bundle
+    fileSet.otherDylibsAndBundles.erase(std::remove_if(fileSet.otherDylibsAndBundles.begin(), fileSet.otherDylibsAndBundles.end(),
+                                                       [&](const DyldSharedCache::MappedMachO& aFile) { return (aFile.runtimePath.find(".app/") != std::string::npos); }),   // drops any entry whose runtime path contains ".app/"
+                                        fileSet.otherDylibsAndBundles.end());
+}
+
+
 static std::string getOrderFileContent(const std::string& orderFile)
 {
     std::ifstream fstream(orderFile);
@@ -69,90 +315,66 @@
     return stringBuf.str();
 }
 
-struct CacheDylibID
-{
-    std::string installName;
-    uint64_t    inode;
-    uint64_t    mtime;
-};
-
-static void getCacheDylibIDs(const std::string& existingCache, std::vector<CacheDylibID>& cacheFiles)
+static bool existingCacheUpToDate(const std::string& existingCache, const std::vector<DyldSharedCache::MappedMachO>& currentDylibs)   // false if the cache cannot be opened/stat'ed or any cached image has no inode+mtime match in currentDylibs
 {
     // if no existing cache, it is not up-to-date
     int fd = ::open(existingCache.c_str(), O_RDONLY);
     if ( fd < 0 )
-        return;
+        return false;
     struct stat statbuf;
     if ( ::fstat(fd, &statbuf) == -1 ) {
         ::close(fd);
-        return;
-    }
-
+        return false;
+    }
+    
+    // build map of found dylibs
+    std::unordered_map<std::string, const DyldSharedCache::MappedMachO*> currentDylibMap;   // keyed by runtimePath; values point into currentDylibs
+    for (const DyldSharedCache::MappedMachO& aFile : currentDylibs) {
+        //fprintf(stderr, "0x%0llX 0x%0llX  %s\n", aFile.inode, aFile.modTime, aFile.runtimePath.c_str());
+        currentDylibMap[aFile.runtimePath] = &aFile;
+    }
+    
+    // make sure all dylibs in existing cache have same mtime and inode as found dylib
+    __block bool foundMismatch = false;   // NOTE(review): stays false if mmap() below fails, so an unmappable cache is reported as up to date
     const uint64_t cacheMapLen = statbuf.st_size;
     void *p = ::mmap(NULL, cacheMapLen, PROT_READ, MAP_PRIVATE, fd, 0);
     if ( p != MAP_FAILED ) {
         const DyldSharedCache* cache = (DyldSharedCache*)p;
         cache->forEachImageEntry(^(const char* installName, uint64_t mTime, uint64_t inode) {
-            cacheFiles.push_back((CacheDylibID){ installName, inode, mTime });
-        });
+            bool foundMatch = false;
+            auto pos = currentDylibMap.find(installName);   // fast path: the cache's install name is looked up among runtime paths
+            if ( pos != currentDylibMap.end() ) {
+                const DyldSharedCache::MappedMachO* foundDylib = pos->second;
+                if ( (foundDylib->inode == inode) && (foundDylib->modTime == mTime) ) {
+                    foundMatch = true;
+                }
+            }
+            if ( !foundMatch ) {
+                // use slow path and look for any dylib with a matching inode and mtime
+                bool foundSlow = false;
+                for (const DyldSharedCache::MappedMachO& aFile : currentDylibs) {   // linear scan that ignores the path entirely
+                    if ( (aFile.inode == inode) && (aFile.modTime == mTime) ) {
+                        foundSlow = true;
+                        break;
+                    }
+                }
+                if ( !foundSlow ) {
+                    foundMismatch = true;   // iteration continues, so with -verbose every changed dylib is listed
+                    if ( verbose )
+                        fprintf(stderr, "rebuilding dyld cache because dylib changed: %s\n", installName);
+                }
+            }
+         });
         ::munmap(p, cacheMapLen);
     }
+
     ::close(fd);
-}
-
-static bool allCachesUpToDate(const std::vector<const char*>& buildArchs, const std::string& cacheDir,
-                              const std::vector<MappedFile>& mappedFiles, bool verbose)
-{
-    // Get all the inode/mtimes from the cache(s)
-    std::vector<CacheDylibID> cacheDylibIDs;
-    for ( const char* arch : buildArchs ) {
-        std::string cachePath = cacheDir + "/" + "dyld_sim_shared_cache_" + arch;
-        getCacheDylibIDs(cachePath, cacheDylibIDs);
-    }
-
-    if ( cacheDylibIDs.empty() )
-        return false;
-
-    // Get all the inode/mtimes from the on-disk files we've loaded
-    struct FileID
-    {
-        uint64_t    inode;
-        uint64_t    mtime;
-    };
-    std::unordered_map<std::string_view, FileID> onDiskFileIDs;
-    for ( const MappedFile& mappedFile : mappedFiles )
-        onDiskFileIDs[mappedFile.path] = { mappedFile.inode, mappedFile.mtime };
-
-    // Compare to see if anything is out of date
-    for ( CacheDylibID& cacheDylibID : cacheDylibIDs ) {
-        auto it = onDiskFileIDs.find(cacheDylibID.installName);
-        if ( it == onDiskFileIDs.end() ) {
-            // The file is missing?  Should we rebuild?  Perhaps its a symlink
-            continue;
-        }
-
-        if ( it->second.inode != cacheDylibID.inode ) {
-            if ( verbose )
-                fprintf(stderr, "rebuilding dyld cache because dylib changed: %s\n", cacheDylibID.installName.c_str());
-            return false;
-        }
-
-        if ( it->second.mtime != cacheDylibID.mtime ) {
-            if ( verbose )
-                fprintf(stderr, "rebuilding dyld cache because dylib changed: %s\n", cacheDylibID.installName.c_str());
-            return false;
-        }
-    }
-
-    return true;
-}
-
-static std::vector<const char*> getArchs(std::unordered_set<std::string>& requestedArchs)
-{
-    std::unordered_set<std::string> allowedArchs;
-    allowedArchs.insert("x86_64");
-    allowedArchs.insert("arm64");
-
+
+    return !foundMismatch;   // true also when the cache lists no images or could not be mapped
+}
+
+static void addFileSets(const std::unordered_set<std::string>& allowedArchs, std::unordered_set<std::string>& requestedArchs, std::vector<MappedMachOsByCategory>& fileSets)
+{
     if ( requestedArchs.empty() ) {
 #if __arm64__
         requestedArchs.insert("arm64");
@@ -163,283 +385,17 @@
 #endif
     }
 
-    std::vector<const char*> buildArchs;
-    for ( auto& requested : requestedArchs ) {
+    for (auto& requested : requestedArchs) {
         if ( allowedArchs.find(requested) != allowedArchs.end() ) {
-            buildArchs.push_back(requested.c_str());
-        }
-    }
-
-    return buildArchs;
-}
-
-static std::optional<MappedFile> loadFile(Diagnostics& diags, const char* path)
-{
-    int fd = ::open(path, O_RDONLY, 0);
-    if ( fd == -1 ) {
-        diags.error("can't open file '%s', errno=%d\n", path, errno);
-        return std::nullopt;
-    }
-
-    struct stat statBuf;
-    if ( fstat(fd, &statBuf) == -1 ) {
-        diags.error("can't stat open file '%s', errno=%d\n", path, errno);
-        ::close(fd);
-        return std::nullopt;
-    }
-
-    const void* buffer = mmap(NULL, (size_t)statBuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
-    if ( buffer == MAP_FAILED ) {
-        diags.error("mmap() for file at %s failed, errno=%d\n", path, errno);
-        ::close(fd);
-        return std::nullopt;
-    }
-    ::close(fd);
-
-    uint64_t inode = statBuf.st_ino;
-    uint64_t mtime = statBuf.st_mtime;
-
-    return (MappedFile){ path, inode, mtime, buffer, (size_t)statBuf.st_size };
-}
-
-static void unloadFile(const MappedFile& mappedFile)
-{
-    ::munmap((void*)mappedFile.buffer, mappedFile.bufferSize);
-}
-
-static void loadMRMFiles(Diagnostics& diags, struct MRMSharedCacheBuilder* sharedCacheBuilder,
-                         const std::string& rootPath, std::vector<MappedFile>& mappedFiles)
-{
-    for (const char* path : sMacOSHostLibs) {
-        std::optional<MappedFile> mappedFile = loadFile(diags, path);
-        if ( !mappedFile.has_value() )
-            continue;
-
-        mappedFiles.push_back(mappedFile.value());
-
-        addOnDiskFile(sharedCacheBuilder, path, (uint8_t*)mappedFile->buffer, mappedFile->bufferSize,
-                      NoFlags, mappedFile->inode, mappedFile->mtime);
-    }
-
-    // Find files by walking the RuntimeRoot
-    std::unordered_set<std::string> skipDirs;
-    for (const char* s : sDontUsePrefixes)
-        skipDirs.insert(s);
-
-    // get all files from overlay for this search dir
-    for (const char* searchDir : sAllowedPrefixes ) {
-        iterateDirectoryTree(rootPath, searchDir,
-                             ^(const std::string& dirPath) { return (skipDirs.count(dirPath) != 0); },
-                             ^(const std::string& path, const struct stat& statBuf) {
-            // ignore files that don't have 'x' bit set (all runnable mach-o files do)
-            const bool hasXBit = ((statBuf.st_mode & S_IXOTH) == S_IXOTH);
-            if ( !hasXBit && !endsWith(path, ".dylib") )
-                return;
-
-            // ignore files too small (must have at least a page of TEXT and LINKEDIT)
-            if ( statBuf.st_size < 0x2000 )
-                return;
-
-            // if the file is mach-o, add to list
-            std::string fullPath = rootPath + "/" + path;
-            int fd = ::open(fullPath.c_str(), O_RDONLY, 0);
-            if ( fd == -1 ) {
-                // Should we warn here?  We might not are about arbitrary files not being able to be opened
-                // diags.warning("can't open file '%s', errno=%d\n", path, errno);
-                return;
-            }
-
-            const void* buffer = mmap(NULL, (size_t)statBuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
-            if ( buffer == MAP_FAILED ) {
-                // Should we warn here?  We might not are about arbitrary files not being able to be mapped
-                // diags.warning("mmap() for file at %s failed, errno=%d\n", path.c_str(), errno);
-                ::close(fd);
-                return;
-            }
-            ::close(fd);
-
-            uint64_t inode = statBuf.st_ino;
-            uint64_t mtime = statBuf.st_mtime;
-            mappedFiles.push_back((MappedFile){ path, inode, mtime, buffer, (size_t)statBuf.st_size });
-
-            addOnDiskFile(sharedCacheBuilder, path.c_str(), (uint8_t*)buffer, (size_t)statBuf.st_size, NoFlags, inode, mtime);
-        });
-    }
-}
-
-static void unloadMRMFiles(const std::vector<MappedFile>& mappedFiles) {
-    for ( const MappedFile& mappedFile : mappedFiles )
-        unloadFile(mappedFile);
-}
-
-static ssize_t write64(int fildes, const void *buf, size_t nbyte)
-{
-    unsigned char* uchars = (unsigned char*)buf;
-    ssize_t total = 0;
-
-    while (nbyte)
-    {
-        /*
-         * If we were writing socket- or stream-safe code we'd chuck the
-         * entire buf to write(2) and then gracefully re-request bytes that
-         * didn't get written. But write(2) will return EINVAL if you ask it to
-         * write more than 2^31-1 bytes. So instead we actually need to throttle
-         * the input to write.
-         *
-         * Historically code using write(2) to write to disk will assert that
-         * that all of the requested bytes were written. It seems harmless to
-         * re-request bytes as one does when writing to streams, with the
-         * compromise that we will return immediately when write(2) returns 0
-         * bytes written.
-         */
-        size_t limit = 0x7FFFFFFF;
-        size_t towrite = nbyte < limit ? nbyte : limit;
-        ssize_t wrote = write(fildes, uchars, towrite);
-        if (-1 == wrote)
-        {
-            return -1;
-        }
-        else if (0 == wrote)
-        {
-            break;
-        }
-        else
-        {
-            nbyte -= wrote;
-            uchars += wrote;
-            total += wrote;
-        }
-    }
-
-    return total;
-}
-
-static bool writeMRMResults(bool cacheBuildSuccess, MRMSharedCacheBuilder* sharedCacheBuilder,
-                            const std::string& dstRoot, bool verbose) {
-    if (!cacheBuildSuccess) {
-        uint64_t errorCount = 0;
-        if (const char* const* errors = getErrors(sharedCacheBuilder, &errorCount)) {
-            for (uint64_t i = 0, e = errorCount; i != e; ++i) {
-                const char* errorMessage = errors[i];
-                fprintf(stderr, "ERROR: %s\n", errorMessage);
-            }
-        }
-    }
-
-    // Now emit each cache we generated, or the errors for them.
-    uint64_t cacheResultCount = 0;
-    if (const CacheResult* const* cacheResults = getCacheResults(sharedCacheBuilder, &cacheResultCount)) {
-        for (uint64_t i = 0, e = cacheResultCount; i != e; ++i) {
-            const CacheResult& result = *(cacheResults[i]);
-            if ( (result.numErrors == 0) || verbose ) {
-                for (uint64_t warningIndex = 0; warningIndex != result.numWarnings; ++warningIndex) {
-                    fprintf(stderr, "[%s] WARNING: %s\n", result.loggingPrefix, result.warnings[warningIndex]);
-                }
-            }
-            if (result.numErrors) {
-                for (uint64_t errorIndex = 0; errorIndex != result.numErrors; ++errorIndex) {
-                    fprintf(stderr, "[%s] ERROR: %s\n", result.loggingPrefix, result.errors[errorIndex]);
-                }
-                cacheBuildSuccess = false;
-            }
-        }
-    }
-
-    if (!cacheBuildSuccess) {
-        return false;
-    }
-
-    // If we built caches, then write everything out.
-    uint64_t fileResultCount = 0;
-    if (const FileResult* const* fileResults = getFileResults(sharedCacheBuilder, &fileResultCount)) {
-        for (uint64_t i = 0, e = fileResultCount; i != e; ++i) {
-            const FileResult* fileResultPtr = fileResults[i];
-
-            assert(fileResultPtr->version == 1);
-            const FileResult_v1& fileResult = *(const FileResult_v1*)fileResultPtr;
-
-            switch (fileResult.behavior) {
-                case AddFile:
-                    break;
-                case ChangeFile:
-                    continue;
-            }
-
-            // We don't have an FD on the sim caches.  See rdar://66598213
-#if !SUPPORT_CACHE_BUILDER_MEMORY_BUFFERS
-#error Invalid config
-#endif
-            if ( fileResult.data != nullptr ) {
-                const std::string path = dstRoot + "/" + fileResult.path;
-                std::string pathTemplate = path + "-XXXXXX";
-                size_t templateLen = strlen(pathTemplate.c_str())+2;
-                char pathTemplateSpace[templateLen];
-                strlcpy(pathTemplateSpace, pathTemplate.c_str(), templateLen);
-                int fd = mkstemp(pathTemplateSpace);
-                if ( fd != -1 ) {
-                    ::ftruncate(fd, fileResult.size);
-                    uint64_t writtenSize = write64(fd, fileResult.data, fileResult.size);
-                    if ( writtenSize == fileResult.size ) {
-                        ::fchmod(fd, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); // mkstemp() makes file "rw-------", switch it to "rw-r--r--"
-                        if ( ::rename(pathTemplateSpace, path.c_str()) == 0) {
-                            ::close(fd);
-                            continue; // success
-                        }
-                    }
-                    else {
-                        fprintf(stderr, "ERROR: could not write file %s\n", pathTemplateSpace);
-                        cacheBuildSuccess = false;
-                    }
-                    ::close(fd);
-                    ::unlink(pathTemplateSpace);
-                }
-                else {
-                    fprintf(stderr, "ERROR: could not open file %s\n", pathTemplateSpace);
-                    cacheBuildSuccess = false;
-                }
-            }
-        }
-    }
-
-    // Give up if we couldn't write the caches
-    if (!cacheBuildSuccess) {
-        return false;
-    }
-
-    return true;
-}
-
-static mach_o::Platform getPlatform(Diagnostics& diags, std::string_view rootPath)
-{
-    // Infer the platform from dyld_sim
-    std::string dyldSimPath = std::string(rootPath) + "/usr/lib/dyld_sim";
-
-    std::optional<MappedFile> mappedFile = loadFile(diags, dyldSimPath.c_str());
-    if ( !mappedFile.has_value() )
-        return mach_o::Platform();
-
-    __block mach_o::Platform platform = mach_o::Platform();
-    if ( dyld3::FatFile::isFatFile(mappedFile->buffer) ) {
-        const dyld3::FatFile* ff = (dyld3::FatFile*)mappedFile->buffer;
-        ff->forEachSlice(diags, mappedFile->bufferSize,
-                         ^(uint32_t sliceCpuType, uint32_t sliceCpuSubType, const void* sliceStart, uint64_t sliceSize, bool& stop) {
-            const mach_o::Header* mh = (mach_o::Header*)sliceStart;
-            mach_o::PlatformAndVersions pvs = mh->platformAndVersions();
-            if ( platform.empty() )
-                platform = pvs.platform;
-        });
-    } else {
-        const mach_o::Header* mh = (mach_o::Header*)mappedFile->buffer;
-        if ( mh->valid(mappedFile->bufferSize) ) {
-            mach_o::PlatformAndVersions pvs = mh->platformAndVersions();
-            if ( platform.empty() )
-                platform = pvs.platform;
-        }
-    }
-
-    unloadFile(mappedFile.value());
-
-    return platform;
+            const dyld3::GradedArchs& archs = dyld3::GradedArchs::forName(requested.c_str(), false);
+            fileSets.push_back({archs});
+        }
+    }
+}
+
+inline uint32_t absolutetime_to_milliseconds(uint64_t abstime)   // divides by 1,000,000 and truncates to 32 bits, i.e. treats abstime as nanoseconds - NOTE(review): confirm callers pass nanoseconds rather than raw mach_absolute_time() ticks
+{
+    return (uint32_t)(abstime/1000/1000);
 }
 
 
@@ -451,18 +407,15 @@
         }                               \
     } while ( 0 )
 
-
 int main(int argc, const char* argv[], const char* envp[])
 {
     std::string                     rootPath;
-    bool                            verbose = false;
-    bool                            timePasses = false;
-    bool                            printStats = false;
     bool                            force = false;
     bool                            dylibsRemoved = false;
     std::string                     cacheDir;
     std::string                     dylibOrderFile;
     std::string                     dirtyDataOrderFile;
+    dyld3::Platform                 platform = dyld3::Platform::iOS_simulator;
     std::unordered_set<std::string> skipDylibs;
     std::unordered_set<std::string> requestedArchs;
     
@@ -475,12 +428,6 @@
         else if (strcmp(arg, "-verbose") == 0) {
             verbose = true;
         }
-        else if (strcmp(arg, "-time-passes") == 0) {
-            timePasses = true;
-        }
-        else if (strcmp(arg, "-stats") == 0) {
-            printStats = true;
-        }
         else if ((strcmp(arg, "-root") == 0) || (strcmp(arg, "--root") == 0)) {
             TERMINATE_IF_LAST_ARG("-root missing path argument\n");
             rootPath = argv[++i];
@@ -490,13 +437,13 @@
             cacheDir = argv[++i];
         }
         else if (strcmp(arg, "-iOS") == 0) {
-            // Unused.  We infer the platform from dyld_sim now
+            platform = dyld3::Platform::iOS_simulator;
         }
         else if (strcmp(arg, "-watchOS") == 0) {
-            // Unused.  We infer the platform from dyld_sim now
+            platform = dyld3::Platform::watchOS_simulator;
         }
         else if (strcmp(arg, "-tvOS") == 0) {
-            // Unused.  We infer the platform from dyld_sim now
+            platform = dyld3::Platform::tvOS_simulator;
         }
         else if (strcmp(arg, "-dylibs_removed_in_mastering") == 0) {
             dylibsRemoved = true;
@@ -554,89 +501,195 @@
         return 1;
     }
 
+    std::vector<std::string> pathPrefixes;
+
+    pathPrefixes.push_back(rootPath);
+    
+    // build FileSystem object
+    const char* fsRoot = rootPath.empty() ? nullptr : rootPath.c_str();
+    dyld3::closure::FileSystemPhysical fileSystem(fsRoot, nullptr);
+
+    
     int err = mkpath_np(cacheDir.c_str(), S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH);
     if ( (err != 0) && (err != EEXIST) ) {
         fprintf(stderr, "update_dyld_sim_shared_cache: could not access cache dir: mkpath_np(%s) failed errno=%d\n", cacheDir.c_str(), err);
         return 1;
     }
 
-    std::vector<const char*> buildArchs = getArchs(requestedArchs);
-    if ( buildArchs.empty() ) {
-        fprintf(stderr, "update_dyld_sim_shared_cache: error: no valid architecture specified\n");
-        return 1;
-    }
-
-    // The platform comes from dyld_sim now
-    Diagnostics diags;
-    mach_o::Platform platform = getPlatform(diags, rootPath);
-    if ( diags.hasError() ) {
-        fprintf(stderr, "update_dyld_sim_shared_cache: error: could not find sim platform because: %s\n",
-                diags.errorMessageCStr());
-        return -1;
-    }
-
-    // Make a cache builder and run it
-    BuildOptions_v3 buildOptions;
-    buildOptions.version                            = 3;
-    buildOptions.updateName                         = "sim";
-    buildOptions.deviceName                         = "sim";
-    buildOptions.disposition                        = InternalMinDevelopment;
-    buildOptions.platform                           = (Platform)platform.value();
-    buildOptions.archs                              = buildArchs.data();
-    buildOptions.numArchs                           = buildArchs.size();
-    buildOptions.verboseDiagnostics                 = verbose;
-    buildOptions.isLocallyBuiltCache                = true;
-    buildOptions.optimizeForSize                    = false;
-    buildOptions.filesRemovedFromDisk               = false;
-    buildOptions.timePasses                         = timePasses;
-    buildOptions.printStats                         = printStats;
-
-    struct MRMSharedCacheBuilder* sharedCacheBuilder = createSharedCacheBuilder((const BuildOptions_v1*)&buildOptions);
-
-    std::vector<MappedFile> mappedFiles;
-
-    {
-        uint64_t startTimeNanos = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
-        loadMRMFiles(diags, sharedCacheBuilder, rootPath, mappedFiles);
-        uint64_t endTimeNanos = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
-
-        if ( buildOptions.timePasses ) {
-            uint64_t timeMillis = (endTimeNanos - startTimeNanos) / 1000000;
-            fprintf(stderr, "loadMRMFiles: time = %lldms\n", timeMillis);
-        }
-    }
-
-    if ( diags.hasError() ) {
-        fprintf(stderr, "update_dyld_sim_shared_cache: error: %s", diags.errorMessage().c_str());
-        return -1;
-    }
-
     std::string dylibOrderFileContent;
     if ( !dylibOrderFile.empty() ) {
         dylibOrderFileContent = getOrderFileContent(dylibOrderFile);
-        addFile(sharedCacheBuilder, "dyld internal dirty data file", (uint8_t*)dylibOrderFileContent.data(), (size_t)dylibOrderFileContent.size(), DylibOrderFile);
     }
 
     std::string dirtyDataOrderFileContent;
     if ( !dirtyDataOrderFile.empty() ) {
         dirtyDataOrderFileContent = getOrderFileContent(dirtyDataOrderFile);
-        addFile(sharedCacheBuilder, "dyld internal dirty data order file", (uint8_t*)dirtyDataOrderFileContent.data(), (size_t)dirtyDataOrderFileContent.size(), DirtyDataOrderFile);
-    }
-
-    // check if cache is already up to date
-    if ( !force ) {
-        if ( allCachesUpToDate(buildArchs, cacheDir, mappedFiles, verbose) )
-            return 0;
-    }
-
-    bool cacheBuildSuccess = runSharedCacheBuilder(sharedCacheBuilder);
-
-    writeMRMResults(cacheBuildSuccess, sharedCacheBuilder, cacheDir, verbose);
-
-    destroySharedCacheBuilder(sharedCacheBuilder);
-
-    unloadMRMFiles(mappedFiles);
-    
-    return (cacheBuildSuccess ? 0 : 1);
-}
-
+    }
+    uint64_t t1 = mach_absolute_time();
+
+    __block std::vector<MappedMachOsByCategory> allFileSets;
+    std::unordered_set<std::string> allowedArchs;
+    switch ( platform ) {
+        case dyld3::Platform::iOS_simulator:
+            allowedArchs.insert("x86_64");
+            allowedArchs.insert("arm64");
+            break;
+        case dyld3::Platform::watchOS_simulator:
+            allowedArchs.insert("x86_64");
+            allowedArchs.insert("arm64");
+
+            break;
+        case dyld3::Platform::tvOS_simulator:
+            allowedArchs.insert("x86_64");
+            allowedArchs.insert("arm64");
+            break;
+        default:
+            assert(0 && "invalid platform");
+            break;
+    }
+
+    addFileSets(allowedArchs, requestedArchs, allFileSets);
+    if ( allFileSets.empty() ) {
+        fprintf(stderr, "update_dyld_sim_shared_cache: error: no valid architecture specified\n");
+        return 1;
+    }
+
+    findAllFiles(fileSystem, pathPrefixes, allFileSets, platform);
+    addMacOSHostLibs(allFileSets, platform);
+    addMacOSBinaries(fileSystem, pathPrefixes, allFileSets);
+
+    // nothing in OS uses i386 dylibs, so only dylibs used by third party apps need to be in cache
+    for (MappedMachOsByCategory& fileSet : allFileSets) {
+        pruneCachedDylibs(rootPath, skipDylibs, fileSet);
+        pruneOtherDylibs(rootPath, fileSet);
+    }
+
+    uint64_t t2 = mach_absolute_time();
+    if ( verbose ) {
+        fprintf(stderr, "time to scan file system and construct lists of mach-o files: %ums\n", absolutetime_to_milliseconds(t2-t1));
+    }
+    
+    // build caches in parallel on machines with at least 4GB of RAM
+    uint64_t memSize = 0;
+    size_t sz = sizeof(memSize);
+    bool buildInParallel = false;
+    if ( sysctlbyname("hw.memsize", &memSize, &sz, NULL, 0) == 0 ) {
+        if ( memSize >= 0x100000000ULL )
+            buildInParallel = true;
+    }
+    dispatch_queue_t dqueue = buildInParallel ? dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0)
+    : dispatch_queue_create("serial-queue", DISPATCH_QUEUE_SERIAL);
+    
+    // build all caches
+    __block bool cacheBuildFailure = false;
+    __block bool wroteSomeCacheFile = false;
+    dispatch_apply(allFileSets.size(), dqueue, ^(size_t index) {
+        MappedMachOsByCategory& fileSet = allFileSets[index];
+        const std::string outFile = cacheDir + "/dyld_sim_shared_cache_" + fileSet.archs.name();
+        
+        DyldSharedCache::MappedMachO (^loader)(const std::string&, Diagnostics&) = ^DyldSharedCache::MappedMachO(const std::string& runtimePath, Diagnostics& diag) {
+            if ( skipDylibs.count(runtimePath) )
+                return DyldSharedCache::MappedMachO();
+
+            for (const std::string& prefix : pathPrefixes) {
+                std::string fullPath = prefix + runtimePath;
+                struct stat statBuf;
+                if ( stat(fullPath.c_str(), &statBuf) == 0 ) {
+                    char truePath[PATH_MAX];
+                    if ( realpath(fullPath.c_str(), truePath) != NULL ) {
+                        std::string resolvedSymlink = truePath;
+                        if ( !rootPath.empty() ) {
+                            resolvedSymlink = resolvedSymlink.substr(rootPath.size());
+                        }
+                        if ( (runtimePath != resolvedSymlink) && !contains(runtimePath, "InputContext") ) {  //HACK remove InputContext when fixed
+                            // path requested is a symlink path, check if real path already loaded
+                            for (const DyldSharedCache::MappedMachO& aDylibMapping : fileSet.dylibsForCache) {
+                                if ( aDylibMapping.runtimePath == resolvedSymlink ) {
+                                    if ( verbose )
+                                        fprintf(stderr, "verifySelfContained, redirect %s to %s\n", runtimePath.c_str(), aDylibMapping.runtimePath.c_str());
+                                    return aDylibMapping;
+                                }
+                            }
+                        }
+                    }
+                    
+                    std::vector<MappedMachOsByCategory> mappedFiles;
+                    mappedFiles.push_back({fileSet.archs});
+                    if ( addIfMachO(fileSystem, runtimePath, statBuf, mappedFiles, platform, diag) ) {
+                        if ( !mappedFiles.back().dylibsForCache.empty() ) {
+                            if ( verbose )
+                                fprintf(stderr, "verifySelfContained, add %s\n", mappedFiles.back().dylibsForCache.back().runtimePath.c_str());
+                            return mappedFiles.back().dylibsForCache.back();
+                        }
+                    }
+                }
+            }
+            return DyldSharedCache::MappedMachO();
+        };
+        size_t startCount = fileSet.dylibsForCache.size();
+        std::vector<std::pair<DyldSharedCache::MappedMachO, std::set<std::string>>> excludes;
+        std::unordered_set<std::string> badZippered;
+        DyldSharedCache::verifySelfContained(fileSet.dylibsForCache, badZippered, loader, excludes);
+        for (size_t i=startCount; i < fileSet.dylibsForCache.size(); ++i) {
+            fprintf(stderr, "update_dyld_sim_shared_cache: warning: %s not in initial scan, but adding required dylib %s\n", fileSet.archs.name(), fileSet.dylibsForCache[i].runtimePath.c_str());
+        }
+        for (auto& exclude : excludes) {
+            std::string reasons = "(\"";
+            for (auto i = exclude.second.begin(); i != exclude.second.end(); ++i) {
+                reasons += *i;
+                if (i != --exclude.second.end()) {
+                    reasons += "\", \"";
+                }
+            }
+            reasons += "\")";
+            fprintf(stderr, "update_dyld_sim_shared_cache: warning: %s rejected from cached dylibs: %s (%s)\n", fileSet.archs.name(), exclude.first.runtimePath.c_str(), reasons.c_str());
+            fileSet.otherDylibsAndBundles.push_back(exclude.first);
+        }
+        
+        // check if cache is already up to date
+        if ( !force ) {
+            if ( existingCacheUpToDate(outFile, fileSet.dylibsForCache) )
+                return;
+        }
+        
+        
+        // build new cache file
+        DyldSharedCache::CreateOptions options;
+        options.outputFilePath               = outFile;
+        options.outputMapFilePath            = cacheDir + "/dyld_sim_shared_cache_" + fileSet.archs.name() + ".map";
+        options.archs                        = &fileSet.archs;
+        options.platform                     = platform;
+        options.localSymbolMode              = DyldSharedCache::LocalSymbolsMode::keep;
+        options.optimizeStubs                = false;
+        options.optimizeDyldDlopens          = false;   // don't add dyld3 closures to simulator cache
+        options.optimizeDyldLaunches         = false;   // don't add dyld3 closures to simulator cache
+        options.codeSigningDigestMode        = DyldSharedCache::SHA256only;
+        options.dylibsRemovedDuringMastering = dylibsRemoved;
+        options.inodesAreSameAsRuntime       = true;
+        options.cacheSupportsASLR            = false;
+        options.forSimulator                 = true;
+        options.isLocallyBuiltCache          = true;
+        options.verbose                      = verbose;
+        options.evictLeafDylibsOnOverflow    = true;
+        options.dylibOrdering                = parseOrderFile(dylibOrderFileContent);
+        options.dirtyDataSegmentOrdering     = parseOrderFile(dirtyDataOrderFileContent);
+        DyldSharedCache::CreateResults results = DyldSharedCache::create(options, fileSystem, fileSet.dylibsForCache, fileSet.otherDylibsAndBundles, fileSet.mainExecutables);
+        
+        // print any warnings
+        for (const std::string& warn : results.warnings) {
+            fprintf(stderr, "update_dyld_sim_shared_cache: warning: %s %s\n", fileSet.archs.name(), warn.c_str());
+        }
+        if ( results.errorMessage.empty() ) {
+            wroteSomeCacheFile = true;
+        }
+        else {
+            fprintf(stderr, "update_dyld_sim_shared_cache: %s\n", results.errorMessage.c_str());
+            cacheBuildFailure = true;
+        }
+    });
+    
+    // we could unmap all input files, but tool is about to quit
+
+    return (cacheBuildFailure ? 1 : 0);
+}
+