Loading...
cache_builder/NewSharedCacheBuilder.cpp dyld-1165.3 dyld-1335
--- dyld/dyld-1165.3/cache_builder/NewSharedCacheBuilder.cpp
+++ dyld/dyld-1335/cache_builder/NewSharedCacheBuilder.cpp
@@ -24,9 +24,11 @@
 
 #include "Defines.h"
 #include "NewSharedCacheBuilder.h"
+#include "MachOFile.h"
 #include "NewAdjustDylibSegments.h"
 #include "CacheDylib.h"
 #include "ClosureFileSystem.h"
+#include "JSONReader.h"
 #include "JSONWriter.h"
 #include "StringUtils.h"
 #include "Array.h"
@@ -38,11 +40,26 @@
 #include "JustInTimeLoader.h"
 #include "OptimizerObjC.h"
 #include "OptimizerSwift.h"
+#include "Platform.h"
 #include "PrebuiltLoader.h"
 #include "DyldProcessConfig.h"
 #include "DyldRuntimeState.h"
 #include "SwiftVisitor.h"
 #include "ParallelUtils.h"
+#include "CString.h"
+#include "Version32.h"
+#include "ExternalGenericMetadataBuilderImport.h"
+#include "SnapshotShared.h"
+#include "AAREncoder.h"
+#include <SharedCacheLinker/SharedCacheLinker.h>
+#include "ThreadLocalVariables.h"
+#include "Algorithm.h"
+
+// mach_o
+#include "ExportsTrie.h"
+#include "Header.h"
+#include "Image.h"
+#include "Misc.h"
 
 // FIXME: Remove this once we don't write to the old objc header struct.  See emitObjCOptsHeader()
 #include "objc-shared-cache.h"
@@ -51,20 +68,31 @@
 #include <list>
 #include <mach-o/nlist.h>
 #include <sstream>
+#include <sys/fcntl.h>
+#include <sys/stat.h>
 #include <unordered_set>
 
-using dyld3::GradedArchs;
 using dyld3::MachOFile;
 
 using dyld4::JustInTimeLoader;
 using dyld4::KernelArgs;
 using dyld4::Loader;
 using dyld4::ProcessConfig;
+using dyld4::RuntimeLocks;
 using dyld4::RuntimeState;
 using dyld4::SyscallDelegate;
 using dyld4::RuntimeLocks;
 
-using lsl::EphemeralAllocator;
+using lsl::Allocator;
+
+using mach_o::Header;
+using mach_o::Platform;
+using mach_o::Image;
+using mach_o::ExportsTrie;
+using mach_o::Symbol;
+using mach_o::LinkedDylibAttributes;
+using mach_o::Version32;
+using error::Error;
 
 using metadata_visitor::SwiftConformance;
 using metadata_visitor::SwiftVisitor;
@@ -90,14 +118,30 @@
             // Note, don't change the form of this message without checking in with MRM, as they
             // parse it.  We really need to add structured errors/warnings some time
             std::string reason = "Dylib located at '" + inputFile.path + "' not placed in shared cache because: ";
-            callback(reason + inputFile.getError().message());
-        }
+            for ( const error::Error& err : inputFile.getErrors() )
+                callback(reason + err.message());
+        }
+    }
+
+    for ( const std::string& warning : this->warnings )
+        callback(warning);
+}
+
+void SharedCacheBuilder::forEachError(void (^callback)(const std::string_view& str)) const
+{
+    // Pass each recorded error string to the caller's block, in container order
+    for ( const std::string& message : this->errors )
+        callback(message);
+}
 
 void SharedCacheBuilder::forEachCacheDylib(void (^callback)(const std::string_view& path)) const
 {
     for ( const CacheDylib& cacheDylib : this->cacheDylibs ) {
+        // skip Swift prespecialized dylib if it's been built
+        // it's synthesized by the builder, so mrm doesn't need to remove it
+        if ( swiftPrespecializedDylib && &cacheDylib == swiftPrespecializedDylib )
+            continue;
+
         // Note this has to return the path, not the install name, as MRM uses this to delete
         // the path from disk
         callback(cacheDylib.inputFile->path);
@@ -111,39 +155,210 @@
     }
 }
 
+// Size totals gathered by getDylibSizes() and reported by SharedCacheBuilder::stats()
+struct DylibSizes
+{
+    // vmsize totals, keyed by segment name
+    uint64_t textSegment      { 0 };
+    uint64_t dataSegment      { 0 };
+    uint64_t dataConstSegment { 0 };
+    uint64_t dataDirtySegment { 0 };
+    uint64_t authSegment      { 0 };
+    uint64_t authConstSegment { 0 };
+    uint64_t linkeditSegment  { 0 };
+    uint64_t otherSegment     { 0 };  // any other segment not in the above
+
+    // Just the __TEXT __text section
+    uint64_t textSection      { 0 };
+
+    // parts of the linkedit
+    uint64_t exportTrie       { 0 };
+    uint64_t symbolTable      { 0 };
+    uint64_t symbolStrings    { 0 };
+};
+
+// Adds up segment, section and linkedit sizes over the cache dylibs, plus the subCache symbol strings
+static DylibSizes getDylibSizes(std::span<const SubCache> subCaches,
+                                std::span<const CacheDylib> cacheDylibs)
+{
+    __block DylibSizes dylibSizes;
+    // __LINKEDIT vmsizes go through a set first, so a repeated size is only added once
+    __block std::unordered_set<uint64_t> linkeditSizes;
+
+    for ( const CacheDylib& cacheDylib : cacheDylibs ) {
+        // segment sizes
+        cacheDylib.cacheHdr->forEachSegment(^(const Header::SegmentInfo& info, bool& stop) {
+            if ( info.segmentName == "__TEXT" )
+                dylibSizes.textSegment += info.vmsize;
+            else if ( info.segmentName == "__DATA" )
+                dylibSizes.dataSegment += info.vmsize;
+            else if ( info.segmentName == "__DATA_CONST" )
+                dylibSizes.dataConstSegment += info.vmsize;
+            else if ( info.segmentName == "__DATA_DIRTY" )
+                dylibSizes.dataDirtySegment += info.vmsize;
+            else if ( info.segmentName == "__AUTH" )
+                dylibSizes.authSegment += info.vmsize;
+            else if ( info.segmentName == "__AUTH_CONST" )
+                dylibSizes.authConstSegment += info.vmsize;
+            else if ( info.segmentName == "__LINKEDIT" )
+                linkeditSizes.insert(info.vmsize);
+            else
+                dylibSizes.otherSegment += info.vmsize;
+        });
+
+        // section sizes
+        cacheDylib.cacheHdr->forEachSection(^(const Header::SectionInfo& info, bool& stop) {
+            if ( (info.segmentName == "__TEXT") && (info.sectionName == "__text") )
+                dylibSizes.textSection += info.size;
+        });
+
+        // linkedit sizes
+        for ( const LinkeditDataChunk& chunk : cacheDylib.linkeditChunks ) {
+            if ( chunk.kind == Chunk::Kind::linkeditExportTrie )
+                dylibSizes.exportTrie += chunk.cacheVMSize.rawValue();
+        }
+
+        // With no nlist_64 entries, size the 32-bit list; this used to multiply by the (empty) nlist64 and add 0.
+        // NOTE(review): assumes the 32-bit list member of optimizedSymbols is named nlist32 -- confirm
+        if ( !cacheDylib.optimizedSymbols.nlist64.empty() ) {
+            dylibSizes.symbolTable += (sizeof(struct nlist_64) * cacheDylib.optimizedSymbols.nlist64.size());
+        } else {
+            dylibSizes.symbolTable += (sizeof(struct nlist) * cacheDylib.optimizedSymbols.nlist32.size());
+        }
+    }
+    for ( uint64_t size : linkeditSizes )
+        dylibSizes.linkeditSegment += size;
+
+    for ( const SubCache& subCache : subCaches ) {
+        if ( subCache.optimizedSymbolStrings )
+            dylibSizes.symbolStrings += subCache.optimizedSymbolStrings->cacheVMSize.rawValue();
+    }
+
+    return dylibSizes;
+}
+
+// Returns the build statistics as JSON text; total-build-time is whole seconds since startTimeNanos
+std::string SharedCacheBuilder::stats(uint64_t startTimeNanos) const
+{
+    const uint64_t   nowNanos       = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
+    const uint64_t   elapsedSeconds = (nowNanos - startTimeNanos) / 1000000000;
+    const DylibSizes sizes          = getDylibSizes(this->subCaches, this->cacheDylibs);
+
+    std::string json = "{\n";
+    // emits one numeric field followed by a comma, i.e. anything but the last field
+    auto number = [&json](const char* label, auto value) { json.append(label).append(std::to_string(value)).append(",\n"); };
+
+    // add all the top level fields
+    json += "  \"version\": 1,\n";
+    json += "  \"minor-version\": 0,\n";
+    json.append("  \"update\": \"").append(this->options.updateName).append("\",\n");
+    json.append("  \"device\": \"").append(this->options.deviceName).append("\",\n");
+    json.append("  \"platform\": \"").append(this->options.platform.name().c_str()).append("\",\n");
+    json.append("  \"arch\": \"").append(this->options.arch.name()).append("\",\n");
+
+    // main stats
+    number("  \"dylibs-count\": ", this->cacheDylibs.size());
+    number("  \"total-vm-size\": ", this->totalVMSize.rawValue());
+    number("  \"total-text-segment-size\": ", sizes.textSegment);
+    number("  \"total-data-segment-size\": ", sizes.dataSegment);
+    number("  \"total-data-dirty-segment-size\": ", sizes.dataDirtySegment);
+    number("  \"total-data-const-segment-size\": ", sizes.dataConstSegment);
+    number("  \"total-auth-segment-size\": ", sizes.authSegment);
+    number("  \"total-auth-const-segment-size\": ", sizes.authConstSegment);
+    number("  \"total-other-segment-size\": ", sizes.otherSegment);
+    number("  \"total-linkedit-segment-size\": ", sizes.linkeditSegment);
+    number("  \"total-text-section-size\": ", sizes.textSection);
+
+    // optimization stats
+    number("  \"objc-selector-hash-table-size\": ", this->objcSelectorOptimizer.selectorHashTableTotalByteSize);
+    number("  \"objc-class-hash-table-size\": ", this->objcClassOptimizer.classHashTableTotalByteSize);
+    number("  \"objc-protocol-hash-table-size\": ", this->objcProtocolOptimizer.protocolHashTableTotalByteSize);
+    number("  \"objc-selectors-count\": ", this->objcSelectorOptimizer.selectorsMap.size());
+    number("  \"objc-classes-count\": ", this->objcClassOptimizer.namesMap.size());
+    number("  \"objc-protocols-count\": ", this->objcProtocolOptimizer.namesMap.size());
+
+    // linkedit breakdown
+    number("  \"linkedit-export-trie-size\": ", sizes.exportTrie);
+    number("  \"linkedit-symbol-table-size\": ", sizes.symbolTable);
+    number("  \"linkedit-symbol-strings-size\": ", sizes.symbolStrings);
+
+    // cache metadata
+    number("  \"patch-table-size\": ", this->patchTableOptimizer.patchTableChunk->cacheVMSize.rawValue());
+    number("  \"dylib-loaders-size\": ", this->prebuiltLoaderBuilder.cacheDylibsLoaderSize);
+    number("  \"executable-loaders-size\": ", this->prebuiltLoaderBuilder.executablesLoaderSize);
+
+    // Times.  This is the last field, so it is written without the trailing comma
+    json.append("  \"total-build-time\": ").append(std::to_string(elapsedSeconds)).append("\n");
+    json += "}\n";
+    return json;
+}
+
 void SharedCacheBuilder::addFile(const void* buffer, size_t bufferSize, std::string_view path,
                                  uint64_t inode, uint64_t modTime, bool forceNotCacheEligible)
 {
+    const bool  isOSBinary = false;
+    uint64_t    sliceOffset = 0;    // NOTE(review): sliceOffset/sliceLen look like outputs of compatibleSlice() -- confirm
+    uint64_t    sliceLen = 0;       // recorded as InputFile::size when a slice is accepted
+    // Look for a slice of this buffer that can be used as a builder input.
+    // On macOS, also allow iOSMac dylibs.  The macCatalyst check runs first, then the one for options.platform
+    if ( this->options.platform == Platform::macOS ) {
+        Diagnostics diag;           // scoped to this attempt; the check below declares its own diag
+        if ( const MachOFile* mf = MachOFile::compatibleSlice(diag, sliceOffset, sliceLen, buffer, bufferSize, path.data(),
+                                                              Platform::macCatalyst, isOSBinary,
+                                                              this->options.gradedArchs) ) {
+            InputFile inputFile;
+            inputFile.mf                    = mf;
+            inputFile.inode                 = inode;
+            inputFile.mtime                 = modTime;
+            inputFile.size                  = sliceLen;
+            inputFile.path                  = path;
+            inputFile.forceNotCacheEligible = forceNotCacheEligible;
+            allInputFiles.push_back(std::move(inputFile));
+            return;
+        }
+    }
+
     Diagnostics diag;
-    const bool  isOSBinary = false;
-    if ( const MachOFile* mf = MachOFile::compatibleSlice(diag, buffer, bufferSize, path.data(),
+    if ( const MachOFile* mf = MachOFile::compatibleSlice(diag, sliceOffset, sliceLen, buffer, bufferSize, path.data(),
                                                           this->options.platform, isOSBinary,
-                                                          this->options.archs) ) {
+                                                          this->options.gradedArchs) ) {
         InputFile inputFile;
         inputFile.mf                    = mf;
         inputFile.inode                 = inode;
         inputFile.mtime                 = modTime;
+        inputFile.size                  = sliceLen;
         inputFile.path                  = path;
         inputFile.forceNotCacheEligible = forceNotCacheEligible;
         allInputFiles.push_back(std::move(inputFile));
         return;
     }
 
-    // On macOS, also allow iOSMac dylibs
-    if ( this->options.platform == dyld3::Platform::macOS ) {
-        diag.clearError();
-        if ( const MachOFile* mf = MachOFile::compatibleSlice(diag, buffer, bufferSize, path.data(),
-                                                              dyld3::Platform::iOSMac, isOSBinary,
-                                                              this->options.archs) ) {
-            InputFile inputFile;
-            inputFile.mf                    = mf;
-            inputFile.inode                 = inode;
-            inputFile.mtime                 = modTime;
-            inputFile.path                  = path;
-            inputFile.forceNotCacheEligible = forceNotCacheEligible;
-            allInputFiles.push_back(std::move(inputFile));
+    // If we have an error, then add an input file just in case it's useful for later
+    if ( diag.hasError() ) {
+        // Only add dylibs to track for later error emission
+        __block bool isDylib = false;
+        const std::span<uint8_t> bufferSpan = { (uint8_t*)buffer, bufferSize };
+        mach_o::Error parseErr = mach_o::forEachHeader(bufferSpan, path,
+                                                       ^(const mach_o::Header* mh, size_t sliceHeader, bool& stop) {
+            if ( mh->isDylib() ) {
+                isDylib = true;
+                stop = true;
+            }
+        }); // parseErr is not inspected; only isDylib decides what happens next
+
+        if ( !isDylib )
             return;
-        }
+
+        InputFile inputFile;    // placeholder entry: no mach-o, just the path and the error message
+        inputFile.mf                    = nullptr;
+        inputFile.inode                 = 0;
+        inputFile.mtime                 = 0;
+        inputFile.size                  = 0;
+        inputFile.path                  = path;
+        inputFile.forceNotCacheEligible = true; // the caller's forceNotCacheEligible is not used here
+        inputFile.addError(Error("%s", (const char*)diag.errorMessageCStr()));
+        allInputFiles.push_back(std::move(inputFile));
+        return;
     }
 }
 
@@ -153,6 +368,7 @@
     this->inputAliases = aliases;
     this->inputIntermediateAliases = intermediateAliases;
 }
+
 
 //
 // MARK: --- SharedCacheBuilder build methods ---
@@ -167,6 +383,9 @@
     if ( this->allInputFiles.empty() )
         return Error("Cannot build cache with no inputs");
 
+    // Reserve a slot for the Swift prespecialized dylib early, so that it can be ordered
+    this->reserveSwiftPrespecializedInputFile();
+
     this->categorizeInputs();
     this->verifySelfContained();
 
@@ -174,11 +393,24 @@
         return Error("Cannot build cache with no dylibs");
 
     this->sortDylibs();
+    this->sortExecutables();
+
+    // Note this needs to be after sorting, so the order of objc dylibs is consistent with all dylibs list
+    this->findObjCDylibs();
+
+    // ObjC dylibs order is now set, so we can create the Swift prespecialized dylib
+    // Note this needs to happen after order is known because the Swift dylib needs to
+    // know the indices of other shared cache dylibs. To create the dylib earlier we would need
+    // to add split seg support for dylib indices.
+    if ( Error error = this->createSwiftPrespecializedDylib() ) {
+        swiftPrespecializedDylibBuildError = error.message();
+        return error;
+    }
 
     // Note this needs to be after sorting, as aliases point to the cache dylibs
     this->calculateDylibAliases();
 
-    if ( Error error = this->calculateDylibDependents(); error.hasError() )
+    if ( Error error = this->calculateDylibDependents() )
         return error;
 
     this->categorizeDylibSegments();
@@ -195,7 +427,6 @@
 Error SharedCacheBuilder::estimateGlobalOptimizations()
 {
     this->estimateIMPCaches();
-    this->findObjCDylibs();
     this->findCanonicalObjCSelectors();
     this->findCanonicalObjCClassNames();
     this->findCanonicalObjCProtocolNames();
@@ -210,8 +441,9 @@
     this->estimateSwiftHashTableSizes();
 
     this->calculateCacheDylibsTrie();
-    this->estimatePatchTableSize();
+    this->estimateFunctionVariantsSize();
     this->estimateCacheLoadersSize();
+    this->estimatePrewarmingSize();
 
     this->setupStubOptimizer();
 
@@ -224,6 +456,10 @@
 // Outputs: subCaches
 Error SharedCacheBuilder::createSubCaches()
 {
+    Timer::Scope timedScope(this->config, "createSubCaches time");
+    Timer::AggregateTimer aggregateTimerOwner(this->config);
+    auto& aggregateTimer = aggregateTimerOwner;
+
     this->computeSubCaches();
 
     // Per-subCache optimizations
@@ -232,8 +468,54 @@
     if ( Error error = this->calculateUniqueGOTs(); error.hasError() )
         return error;
 
+    // Finalize the subcaches, by removing any unused regions
+    for ( SubCache& subCache : this->subCaches ) {
+        // Add slide info for each DATA/AUTH segment.  Do this after we've added any other DATA*
+        // segments
+        if ( this->config.slideInfo.slideInfoFormat.has_value() )
+            subCache.addSlideInfoChunks();
+
+        subCache.removeEmptyRegions();
+    }
+
+    // Analyze the dylibs to work out binds and patch table info
+    {
+        this->initializePatchTableOptimizer();
+
+        // Because blocks...
+        std::vector<const CacheDylib*> builderCacheDylibsOwner;
+        auto& builderCacheDylibs = builderCacheDylibsOwner;
+        for ( const CacheDylib& cacheDylib : this->cacheDylibs )
+            builderCacheDylibs.push_back(&cacheDylib);
+
+        Error err = parallel::forEach(this->cacheDylibs, ^(size_t index, CacheDylib& cacheDylib) {
+            Diagnostics diag;
+            PatchInfo& dylibPatchInfo = this->patchTableOptimizer.patchInfos[cacheDylib.cacheIndex];
+
+            std::vector<Error> symbolErrors = cacheDylib.calculateBindTargets(diag, this->config, aggregateTimer, builderCacheDylibs,
+                                                                              dylibPatchInfo);
+            if ( !symbolErrors.empty() ) {
+                for ( const Error& symbolErr : symbolErrors )
+                    this->errors.push_back(symbolErr.message());
+            }
+            if ( diag.hasError() )
+                return Error("%s", diag.errorMessageCStr());
+
+            cacheDylib.calcuatePatchInfo(diag, this->config, aggregateTimer, dylibPatchInfo);
+            if ( diag.hasError() )
+                return Error("%s", diag.errorMessageCStr());
+
+            return Error();
+        });
+
+        if ( err )
+            return err;
+    }
+
     this->sortSubCacheSegments();
     this->calculateSlideInfoSize();
+    if ( Error error = this->calculatePatchTableSize(); error.hasError() )
+        return error;
     this->calculateCodeSignatureSize();
     this->printSubCaches();
     if ( Error error = this->computeSubCacheLayout(); error.hasError() )
@@ -278,16 +560,15 @@
     Timer::AggregateTimer aggregateTimerOwner(this->config);
     auto& aggregateTimer = aggregateTimerOwner;
 
-    // Because blocks...
-    std::vector<const CacheDylib*> builderCacheDylibsOwner;
-    auto& builderCacheDylibs = builderCacheDylibsOwner;
-    for ( const CacheDylib& cacheDylib : this->cacheDylibs )
-        builderCacheDylibs.push_back(&cacheDylib);
-
     Error err = parallel::forEach(this->cacheDylibs, ^(size_t index, CacheDylib& cacheDylib) {
         Diagnostics diag;
 
         cacheDylib.copyRawSegments(this->config, aggregateTimer);
+
+        // patch linked dylibs (load commands) as soon as the raw segments were copied
+        // so next steps have accurate view of the dylib
+        if ( Error patchErr = this->patchLinkedDylibs(cacheDylib) )
+            return patchErr;
 
         PatchInfo& dylibPatchInfo = this->patchTableOptimizer.patchInfos[cacheDylib.cacheIndex];
         cacheDylib.applySplitSegInfo(diag, this->options, this->config,
@@ -299,35 +580,34 @@
         if ( diag.hasError() )
             return Error("%s", diag.errorMessageCStr());
 
-        cacheDylib.calculateBindTargets(diag, this->config, aggregateTimer, builderCacheDylibs,
-                                        dylibPatchInfo);
+        cacheDylib.bind(diag, this->config, aggregateTimer, dylibPatchInfo, this->functionVariantsOptimizer);
         if ( diag.hasError() )
             return Error("%s", diag.errorMessageCStr());
 
-        cacheDylib.bind(diag, this->config, aggregateTimer, dylibPatchInfo);
-        if ( diag.hasError() )
-            return Error("%s", diag.errorMessageCStr());
-
-        cacheDylib.updateObjCSelectorReferences(diag, this->config, aggregateTimer, this->objcSelectorOptimizer);
-        if ( diag.hasError() )
-            return Error("%s", diag.errorMessageCStr());
-
-        cacheDylib.convertObjCMethodListsToOffsets(diag, this->config, aggregateTimer, this->objcSelectorOptimizer.selectorStringsChunk);
-        if ( diag.hasError() )
-            return Error("%s", diag.errorMessageCStr());
-
-        // Note, must be after updating selector references and converting relative methods to selector offsets
-        cacheDylib.sortObjCMethodLists(diag, this->config, aggregateTimer, this->objcSelectorOptimizer.selectorStringsChunk);
-        if ( diag.hasError() )
-            return Error("%s", diag.errorMessageCStr());
-
-        cacheDylib.optimizeLoadsFromConstants(this->config, aggregateTimer,
-                                              this->objcSelectorOptimizer.selectorStringsChunk);
-
-        Error error = cacheDylib.emitObjCIMPCaches(this->config, aggregateTimer, this->objcIMPCachesOptimizer,
-                                                   this->objcSelectorOptimizer.selectorStringsChunk);
-        if ( error.hasError() )
-            return error;
+        // Disable objc optimizations from EK shared cache
+        bool emitObjcOpts = !this->options.platform.isExclaveKit();
+        if ( emitObjcOpts ) {
+            cacheDylib.updateObjCSelectorReferences(diag, this->config, aggregateTimer, this->objcSelectorOptimizer);
+            if ( diag.hasError() )
+                return Error("%s", diag.errorMessageCStr());
+
+            cacheDylib.convertObjCMethodListsToOffsets(diag, this->config, aggregateTimer, this->objcSelectorOptimizer.selectorStringsChunk);
+            if ( diag.hasError() )
+                return Error("%s", diag.errorMessageCStr());
+
+            // Note, must be after updating selector references and converting relative methods to selector offsets
+            cacheDylib.sortObjCMethodLists(diag, this->config, aggregateTimer, this->objcSelectorOptimizer.selectorStringsChunk);
+            if ( diag.hasError() )
+                return Error("%s", diag.errorMessageCStr());
+
+            cacheDylib.optimizeLoadsFromConstants(this->config, aggregateTimer,
+                                                  this->objcSelectorOptimizer.selectorStringsChunk);
+
+            Error error = cacheDylib.emitObjCIMPCaches(this->config, aggregateTimer, this->objcIMPCachesOptimizer,
+                                                       this->objcSelectorOptimizer.selectorStringsChunk);
+            if ( error.hasError() )
+                return error;
+        }
 
         cacheDylib.optimizeStubs(this->options, this->config, aggregateTimer, this->stubOptimizer,
                                  dylibPatchInfo);
@@ -347,47 +627,55 @@
 // Outputs: emitted optimizations in the subCache buffers
 Error SharedCacheBuilder::postDylibEmitChunks()
 {
+    // Disable objc optimizations from EK shared cache
+    bool emitObjcOpts = !this->options.platform.isExclaveKit();
+
+    if ( Error error = this->emitPrewarmingData(); error.hasError() )
+        return error;
 
     this->optimizeTLVs();
 
     if ( Error error = this->emitUniquedGOTs(); error.hasError() )
         return error;
 
-    // Note this has to be before we emit the protocol hash table
-    if ( Error error = this->emitCanonicalObjCProtocols(); error.hasError() )
+    if ( emitObjcOpts ) {
+        // Note this has to be before we emit the protocol hash table
+        if ( Error error = this->emitCanonicalObjCProtocols(); error.hasError() )
+            return error;
+    }
+
+    this->emitCacheDylibsTrie();
+    if ( Error error = this->emitPatchTable() )
         return error;
 
-    this->emitCacheDylibsTrie();
-    if ( Error error = this->emitPatchTable(); error.hasError() )
-        return error;
+    this->emitFunctionVariants();
 
     // Note, this must be after we emit the patch table
     if ( Error error = this->emitCacheDylibsPrebuiltLoaders(); error.hasError() )
         return error;
 
-    this->emitObjCHashTables();
-
-    bool preAttachedCategories = true;
-    if ( preAttachedCategories ) {
+    if ( emitObjcOpts ) {
+        this->emitObjCHashTables();
+        
         // Note this has to be after anyone walking the objc metadata format
         if ( Error error = this->emitPreAttachedObjCCategories(); error.hasError() )
             return error;
-    }
-
-    // Note, this must be after emitCacheDylibsPrebuiltLoaders() as it needs the offset to the SectionLocations*
-    // in the PrebuiltLoader*
-    this->emitObjCHeaderInfo();
-    if ( Error error = this->computeObjCClassLayout(); error.hasError() )
+
+        // Note, this must be after emitCacheDylibsPrebuiltLoaders() as it needs the offset to the SectionLocations*
+        // in the PrebuiltLoader*
+        this->emitObjCHeaderInfo();
+        if ( Error error = this->computeObjCClassLayout(); error.hasError() )
         return error;
 
-    // Note this must be after computeObjCClassLayout() as we need it to set the flags for whether
-    // we have missing weak superclasses or not
-    this->emitObjCOptsHeader();
-
-    // Note, this has to be after we've emitted the objc class hash table, and after emitting
-    // the objc header info
-    if ( Error error = this->emitSwiftHashTables(); error.hasError() )
-        return error;
+        // Note this must be after computeObjCClassLayout() as we need it to set the flags for whether
+        // we have missing weak superclasses or not
+        this->emitObjCOptsHeader();
+
+        // Note, this has to be after we've emitted the objc class hash table, and after emitting
+        // the objc header info
+        if ( Error error = this->emitSwiftHashTables(); error.hasError() )
+            return error;
+    }
 
     // Note, this has to be after we've emitted the objc hash tables and the objc header infos
     if ( Error error = this->emitExecutablePrebuiltLoaders(); error.hasError() )
@@ -409,12 +697,12 @@
 // Outputs: final emitted data in the sub caches
 Error SharedCacheBuilder::finalize()
 {
-
     // Do objc very late, as it adds segments to the mach-o, which aren't in sync with
     // the segments on the CacheDylib
     this->addObjcSegments();
     this->computeCacheHeaders();
     this->codeSign();
+    this->buildAtlas(); // new final step, after codeSign() -- NOTE(review): confirm this ordering is intended
 
     return Error();
 }
@@ -491,61 +779,101 @@
     Stats        stats(this->config);
     Timer::Scope timedScope(this->config, "categorizeInputs time");
 
-    for ( InputFile& inputFile : this->allInputFiles ) {
-        if ( inputFile.mf->isDylib() || inputFile.mf->isDyld() ) {
-            auto failureHandler = ^(const char* format, ...) __attribute__((format(printf, 1, 2))) {
-                char*   output_string;
-                va_list list;
-                va_start(list, format);
-                vasprintf(&output_string, format, list);
-                va_end(list);
-                inputFile.setError(Error("%s", (const char*)output_string));
-                free(output_string);
-            };
-
-            std::string_view installName = inputFile.mf->installName();
-            std::string_view dylibPath = inputFile.path;
-            if ( (installName != dylibPath) && ((this->options.platform == dyld3::Platform::macOS) || startsWith(dylibPath, "/System/Cryptexes/OS/")) ) {
-                // We now typically require that install names and paths match.  However symlinks may allow us to bring in a path which
-                // doesn't match its install name.
-                // For example:
-                //   /usr/lib/libstdc++.6.0.9.dylib is a real file with install name /usr/lib/libstdc++.6.dylib
-                //   /usr/lib/libstdc++.6.dylib is a symlink to /usr/lib/libstdc++.6.0.9.dylib
-                // So long as we add both paths (with one as an alias) then this will work, even if dylibs are removed from disk
-                // but the symlink remains.
-                // Apply the same symlink crawling for dylibs that will install their contents to Cryptex paths but will have
-                // install names with the cryptex paths removed.
-                char resolvedSymlinkPath[PATH_MAX];
-                if ( fileSystem.getRealPath(installName.data(), resolvedSymlinkPath) ) {
-                    if ( resolvedSymlinkPath == dylibPath ) {
-                        // Symlink is the install name and points to the on-disk dylib
-                        //fprintf(stderr, "Symlink works: %s == %s\n", inputFile.path, installName.c_str());
-                        dylibPath = installName;
+    struct Chunk
+    {
+        std::vector<CacheDylib>                         cacheDylibs;
+        std::vector<InputFile*>                         exeInputFiles;
+        std::vector<InputFile*>                         nonCacheDylibInputFiles;
+    };
+
+    mapReduce(std::span(allInputFiles), 32, ^(size_t i, Chunk& chunk, std::span<InputFile> files){
+        for ( InputFile& inputFile : files ) {
+            if ( inputFile.mf == nullptr ) continue;
+
+            // only process valid images
+            if ( ((Header*)inputFile.mf)->isDyldManaged() ) {
+                // don't try to validate kernel or firmware mach-o binaries
+                Image image(inputFile.mf, inputFile.size, Image::MappingKind::wholeSliceMapped);
+                if ( mach_o::Error err = image.validate() ) {
+                    inputFile.addError(Error("%s", err.message()));
+                    continue;
+                }
+            }
+
+            if ( inputFile.mf->isDylib() || inputFile.mf->isDyld() ) {
+                auto failureHandler = ^(const char* format, ...) __attribute__((format(printf, 1, 2))) {
+                    char*   output_string;
+                    va_list list;
+                    va_start(list, format);
+                    vasprintf(&output_string, format, list);
+                    va_end(list);
+                    inputFile.addError(Error("%s", (const char*)output_string));
+                    free(output_string);
+                };
+
+                std::string_view installName = ((const Header*)inputFile.mf)->installName();
+                std::string_view dylibPath = inputFile.path;
+                if ( (installName != dylibPath) && ((this->options.platform == Platform::macOS) || startsWith(dylibPath, "/System/Cryptexes/OS/")) ) {
+                    // We now typically require that install names and paths match.  However symlinks may allow us to bring in a path which
+                    // doesn't match its install name.
+                    // For example:
+                    //   /usr/lib/libstdc++.6.0.9.dylib is a real file with install name /usr/lib/libstdc++.6.dylib
+                    //   /usr/lib/libstdc++.6.dylib is a symlink to /usr/lib/libstdc++.6.0.9.dylib
+                    // So long as we add both paths (with one as an alias) then this will work, even if dylibs are removed from disk
+                    // but the symlink remains.
+                    // Apply the same symlink crawling for dylibs that will install their contents to Cryptex paths but will have
+                    // install names with the cryptex paths removed.
+                    char resolvedSymlinkPath[PATH_MAX];
+                    if ( fileSystem.getRealPath(installName.data(), resolvedSymlinkPath) ) {
+                        if ( resolvedSymlinkPath == dylibPath ) {
+                            // Symlink is the install name and points to the on-disk dylib
+                            //fprintf(stderr, "Symlink works: %s == %s\n", inputFile.path, installName.c_str());
+                            dylibPath = installName;
+                        }
                     }
                 }
-            }
-
-            if ( !inputFile.forceNotCacheEligible && inputFile.mf->canBePlacedInDyldCache(dylibPath.data(), failureHandler) ) {
-                CacheDylib cacheDylib(inputFile);
-                this->cacheDylibs.push_back(std::move(cacheDylib));
-            }
-            else {
-                this->nonCacheDylibInputFiles.push_back(&inputFile);
-            }
-            continue;
-        }
-
-        if ( inputFile.mf->isDynamicExecutable() ) {
-            auto failureHandler = ^(const char* reason) {
-                inputFile.setError(Error("%s", reason));
-            };
-            if ( inputFile.mf->canHavePrebuiltExecutableLoader(options.platform, inputFile.path, failureHandler) ) {
-                this->exeInputFiles.push_back(&inputFile);
-            }
-
-            continue;
-        }
-    }
+
+                if ( !inputFile.forceNotCacheEligible && inputFile.mf->canBePlacedInDyldCache(dylibPath.data(), true /* check objc */, failureHandler) ) {
+                    CacheDylib cacheDylib(inputFile);
+                    chunk.cacheDylibs.push_back(std::move(cacheDylib));
+                }
+                else {
+                    chunk.nonCacheDylibInputFiles.push_back(&inputFile);
+                }
+                continue;
+            }
+
+            if ( inputFile.mf->isDynamicExecutable() ) {
+                auto failureHandler = ^(const char* reason) {
+                    inputFile.addError(Error("%s", reason));
+                };
+                if ( inputFile.mf->canHavePrebuiltExecutableLoader(options.platform, inputFile.path, failureHandler) ) {
+                    chunk.exeInputFiles.push_back(&inputFile);
+                }
+
+                continue;
+            }
+        }
+    }, ^(std::span<Chunk> chunks) {
+        size_t cacheDylibsSize = 0; // per-destination totals across all chunks
+        size_t nonCacheSize = 0;
+        size_t exeSize = 0;
+        for ( Chunk& chunk : chunks ) {
+            cacheDylibsSize += chunk.cacheDylibs.size();
+            nonCacheSize += chunk.nonCacheDylibInputFiles.size();
+            exeSize += chunk.exeInputFiles.size();
+        }
+        // Size each destination from its own total before appending
+        cacheDylibs.reserve(cacheDylibsSize);
+        nonCacheDylibInputFiles.reserve(nonCacheSize);
+        exeInputFiles.reserve(exeSize);
+        // Append every chunk's results to the builder-wide lists, in chunk order
+        for ( Chunk& chunk : chunks ) {
+            std::move(chunk.cacheDylibs.begin(), chunk.cacheDylibs.end(), std::back_inserter(cacheDylibs));
+            std::move(chunk.nonCacheDylibInputFiles.begin(), chunk.nonCacheDylibInputFiles.end(), std::back_inserter(nonCacheDylibInputFiles));
+            std::move(chunk.exeInputFiles.begin(), chunk.exeInputFiles.end(), std::back_inserter(exeInputFiles));
+        }
+    });
 
     if ( this->config.log.printStats ) {
         stats.add("  inputs: found %lld cache eligible dylibs\n", (uint64_t)this->cacheDylibs.size());
@@ -558,11 +886,23 @@
 {
     Timer::Scope timedScope(this->config, "verifySelfContained time");
 
-    __block std::unordered_set<std::string_view> allDylibs;
+    __block std::unordered_set<std::string_view> badDylibs;
+
+    __block std::unordered_map<std::string_view, const InputFile*> allDylibs;
     allDylibs.reserve(this->allInputFiles.size());
     for ( const InputFile& inputFile : this->allInputFiles ) {
+        if ( inputFile.mf == nullptr ) {
+            // Add the file if its path looks like it might be cache eligible. This is
+            // better than getting no errors at all later
+            if ( inputFile.hasError() && Header::isSharedCacheEligiblePath(inputFile.path.data()) ) {
+                allDylibs.insert({ inputFile.path, &inputFile });
+                badDylibs.insert(inputFile.path);
+            }
+            continue;
+        }
+
         if ( inputFile.mf->isDylib() )
-            allDylibs.insert(inputFile.mf->installName());
+            allDylibs.insert({ ((const Header*)inputFile.mf)->installName(), &inputFile });
     }
 
     __block std::unordered_set<std::string_view> potentialCacheDylibs;
@@ -570,8 +910,10 @@
     for ( const CacheDylib& cacheDylib : this->cacheDylibs )
         potentialCacheDylibs.insert(cacheDylib.installName);
 
-    __block std::unordered_set<std::string_view> badDylibs;
-
+
+    // Keep track of all errors in case we need them to diagnose libsystem being missing
+    __block std::unordered_map<std::string_view, std::list<std::string>> missingDependencies;
+    __block std::unordered_map<std::string_view, std::list<std::string>> ineligibleDependencies;
 
     // check all dependencies to assure every dylib in cache only depends on other dylibs in cache
     __block bool doAgain = true;
@@ -579,6 +921,8 @@
         doAgain = false;
         // scan dylib list making sure all dependents are in dylib list
         for ( const CacheDylib& cacheDylib : this->cacheDylibs ) {
+            if ( cacheDylib.inputFile == nullptr ) continue;
+
             //Timer::Scope timedScope(this->config, cacheDylib.installName);
             // Skip dylibs we marked bad from a previous iteration
             if ( cacheDylib.inputFile->hasError() )
@@ -596,37 +940,103 @@
                         }
                     }
                 }
-                if ( potentialCacheDylibs.count(loadPath) == 0 ) {
-                    // Break weak edges, but only if we haven't seen the dylib.
-                    if ( isWeak && (allDylibs.count(loadPath) == 0) )
-                        return;
-                    if ( isWeak && allowedMissingWeakDylibs.count(loadPath) )
-                        return;
-                    std::string reason          = std::string("Could not find dependency '") + loadPath + "'";
-                    cacheDylib.inputFile->setError(Error("%s", reason.c_str()));
+
+                // Break weak edges, but only if we haven't seen the dylib.
+                if ( isWeak && (allDylibs.count(loadPath) == 0) )
+                    return;
+                if ( isWeak && allowedMissingWeakDylibs.count(loadPath) )
+                    return;
+
+                if ( badDylibs.count(loadPath) ) {
+                    std::string reason          = std::string("Depends on ineligible/bad dylib '") + loadPath + "'";
+                    cacheDylib.inputFile->addError(Error("%s", reason.c_str()));
                     badDylibs.insert(cacheDylib.installName);
+                    ineligibleDependencies[cacheDylib.installName].push_back(loadPath);
                     doAgain = true;
-                    stop    = true;
                     return;
                 }
 
-                if ( badDylibs.count(loadPath) ) {
-                    // Break weak edges, but only if we haven't seen the dylib.
-                    if ( isWeak && (allDylibs.count(loadPath) == 0) )
-                        return;
-                    std::string reason          = std::string("Depends on ineligible/bad dylib '") + loadPath + "'";
-                    cacheDylib.inputFile->setError(Error("%s", reason.c_str()));
+                if ( potentialCacheDylibs.count(loadPath) == 0 ) {
+                    std::string reason          = std::string("Could not find dependency '") + loadPath + "'";
+                    cacheDylib.inputFile->addError(Error("%s", reason.c_str()));
                     badDylibs.insert(cacheDylib.installName);
+                    missingDependencies[cacheDylib.installName].push_back(loadPath);
                     doAgain = true;
-                    stop    = true;
                     return;
                 }
             });
         }
     }
 
+    // If libSystem was rejected, give up (drop every cache dylib), but first try to report the root cause
+    for ( const CacheDylib& cacheDylib : this->cacheDylibs ) {
+        if ( cacheDylib.inputFile == nullptr )
+            continue;
+
+        if ( !cacheDylib.inputFile->hasError() )
+            continue;
+
+        bool isLibSystem = cacheDylib.installName.ends_with("/usr/lib/libSystem.dylib") || cacheDylib.installName.ends_with("/usr/lib/libSystem.B.dylib");
+        if ( !isLibSystem )
+            continue;
+
+        // Walk the dependency tree to try find the real error which caused us to fail
+        std::unordered_set<std::string> seenFiles;
+        std::list<std::string> worklist;
+        worklist.push_back(std::string(cacheDylib.installName));
+        while ( !worklist.empty() ) {
+            std::string dylibInstallName = worklist.back();
+            worklist.pop_back();
+
+            // If the dylib depends on something missing, we've found an issue: report each missing dependency
+            if ( auto it = missingDependencies.find(dylibInstallName); it != missingDependencies.end() ) {
+                for ( const std::string& depInstallName : it->second ) {
+                    std::string reason = "Dylib located at '";
+                    reason += dylibInstallName;
+                    reason += "' not placed in shared cache because: ";
+                    reason += "Could not find dependency '";
+                    reason += depInstallName;
+                    reason += "'";
+                    this->errors.push_back(reason);
+                }
+            }
+
+            // Add bad dependencies to the worklist, hoping to eventually walk to the root cause
+            if ( auto dylibIt = ineligibleDependencies.find(dylibInstallName); dylibIt != ineligibleDependencies.end() ) {
+                for ( const std::string& depInstallName : dylibIt->second ) {
+                    if ( seenFiles.count(depInstallName) )
+                        continue;
+
+                    seenFiles.insert(depInstallName);
+                    worklist.push_back(depInstallName);
+
+                    if ( auto depIt = allDylibs.find(depInstallName); depIt != allDylibs.end() ) {
+                        const InputFile* inputFile = depIt->second;
+                        // Report this dependency's own errors, skipping those that only point at further bad dylibs
+                        for ( const error::Error& err : inputFile->getErrors() ) {
+                            if ( err.messageContains("Depends on ineligible/bad dylib") )
+                                continue;
+
+                            std::string reason = "Dylib located at '";
+                            reason += depInstallName;
+                            reason += "' not placed in shared cache because: ";
+                            reason += err.message();
+                            this->errors.push_back(reason);
+                        }
+                    }
+                }
+            }
+        }
+
+        // clear() invalidates the iterators of the enclosing range-for, so stop iterating right away
+        cacheDylibs.clear();
+        break;
+    }
+
     // Add bad dylibs to the "other" dylibs for use in prebuilt loaders
     for ( const CacheDylib& cacheDylib : this->cacheDylibs ) {
+        if ( cacheDylib.inputFile == nullptr ) continue;
+
         if ( cacheDylib.inputFile->hasError() ) {
             this->nonCacheDylibInputFiles.push_back(cacheDylib.inputFile);
             this->dylibHasMissingDependency = true;
@@ -635,9 +1045,18 @@
 
     this->cacheDylibs.erase(std::remove_if(this->cacheDylibs.begin(), this->cacheDylibs.end(), [&](const CacheDylib& dylib) {
                                 // Dylibs with errors must be removed from the cache
-                                return dylib.inputFile->hasError();
+                                return dylib.inputFile != nullptr && dylib.inputFile->hasError();
                             }),
                             this->cacheDylibs.end());
+
+    // verify that there's at least one dylib that has an input file
+    if ( !std::any_of(cacheDylibs.begin(), cacheDylibs.end(), [](const CacheDylib& dylib) {
+                    return dylib.inputFile != nullptr;
+                    }) ) {
+        // the only remaining dylib is the synthesized Swift prespecialized dylib
+        // so remove it too
+        cacheDylibs.clear();
+    }
 }
 
 void SharedCacheBuilder::calculateDylibAliases()
@@ -659,39 +1078,38 @@
 {
     Timer::Scope timedScope(this->config, "sortDylibs time");
 
-    std::sort(this->cacheDylibs.begin(), this->cacheDylibs.end(), [&](const CacheDylib& a, const CacheDylib& b) {
+    auto sortOrder = [this](const CacheDylib& dylib) -> uint64_t {
         // HACK: See addObjCOptimizationsToSubCache() and addObjCTextChunk()
         // We put the libobjc __TEXT first in the sub cache so that offsets from it to OBJC_RO are
         // positive.  But dyld4 and objc HeaderInfo data structures rely on the cache dylibs being
         // sorted by mach_header, and moving objc first breaks the order we determine here.  So hack
         // this too and put libobjc first for now.
-        bool isObjCA = (a.installName == "/usr/lib/libobjc.A.dylib");
-        bool isObjCB = (b.installName == "/usr/lib/libobjc.A.dylib");
-        if ( isObjCA != isObjCB )
-            return isObjCA;
-
-        const auto& orderA = options.dylibOrdering.find(std::string(a.installName));
-        const auto& orderB = options.dylibOrdering.find(std::string(b.installName));
-        bool        foundA = (orderA != options.dylibOrdering.end());
-        bool        foundB = (orderB != options.dylibOrdering.end());
-
-        // Order all __DATA_DIRTY segments specified in the order file first, in
-        // the order specified in the file, followed by any other __DATA_DIRTY
-        // segments in lexicographic order.
-        if ( foundA && foundB )
-            return orderA->second < orderB->second;
-        else if ( foundA )
-            return true;
-        else if ( foundB )
-            return false;
+        if ( endsWith(dylib.installName, "/usr/lib/libobjc.A.dylib") )
+            return (1ULL << 32);
+        // rdar://147457741 (dyld shared cache builder should guarantee that dyld and libdyld end up in the same split cache)
+        if ( endsWith(dylib.installName, "/usr/lib/system/libdyld.dylib") )
+            return (2ULL << 32);
+        if ( endsWith(dylib.installName, "/usr/lib/dyld") )
+            return (3ULL << 32);
+
+        // fixed order first
+        if ( const auto& fixedOrder = options.dylibOrdering.find(std::string(dylib.installName)); fixedOrder != options.dylibOrdering.end() )
+            return (4ULL << 32) + fixedOrder->second;
 
         // Sort mac before iOSMac
-        bool isIOSMacA = startsWith(a.installName, "/System/iOSSupport/");
-        bool isIOSMacB = startsWith(b.installName, "/System/iOSSupport/");
-        if ( isIOSMacA != isIOSMacB )
-            return !isIOSMacA;
-
-        // Finally sort by install name
+        if ( startsWith(dylib.installName, "/System/iOSSupport/") )
+            return (6ULL << 32);
+        return (5ULL << 32);
+    };
+
+    std::sort(this->cacheDylibs.begin(), this->cacheDylibs.end(), [&](const CacheDylib& a, const CacheDylib& b) {
+        uint64_t aOrder = sortOrder(a);
+        uint64_t bOrder = sortOrder(b);
+
+        if ( aOrder != bOrder )
+            return aOrder < bOrder;
+
+        // Finally sort by install name if there's no explicit ordering
         return a.installName < b.installName;
     });
 
@@ -699,6 +1117,46 @@
     uint32_t cacheIndex = 0;
     for ( CacheDylib& cacheDylib : cacheDylibs )
         cacheDylib.cacheIndex = cacheIndex++;
+}
+
+void SharedCacheBuilder::sortExecutables()
+{
+    Timer::Scope timedScope(this->config, "sortExecutables time");
+
+    // Record where each executable sits in the incoming list; that index orders files within a bucket
+    std::unordered_map<const InputFile*, uint32_t> inputIndexOf;
+    for ( const InputFile* exe : this->exeInputFiles )
+        inputIndexOf[exe] = (uint32_t)inputIndexOf.size();
+
+    // Path prefix -> bucket.  Lower buckets sort earlier and the first matching prefix wins
+    struct PrefixBucket { const char* prefix; uint64_t bucket; };
+    static constexpr PrefixBucket prefixBuckets[] = {
+        // Sort some paths first
+        { "/usr/libexec/", 1 },
+        { "/Applications/", 2 },
+        { "/private/var/staged_system_apps/", 3 },
+        { "/usr/bin/", 4 },
+        // Sort some other paths last
+        { "/AppleInternal/", 62 },
+        { "/usr/local/bin/", 63 },
+    };
+
+    // TODO: Use a file from the perf teams if we have one
+    auto sortKey = [&inputIndexOf](const InputFile* exe) -> uint64_t {
+        uint64_t bucket = 32; // any path not in the table above stays in the middle bucket
+        for ( const PrefixBucket& entry : prefixBuckets ) {
+            if ( exe->path.starts_with(entry.prefix) ) {
+                bucket = entry.bucket;
+                break;
+            }
+        }
+        return (bucket << 32) + inputIndexOf.at(exe);
+    };
+
+    // Prioritize daemons and other common apps, just in case we overflow the buffer in emitExecutablePrebuiltLoaders()
+    std::sort(this->exeInputFiles.begin(), this->exeInputFiles.end(), [&](const InputFile* lhs, const InputFile* rhs) {
+        return sortKey(lhs) < sortKey(rhs);
+    });
+}
 
 Error SharedCacheBuilder::calculateDylibDependents()
@@ -752,6 +1210,30 @@
                 stop = true;
             }
         });
+
+        // copy the original list of dependents
+        cacheDylib.inputDependents = cacheDylib.dependents;
+
+        // note: below changes to dependents need to be kept in sync with load command patching in `patchLinkedDylibs`
+        // we might want to generalize that if more libraries require patching
+
+        // force swiftCore link the prespecialized dylib
+        if ( swiftPrespecializedDylib && cacheDylib.installName.find("libswiftCore.dylib") != std::string_view::npos ) {
+            CacheDylib::DependentDylib depDylib;
+            depDylib.kind  = CacheDylib::DependentDylib::Kind::normal;
+            depDylib.dylib = swiftPrespecializedDylib;
+            cacheDylib.dependents.push_back(std::move(depDylib));
+        }
+
+        // clear all dependents of the prespecialized dylib except libSystem
+        // otherwise loading the library would pull in lots of other dependencies
+        if ( swiftPrespecializedDylib && &cacheDylib == swiftPrespecializedDylib ) {
+            if ( cacheDylib.dependents.empty() || cacheDylib.dependents.front().dylib->installName.find("libSystem") == std::string_view::npos ) {
+                diag.error("expected libSystem as the first linked dylib of %s", cacheDylib.inputHdr->installName());
+            } else {
+                cacheDylib.dependents.erase(cacheDylib.dependents.begin()+1, cacheDylib.dependents.end());
+            }
+        }
 
         if ( diag.hasError() )
             return Error("%s", diag.errorMessageCStr());
@@ -884,36 +1366,39 @@
 }
 
 static void forEachObjCMethodName(const CacheDylib& cacheDylib,
-                                  void (^callback)(std::string_view str))
-{
-    const MachOFile* mf = cacheDylib.inputMF;
-    mf->forEachSection(^(const MachOFile::SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
-        if ( strcmp(sectInfo.segInfo.segName, "__TEXT") != 0 )
+                                  void (^callback)(const char* str))
+{
+    __block objc_visitor::Visitor objcVisitor = makeInputDylibObjCVisitor(cacheDylib);
+    // Reports selector names to `callback` from two sources: selector references and pointer-based method lists
+    auto visitPointerBasedMethod = ^(const objc_visitor::Method& method) {
+        const char* selectorString = method.getName(objcVisitor);
+        callback(selectorString);
+    };
+    // Visits one method list; empty lists and lists that use relative offsets report nothing
+    auto visitMethodList = ^(const objc_visitor::MethodList& methodList) {
+        if ( methodList.numMethods() == 0 )
             return;
-        if ( strcmp(sectInfo.sectName, "__objc_methname") != 0 )
+        if ( methodList.usesRelativeOffsets() )
             return;
-        if ( sectInfo.segInfo.isProtected || ((sectInfo.sectFlags & SECTION_TYPE) != S_CSTRING_LITERALS) ) {
-            stop = true;
-            return;
-        }
-        if ( malformedSectionRange ) {
-            stop = true;
-            return;
-        }
-
-        // Use the file offset in the section to get the correct content
-        const char* content     = (const char*)mf + sectInfo.sectFileOffset;
-        uint64_t    sectionSize = sectInfo.sectSize;
-
-        const char* s   = (const char*)content;
-        const char* end = s + sectionSize;
-        while ( s < end ) {
-            std::string_view str = s;
-            callback(str);
-            s += str.size() + 1;
-        }
-
-        stop = true;
+        // NOTE(review): relative-offset lists are skipped above; presumably the selector references already cover their names - confirm
+        // Pointer-based method list: report the name of every method it contains
+        uint32_t numMethods = methodList.numMethods();
+        for ( uint32_t i = 0; i != numMethods; ++i ) {
+            const objc_visitor::Method& method = methodList.getMethod(objcVisitor, i);
+            visitPointerBasedMethod(method);
+        }
+    };
+
+    // Source 1: the selector string of every selector reference
+    objcVisitor.forEachSelectorReference(^(VMAddress selRefVMAddr, VMAddress selRefTargetVMAddr,
+                                           const char* selectorString) {
+        callback(selectorString);
+    });
+
+    // Source 2: the method lists of classes/categories/protocols/swift generics
+    objcVisitor.forEachMethodList(^(objc_visitor::MethodList& objcMethodList,
+                                    std::optional<metadata_visitor::ResolvedValue> extendedMethodTypes) {
+        visitMethodList(objcMethodList);
     });
 }
 
@@ -923,7 +1408,7 @@
     VMOffset          offsetInDylib;
 };
 
-static FoundSymbol findTargetClass(Diagnostics diag,
+static FoundSymbol findTargetClass(Diagnostics& diag,
                                    const std::vector<CacheDylib>& cacheDylibs,
                                    std::string_view symbolName, std::optional<uint32_t> cacheIndex)
 {
@@ -961,12 +1446,13 @@
     if ( !this->config.layout.is64 )
         return;
 
-    if ( this->config.layout.cacheSize.rawValue() > 0x100000000 )
+    // Limited by ImpCacheEntry_v2::impOffset which is 38-bits.  For now limit to 16GB
+    // as that is the maximum we know slide info v5 can get to
+    if ( this->config.layout.cacheSize.rawValue() > 16_GB )
         return;
 
     // Only arm64* are is supported by the runtime
-    std::string_view archName = this->options.archs.name();
-    if ( archName != "arm64e" && archName != "arm64")
+    if ( !this->options.arch.sameCpu(mach_o::Architecture::arm64) )
         return;
 
     // Skip everything if the JSON file is empty
@@ -1054,6 +1540,9 @@
         });
 
         objcVisitor.forEachCategory(^(const objc_visitor::Category& objcCategory, bool& stopCategory) {
+            if ( objcCategory.isForSwiftStubClass() )
+                return;
+
             imp_caches::Category impCacheCategory(objcCategory.getName(objcVisitor));
 
             // instance methods
@@ -1121,6 +1610,7 @@
             bool                    isWeakImport     = false;
         };
 
+        auto& dylibMethodMap = objcIMPCachesOptimizer.methodMap[cacheDylib.installName];
         __block std::vector<BindTarget> bindTargets;
         __block Diagnostics diag;
         cacheDylib.inputMF->withFileLayout(diag, ^(const mach_o::Layout& layout) {
@@ -1211,7 +1701,6 @@
                     objc_visitor::Method objcMethod = objcMethodList.getMethod(objcVisitor, i);
 
                     imp_caches::BucketMethod methodKey = {
-                        .installName = cacheDylib.installName,
                         .className = impCacheClass.name,
                         .methodName = objcMethod.getName(objcVisitor),
                         .isInstanceMethod = !impCacheClass.isMetaClass
@@ -1222,7 +1711,7 @@
                         &cacheDylib,
                         InputDylibVMAddress(impVMAddr.rawValue())
                     };
-                    objcIMPCachesOptimizer.methodMap[methodKey] = inputDylibLocation;
+                    dylibMethodMap[methodKey] = inputDylibLocation;
                 }
             }
 
@@ -1232,6 +1721,9 @@
         // Walk each category and set the class pointer
         __block uint32_t categoryIndex = 0;
         objcVisitor.forEachCategory(^(const objc_visitor::Category& objcCategory, bool& stopCategory) {
+            if ( objcCategory.isForSwiftStubClass() )
+                return;
+
             imp_caches::Category& impCacheCategory = dylib.categories[categoryIndex];
             const DylibClasses& classMap = dylibClassMaps[cacheDylib.cacheIndex];
 
@@ -1284,7 +1776,6 @@
                         objc_visitor::Method objcMethod = objcMethodList.getMethod(objcVisitor, i);
 
                         imp_caches::BucketMethod methodKey = {
-                            .installName = cacheDylib.installName,
                             .className = impCacheCategory.cls->name,
                             .methodName = objcMethod.getName(objcVisitor),
                             .isInstanceMethod = true
@@ -1294,7 +1785,7 @@
                             &cacheDylib,
                             InputDylibVMAddress(impVMAddr.rawValue())
                         };
-                        objcIMPCachesOptimizer.methodMap[methodKey] = inputDylibLocation;
+                        dylibMethodMap[methodKey] = inputDylibLocation;
                     }
                 }
 
@@ -1306,7 +1797,6 @@
                         objc_visitor::Method objcMethod = objcMethodList.getMethod(objcVisitor, i);
 
                         imp_caches::BucketMethod methodKey = {
-                            .installName = cacheDylib.installName,
                             .className = impCacheCategory.cls->name,
                             .methodName = objcMethod.getName(objcVisitor),
                             .isInstanceMethod = false
@@ -1316,7 +1806,7 @@
                             &cacheDylib,
                             InputDylibVMAddress(impVMAddr.rawValue())
                         };
-                        objcIMPCachesOptimizer.methodMap[methodKey] = inputDylibLocation;
+                        dylibMethodMap[methodKey] = inputDylibLocation;
                     }
                 }
             }
@@ -1331,6 +1821,10 @@
     // We should probably keep the piece here to walk the classes as that can perhaps give us a good estimate of the size of the IMP caches
     // themselves, minus the strings which need their own buffer
     this->objcIMPCachesOptimizer.builder->buildImpCaches();
+
+    for ( const std::string& warning : this->objcIMPCachesOptimizer.builder->diags.warnings() ) {
+        this->warning("IMP caches: %s", warning.data());
+    }
 
     // Push all the IMP cache selectors in to the main selectors buffer.
     // We could try have an IMP cache selectors buffer and a regular selectors buffer, but that complicates
@@ -1361,6 +1855,7 @@
 
     // Add space for the IMP caches themselves
     this->objcIMPCachesOptimizer.dylibIMPCaches.resize(dylibs.size());
+    size_t discardedCaches = 0;
     for ( uint32_t dylibIndex = 0; dylibIndex != dylibs.size(); ++dylibIndex ) {
         imp_caches::Dylib& dylib = dylibs[dylibIndex];
         ObjCIMPCachesOptimizer::IMPCacheMap& dylibIMPCaches = objcIMPCachesOptimizer.dylibIMPCaches[dylibIndex];
@@ -1369,6 +1864,24 @@
             if ( !impCache.has_value() )
                 continue;
 
+            // rdar://147219191 (Selector bucket overflows in IMP caches result in malformed tables)
+            // validate selector offsets point to matching buckets
+            bool discard = false;
+            for ( uint32_t slot = 0; slot < impCache->buckets.size(); ++slot ) {
+                const imp_caches::Bucket& bucket = impCache->buckets[slot];
+                if ( !bucket.isEmptyBucket ) {
+                    uint32_t expectedSlot = (bucket.selOffset >> impCache->cache_shift) & impCache->cache_mask;
+                    if ( expectedSlot != slot ) {
+                        discard = true;
+                        break;
+                    }
+                }
+            }
+            if ( discard ) {
+                ++discardedCaches;
+                continue;
+            }
+
             VMOffset currentOffset((uint64_t)this->objcIMPCachesOptimizer.impCachesTotalByteSize);
             assert((this->objcIMPCachesOptimizer.impCachesTotalByteSize % 8) == 0);
             this->objcIMPCachesOptimizer.impCachesTotalByteSize += sizeof(ImpCacheHeader_v2);
@@ -1380,6 +1893,10 @@
         }
     }
 
+    if ( discardedCaches != 0 ) {
+        this->warning("[ERROR] discarded IMP caches: %lu", discardedCaches);
+    }
+
     if ( this->config.log.printStats ) {
         stats.add("  objc: found %lld imp cache selectors\n", (uint64_t)this->objcSelectorOptimizer.selectorsMap.size());
         stats.add("  objc: using %lld bytes\n", this->objcSelectorOptimizer.selectorStringsTotalByteSize);
@@ -1390,7 +1907,7 @@
 void SharedCacheBuilder::findObjCDylibs()
 {
     // driverKit has no objc
-    if ( this->options.platform == dyld3::Platform::driverKit )
+    if ( this->options.platform == Platform::driverKit )
         return;
 
     Stats        stats(this->config);
@@ -1398,7 +1915,9 @@
 
     assert(this->objcOptimizer.objcDylibs.empty());
     for ( CacheDylib& cacheDylib : this->cacheDylibs ) {
-        if ( cacheDylib.inputMF->hasObjC() )
+        if ( cacheDylib.inputHdr == nullptr ) continue;
+
+        if ( cacheDylib.inputHdr->hasObjC() )
             this->objcOptimizer.objcDylibs.push_back(&cacheDylib);
     }
 
@@ -1439,11 +1958,19 @@
     Error err = parallel::forEach(this->cacheDylibs, ^(size_t index, CacheDylib& cacheDylib) {
         std::vector<std::string_view>& strings = dylibSelectors[index];
 
-        forEachObjCMethodName(cacheDylib, ^(std::string_view str) {
-            strings.push_back(std::move(str));
+        __block std::unordered_set<const void*> seenStrings;
+        forEachObjCMethodName(cacheDylib, ^(const char* str) {
+            auto itAndInserted = seenStrings.insert(str);
+            if ( itAndInserted.second )
+                strings.push_back(str);
         });
 
-        // FIXME: Walk selector references, classes, categories, protocols, etc
+        // sort strings as we used to walk the __objc_methname section in order, so by sorting we
+        // should get as close as possible to the old order
+        std::sort(strings.begin(), strings.end(), [](const std::string_view& a, const std::string_view& b) {
+            // note pointer comparison as we want the string location in the binary not its contents
+            return a.data() < b.data();
+        });
 
         return Error();
     });
@@ -1815,7 +2342,7 @@
     size_t objcIndex = 0;
     for (size_t cacheIndex = 0; cacheIndex < this->cacheDylibs.size(); cacheIndex++) {
         CacheDylib& cacheDylib = this->cacheDylibs[cacheIndex];
-        if ( !cacheDylib.inputMF->hasObjC() )
+        if ( !cacheDylib.inputHdr->hasObjC() )
             continue;
 
         // Skip dylibs with opcode fixups, as the Category visitor operates on chained fixups to find classes
@@ -1868,6 +2395,10 @@
         });
 
         objCVisitor.forEachCategory(^(const objc_visitor::Category &objcCategory, bool &stopCategory) {
+
+            // Skip catlist2 entries.  These are only for Swift stub classes
+            if ( objcCategory.isForSwiftStubClass() )
+                return;
 
             __block ObjCCategoryOptimizer::Category objCCategoryInfo(objcCategory.getName(objCVisitor));
             objCCategoryInfo.dylibObjcIndex = objcIndex;
@@ -1961,7 +2492,10 @@
     uint32_t elementsWithPadding = maxElements * 11 / 10; // if close to power of 2, perfect hash may fail, so don't get within 10% of that
     uint32_t powTwoCapacity      = 1 << (32 - __builtin_clz(elementsWithPadding - 1));
     uint32_t headerSize          = 4 * (8 + 256);
-    return headerSize + powTwoCapacity / 2 + powTwoCapacity + powTwoCapacity * perElementData;
+    uint32_t totalSize = headerSize + powTwoCapacity / 2 + powTwoCapacity + powTwoCapacity * perElementData;
+
+    // Add in a little extra room for very small tables when we are building unit tests
+    return std::max(totalSize, 4096U);
 }
 
 void SharedCacheBuilder::estimateObjCHashTableSizes()
@@ -2098,6 +2632,11 @@
     return hashTableSize + (3 * sizeof(uint64_t) * maxElements);
 }
 
+static uint32_t ptrHashTableSize(uint32_t maxElement, uint32_t numPointerKeys)
+{
+    return swiftHashTableSize(maxElement) + numPointerKeys * sizeof(uint64_t);
+}
+
 void SharedCacheBuilder::estimateSwiftHashTableSizes()
 {
     if ( this->objcOptimizer.objcDylibs.empty() )
@@ -2106,7 +2645,7 @@
     Stats        stats(this->config);
     Timer::Scope timedScope(this->config, "estimateSwiftHashTableSizes time");
 
-    this->swiftProtocolConformanceOptimizer.optsHeaderByteSize = sizeof(SwiftOptimizationHeader);
+    this->swiftOptimizer.optsHeaderByteSize = sizeof(SwiftOptimizationHeader);
 
     __block uint32_t numTypeConformances = 0;
     __block uint32_t numMetadataConformances = 0;
@@ -2152,7 +2691,29 @@
         });
     }
 
-    auto& optimizer = this->swiftProtocolConformanceOptimizer;
+    auto& optimizer = this->swiftOptimizer;
+
+    if ( swiftPrespecializedDylib ) {
+        Diagnostics diagVal;
+        Diagnostics& diag = diagVal;
+        SwiftVisitor swiftVisitorVal = makeInputDylibSwiftVisitor(*swiftPrespecializedDylib);
+        SwiftVisitor& swiftVisitor = swiftVisitorVal;
+        swiftVisitor.forEachPointerHashTable(diag, ^(metadata_visitor::ResolvedValue sectionBase, size_t tableIndex, uint8_t *tableStart, size_t numEntries) {
+            assert(optimizer.prespecializedMetadataHashTables.size() == tableIndex);
+
+            PointerHashTableOptimizerInfo& tableInfo = optimizer.prespecializedMetadataHashTables.emplace_back();
+            swiftVisitor.forEachPointerHashTableRelativeEntry(diag, tableStart, VMAddress(0ull), ^(size_t index, std::span<uint64_t> keys, uint64_t value) {
+                assert(!keys.empty() && "pointer keys can't be empty");
+
+                ++tableInfo.numEntries;
+                tableInfo.numPointerKeys += (uint32_t)keys.size();
+            });
+
+            tableInfo.size = ptrHashTableSize(tableInfo.numEntries, tableInfo.numPointerKeys);
+            assert(tableInfo.numEntries == numEntries);
+        });
+    }
+
     optimizer.typeConformancesHashTableSize = swiftHashTableSize(numTypeConformances);
     optimizer.metadataConformancesHashTableSize = swiftHashTableSize(numMetadataConformances);
     optimizer.foreignTypeConformancesHashTableSize = swiftHashTableSize(numForeignConformances);
@@ -2162,6 +2723,12 @@
                   (uint64_t)optimizer.typeConformancesHashTableSize, numTypeConformances);
         stats.add("  swift: metadata hash table estimated size: %lld (from %d entries)\n", (uint64_t)optimizer.metadataConformancesHashTableSize, numMetadataConformances);
         stats.add("  swift: foreign metadata hash table estimated size: %lld (from %d entries)\n", (uint64_t)optimizer.foreignTypeConformancesHashTableSize, numForeignConformances);
+
+        stats.add("  swift: prespecialized metadata hash tables %lu\n", optimizer.prespecializedMetadataHashTables.size());
+        for ( int i = 0; i < optimizer.prespecializedMetadataHashTables.size(); ++i ) {
+            const PointerHashTableOptimizerInfo& tableInfo = optimizer.prespecializedMetadataHashTables[i];
+            stats.add("  swift: prespecialized metadata hash table #%d. estimated size: %lld (from %u entries)\n", i, (uint64_t)tableInfo.size, tableInfo.numEntries);
+        }
     }
 }
 
@@ -2197,7 +2764,7 @@
 
     // For each alias, also see if we have intermediate aliases
     // This is the "Current -> A" symlink in say "/S/L/F/CF.fw/Current/CF"
-    if ( this->options.platform == dyld3::Platform::macOS ) {
+    if ( this->options.platform == Platform::macOS ) {
         for ( const cache_builder::FileAlias& alias : this->inputIntermediateAliases ) {
             const auto& pos = dylibPathToDylibIndex.find(alias.realPath);
             if ( pos != dylibPathToDylibIndex.end() ) {
@@ -2217,89 +2784,78 @@
     }
 }
 
-void SharedCacheBuilder::estimatePatchTableSize()
-{
+void SharedCacheBuilder::initializePatchTableOptimizer()
+{
+    // Reserve space for the patch infos, one per dylib
+    // TODO: Just move these to the dylib.  We don't really need them to be owned by the PatchTableOptimizer
+    this->patchTableOptimizer.patchInfos.resize(this->cacheDylibs.size());
+}
+
+void SharedCacheBuilder::estimateFunctionVariantsSize()
+{
+    this->functionVariantsOptimizer.fvInfoTotalByteSize = sizeof(dyld_cache_function_variant_entry) * 256;  // FIXME
+}
+
+void SharedCacheBuilder::estimatePrewarmingSize()
+{
+    // Skip everything if the JSON file is empty
+    if ( this->options.prewarmingOptimizations.empty() )
+        return;
+
+    using json::Node;
+    using json::NodeValueType;
+
     Stats        stats(this->config);
-    Timer::Scope timedScope(this->config, "estimatePatchTableSize time");
-
-    // The patch table consists of a series of arrays.
-    // For each dylib, we have a list of all clients of that dylib
-    // For each dylib we also have a list of used exports
-    // For each client we then have a list of symbols used
-    // And for each list of symbols, we have a list of locations to patch
-    // We need to estimate a patch table based on the above lists
-
-    __block uint32_t bindStringsLength = 0;
-    __block uint32_t numBindTargets = 0;
-    __block uint32_t numBinds = 0;
-    uint32_t numClients = 0;
-    for ( const CacheDylib& cacheDylib : this->cacheDylibs ) {
-        __block Diagnostics diag;
-        cacheDylib.inputMF->withFileLayout(diag, ^(const mach_o::Layout& layout) {
-            mach_o::Fixups fixups(layout);
-            fixups.forEachBindTarget(diag, true, 0,
-                                     ^(const mach_o::Fixups::BindTargetInfo& info, bool &stop) {
-                ++numBindTargets;
-                bindStringsLength += strlen(info.symbolName) + 1;
-            }, ^(const mach_o::Fixups::BindTargetInfo &info, bool &stop) {
-                ++numBindTargets;
-                bindStringsLength += strlen(info.symbolName) + 1;
-            });
-
-            if ( cacheDylib.inputMF->hasChainedFixups() ) {
-                fixups.withChainStarts(diag, ^(const dyld_chained_starts_in_image* starts) {
-                    fixups.forEachFixupInAllChains(diag, starts, false,
-                                                   ^(mach_o::ChainedFixupPointerOnDisk* fixupLoc,
-                                                     uint64_t fixupSegmentOffset,
-                                                     const dyld_chained_starts_in_segment* segInfo,
-                                                     bool& stop) {
-                        uint32_t bindOrdinal = ~0U;
-                        int64_t  addend = -1;
-                        if ( fixupLoc->isBind(segInfo->pointer_format, bindOrdinal, addend) )
-                            ++numBinds;
-                    });
-                });
-            } else {
-                fixups.forEachBindLocation_Opcodes(diag,
-                                                   ^(uint64_t runtimeOffset, uint32_t segmentIndex,
-                                                     unsigned int targetIndex, bool &stop) {
-                    ++numBinds;
-                }, ^(uint64_t runtimeOffset, uint32_t segmentIndex, unsigned int overrideBindTargetIndex,
-                     bool &stop) {
-                    ++numBinds;
-                });
-            }
-        });
-
-        numClients += (uint32_t)cacheDylib.dependents.size();
-    }
-
-    // Start with the patch header
-    uint64_t size = sizeof(dyld_cache_patch_info_v3);
-
-    // One of these for each dylib
-    size += sizeof(dyld_cache_image_patches_v2) * this->cacheDylibs.size();
-
-    // Estimate that 2/3 of exports are used
-    size += (sizeof(dyld_cache_image_export_v2) * numBindTargets * 2) / 3;
-    size += (bindStringsLength * 2) / 3;
-
-    // 1 entry per client
-    size += sizeof(dyld_cache_image_clients_v2) * numClients;
-
-    // 1 entry per bind target
-    size += sizeof(dyld_cache_patchable_export_v2) * numBindTargets;
-
-    // 1 entry per location we bind to
-    size += sizeof(dyld_cache_patchable_location_v4) * numBinds;
-
-    this->patchTableOptimizer.patchTableTotalByteSize = size;
-    
-    // Reserve space for the patch infos, one per dylib
-    this->patchTableOptimizer.patchInfos.resize(this->cacheDylibs.size());
+    Timer::Scope timedScope(this->config, "estimatePrewarmingSize time");
+
+    Diagnostics diag;
+    Node rootNode = json::readJSON(diag, this->options.prewarmingOptimizations.data(),
+                                   this->options.prewarmingOptimizations.size(),
+                                   false /* useJSON5 */);
+    if ( diag.hasError() )
+        return;
+
+    // Format is something like:
+    // [
+    //     {
+    //         "install_name": "...",
+    //         "locations": [
+    //             {
+    //                 "name": "symbol name",
+    //                 "offset": 0
+    //             }
+    //         ]
+    //     },
+    // ...
+    // ]
+    if ( rootNode.type != NodeValueType::Array )
+        return;
+
+    uint64_t numElements = 0;
+    for ( const Node& fileNode : rootNode.array ) {
+        auto locationsIt = fileNode.map.find("locations");
+        if ( locationsIt == fileNode.map.end() ) {
+            // FIXME: Should we error out if the JSON isn't what we expect
+            // For now just skip bad data
+            continue;
+        }
+
+        if ( locationsIt->second.type != NodeValueType::Array ) {
+            // FIXME: Should we error out if the JSON isn't what we expect
+            // For now just skip bad data
+            continue;
+        }
+
+        numElements += locationsIt->second.array.size();
+    }
+
+    this->prewarmingOptimizer.prewarmingByteSize = 0;
+    this->prewarmingOptimizer.prewarmingByteSize += sizeof(dyld_prewarming_header);
+    this->prewarmingOptimizer.prewarmingByteSize += numElements * sizeof(dyld_prewarming_entry);
+    this->prewarmingOptimizer.prewarmingByteSize = (uint32_t)alignTo((uint64_t)this->prewarmingOptimizer.prewarmingByteSize, 8);
 
     if ( this->config.log.printStats ) {
-        stats.add("  patch table estimated size: %lld\n", (uint64_t)this->patchTableOptimizer.patchTableTotalByteSize);
+        stats.add("  dyld4 prewarming estimated size: %lld\n", (uint64_t)this->prewarmingOptimizer.prewarmingByteSize);
     }
 }
 
@@ -2318,12 +2874,12 @@
             size += cacheDylib.inputFile->path.size() + 1;
             size = alignTo(size, alignof(dyld4::Loader::LoaderRef));
             size += sizeof(dyld4::Loader::LoaderRef) * cacheDylib.dependents.size();
-            size += sizeof(Loader::DependentDylibAttributes) * cacheDylib.dependents.size();
+            size += sizeof(Loader::LinkedDylibAttributes) * cacheDylib.dependents.size();
             size += sizeof(Loader::FileValidationInfo);
             size += sizeof(Loader::Region) * cacheDylib.segments.size();
 
             // iOSMac dylibs likely contain a patch table
-            if ( (this->options.platform == dyld3::Platform::macOS)
+            if ( (this->options.platform == Platform::macOS)
                 && startsWith(cacheDylib.installName, "/System/iOSSupport") ) {
                 __block Diagnostics diag;
                 cacheDylib.inputMF->withFileLayout(diag, ^(const mach_o::Layout& layout) {
@@ -2345,7 +2901,7 @@
     // patch tables, etc.  For now, 20KB/executable seems about right
     // FIXME: Small programs like daemons need just 500 bytes or so, but large /Applications need up to 1MB
     // Come up with a better estimate
-    this->prebuiltLoaderBuilder.executablesLoaderSize = 20_KB * this->exeInputFiles.size();
+    this->prebuiltLoaderBuilder.executablesLoaderSize = 30_KB * this->exeInputFiles.size();
 
     // Estimate the trie size
     // Assume they are all at a high offset
@@ -2358,7 +2914,7 @@
         // Assumes that cdHash strings at runtime use lowercase a-f digits
         uint32_t codeSignFileOffset = 0;
         uint32_t codeSignFileSize   = 0;
-        if ( inputFile->mf->hasCodeSignature(codeSignFileOffset, codeSignFileSize) ) {
+        if ( ((const Header*)inputFile->mf)->hasCodeSignature(codeSignFileOffset, codeSignFileSize) ) {
             auto handler = ^(const uint8_t cdHash[20]) {
                 std::string cdHashStr = "/cdhash/";
                 cdHashStr.reserve(24);
@@ -2440,6 +2996,10 @@
 // This adds all of them to the given subCache
 void SharedCacheBuilder::addObjCOptimizationsToSubCache(SubCache& subCache)
 {
+    // Disable objc optimizations for EK
+    if ( this->options.platform.isExclaveKit() )
+        return;
+
     // Add objc header info RW
     subCache.addObjCHeaderInfoReadWriteChunk(this->config, this->objcOptimizer);
 
@@ -2450,40 +3010,41 @@
     subCache.addObjCCategoriesChunk(this->config, this->objcCategoryOptimizer);
 
     // Add objc opts header
-    subCache.addObjCOptsHeaderChunk(this->objcOptimizer);
+    subCache.addObjCOptsHeaderChunk(this->config, this->objcOptimizer);
 
     // Add objc header info RO
-    subCache.addObjCHeaderInfoReadOnlyChunk(this->objcOptimizer);
+    subCache.addObjCHeaderInfoReadOnlyChunk(this->config, this->objcOptimizer);
 
     // Add objc image info
-    subCache.addObjCImageInfoChunk(this->objcOptimizer);
+    subCache.addObjCImageInfoChunk(this->config, this->objcOptimizer);
 
     // Add selector strings and hash table. These need to be adjacent as the table has offsets in
     // to the string section
-    subCache.addObjCSelectorStringsChunk(this->objcSelectorOptimizer);
-    subCache.addObjCSelectorHashTableChunk(this->objcSelectorOptimizer);
+    subCache.addObjCSelectorStringsChunk(this->config, this->objcSelectorOptimizer);
+    subCache.addObjCSelectorHashTableChunk(this->config, this->objcSelectorOptimizer);
 
     // Add class name strings and hash table
-    subCache.addObjCClassNameStringsChunk(this->objcClassOptimizer);
-    subCache.addObjCClassHashTableChunk(this->objcClassOptimizer);
+    subCache.addObjCClassNameStringsChunk(this->config, this->objcClassOptimizer);
+    subCache.addObjCClassHashTableChunk(this->config, this->objcClassOptimizer);
 
     // Add protocol name strings and hash table
-    subCache.addObjCProtocolNameStringsChunk(this->objcProtocolOptimizer);
-    subCache.addObjCProtocolHashTableChunk(this->objcProtocolOptimizer);
+    subCache.addObjCProtocolNameStringsChunk(this->config, this->objcProtocolOptimizer);
+    subCache.addObjCProtocolHashTableChunk(this->config, this->objcProtocolOptimizer);
 
     // Add Swift demangled name strings found in ObjC protocol metadata
-    subCache.addObjCProtocolSwiftDemangledNamesChunk(this->objcProtocolOptimizer);
+    subCache.addObjCProtocolSwiftDemangledNamesChunk(this->config, this->objcProtocolOptimizer);
 
     // Add ObjC IMP Caches
-    subCache.addObjCIMPCachesChunk(this->objcIMPCachesOptimizer);
+    subCache.addObjCIMPCachesChunk(this->config, this->objcIMPCachesOptimizer);
 
     // Add Swift opts header
-    subCache.addSwiftOptsHeaderChunk(this->swiftProtocolConformanceOptimizer);
+    subCache.addSwiftOptsHeaderChunk(this->config, this->swiftOptimizer);
 
     // Add Swift hash tables
-    subCache.addSwiftTypeHashTableChunk(this->swiftProtocolConformanceOptimizer);
-    subCache.addSwiftMetadataHashTableChunk(this->swiftProtocolConformanceOptimizer);
-    subCache.addSwiftForeignHashTableChunk(this->swiftProtocolConformanceOptimizer);
+    subCache.addSwiftTypeHashTableChunk(this->config, this->swiftOptimizer);
+    subCache.addSwiftMetadataHashTableChunk(this->config, this->swiftOptimizer);
+    subCache.addSwiftForeignHashTableChunk(this->config, this->swiftOptimizer);
+    subCache.addSwiftPrespecializedMetadataPointerTableChunks(this->config, this->swiftOptimizer);
 }
 
 // The shared cache contains many global optimizations such as dyld4 loaders, trie's, etc.
@@ -2497,6 +3058,9 @@
     // Add patch table
     subCache.addPatchTableChunk(this->patchTableOptimizer);
 
+    // Add function-variants table
+    subCache.addFunctionVariantsChunk(this->functionVariantsOptimizer);
+
     // Add cache dylib Loader's
     subCache.addCacheDylibsLoaderChunk(this->prebuiltLoaderBuilder);
 
@@ -2505,24 +3069,57 @@
 
     // Add executable trie
     subCache.addExecutablesTrieChunk(this->prebuiltLoaderBuilder);
+
+    // Add prewarming data
+    subCache.addPrewarmingDataChunk(this->config, this->prewarmingOptimizer);
 }
 
 // Every subCache needs a code signature, and subCache's with DATA* need slide info.  This adds
 // anything we need, based on whatever else is already in the SubCache.
 void SharedCacheBuilder::addFinalChunksToSubCache(SubCache& subCache)
 {
-    subCache.addCacheHeaderChunk(this->cacheDylibs);
-
-    // Add slide info for each DATA/AUTH segment.  Do this after we've added any other DATA*
-    // segments
-    if ( this->config.slideInfo.slideInfoFormat.has_value() )
-        subCache.addSlideInfoChunks();
+    subCache.addCacheHeaderChunk(this->options, this->config, this->cacheDylibs);
 
     // Add a code signature region
     subCache.addCodeSignatureChunk();
-
-    // Finalize the SubCache, by removing any unused regions
-    subCache.removeEmptyRegions();
+}
+
+static std::optional<Header::SectionInfo> getStubsSection(const DylibSegmentChunk* dylibChunk)
+{
+    if ( dylibChunk == nullptr )
+        return std::nullopt;
+    if ( dylibChunk->kind != Chunk::Kind::dylibText )
+        return std::nullopt;
+
+    const MachOFile* mf = dylibChunk->inputFile->mf;
+    __block std::optional<Header::SectionInfo> stubsSection;
+    ((const Header*)mf)->forEachSection(^(const Header::SectionInfo &sectInfo, bool &stop) {
+        if ( dylibChunk->segmentName != sectInfo.segmentName )
+            return;
+
+        unsigned sectionType = (sectInfo.flags & SECTION_TYPE);
+        if ( sectionType != S_SYMBOL_STUBS )
+            return;
+
+        if ( sectInfo.segmentName != "__TEXT" ) {
+            // stubs aren't in __TEXT.  Give up on this one for now
+            return;
+        }
+
+        if ( mf->isArch("arm64e") ) {
+            // For arm64e, we can only optimize __auth_stubs
+            if ( sectInfo.sectionName != "__auth_stubs" )
+                return;
+        } else {
+            // For non-arm64e, we can only optimize __stubs
+            if ( sectInfo.sectionName != "__stubs" )
+                return;
+        }
+
+        stubsSection = sectInfo;
+    });
+
+    return stubsSection;
 }
 
 // Add stubs Chunk's for every stubs section in the given text subCache
@@ -2536,60 +3133,29 @@
         const DylibSegmentChunk* textChunk = textRegionChunk->isDylibSegmentChunk();
         if ( textChunk == nullptr )
             continue;
-        if ( textChunk->kind != Chunk::Kind::dylibText )
+
+        std::optional<Header::SectionInfo> sectInfo = getStubsSection(textChunk);
+        if ( !sectInfo )
             continue;
 
-        const MachOFile* mf = textChunk->inputFile->mf;
-        mf->forEachSection(^(const dyld3::MachOFile::SectionInfo &sectInfo,
-                             bool malformedSectionRange, bool &stop) {
-            if ( textChunk->segmentName != sectInfo.segInfo.segName )
-                return;
-
-            unsigned sectionType = (sectInfo.sectFlags & SECTION_TYPE);
-            if ( sectionType != S_SYMBOL_STUBS )
-                return;
-
-            if ( strcmp(sectInfo.segInfo.segName, "__TEXT") != 0 ) {
-                // stubs aren't in __TEXT.  Give up on this one for now
-                return;
-            }
-
-            // Make a stubs chunk for this stubs section
-            CacheDylib* cacheDylib = fileToDylibMap.at(textChunk->inputFile);
-            StubsChunk* devStubsChunk = nullptr;
-            StubsChunk* customerStubsChunk = nullptr;
-
-            if ( mf->isArch("arm64e") ) {
-                // For arm64e, we can only optimize __auth_stubs
-                if ( !strcmp(sectInfo.sectName, "__auth_stubs") ) {
-                    devStubsChunk       = &cacheDylib->developmentStubs;
-                    customerStubsChunk  = &cacheDylib->customerStubs;
-                }
-            } else {
-                // For non-arm64e, we can only optimize __stubs
-                if ( !strcmp(sectInfo.sectName, "__stubs") ) {
-                    devStubsChunk       = &cacheDylib->developmentStubs;
-                    customerStubsChunk  = &cacheDylib->customerStubs;
-                }
-            }
-
-            if ( devStubsChunk == nullptr )
-                return;
-
-            assert(devStubsChunk->segmentName.empty());
-            devStubsChunk->segmentName = sectInfo.segInfo.segName;
-            devStubsChunk->sectionName = sectInfo.sectName;
-            devStubsChunk->subCacheFileSize = CacheFileSize(sectInfo.sectSize);
-            devStubsChunk->cacheVMSize = CacheVMSize(sectInfo.sectSize);
-            devStubsSubCache.addStubsChunk(devStubsChunk);
-
-            assert(customerStubsChunk->segmentName.empty());
-            customerStubsChunk->segmentName = sectInfo.segInfo.segName;
-            customerStubsChunk->sectionName = sectInfo.sectName;
-            customerStubsChunk->subCacheFileSize = CacheFileSize(sectInfo.sectSize);
-            customerStubsChunk->cacheVMSize = CacheVMSize(sectInfo.sectSize);
-            customerStubsSubCache.addStubsChunk(customerStubsChunk);
-        });
+        // Make a stubs chunk for this stubs section
+        CacheDylib* cacheDylib = fileToDylibMap.at(textChunk->inputFile);
+        StubsChunk* devStubsChunk = &cacheDylib->developmentStubs;
+        StubsChunk* customerStubsChunk = &cacheDylib->customerStubs;
+
+        assert(devStubsChunk->segmentName.empty());
+        devStubsChunk->segmentName = sectInfo->segmentName;
+        devStubsChunk->sectionName = sectInfo->sectionName;
+        devStubsChunk->subCacheFileSize = CacheFileSize(sectInfo->size);
+        devStubsChunk->cacheVMSize = CacheVMSize(sectInfo->size);
+        devStubsSubCache.addStubsChunk(devStubsChunk);
+
+        assert(customerStubsChunk->segmentName.empty());
+        customerStubsChunk->segmentName = sectInfo->segmentName;
+        customerStubsChunk->sectionName = sectInfo->sectionName;
+        customerStubsChunk->subCacheFileSize = CacheFileSize(sectInfo->size);
+        customerStubsChunk->cacheVMSize = CacheVMSize(sectInfo->size);
+        customerStubsSubCache.addStubsChunk(customerStubsChunk);
     }
 }
 
@@ -2696,9 +3262,49 @@
                         // Nothing to do here
                         break;
                     case cache_builder::Region::Kind::dataConst:
+                    case cache_builder::Region::Kind::tproConst:
                     case cache_builder::Region::Kind::data:
                     case cache_builder::Region::Kind::auth:
-                    case cache_builder::Region::Kind::authConst: {
+                    case cache_builder::Region::Kind::authConst:
+                    case cache_builder::Region::Kind::tproAuthConst:{
+                        Region& newRegion = newSubCache.regions[(uint32_t)oldRegion.kind];
+                        newRegion.chunks = std::move(oldRegion.chunks);
+                        break;
+                    }
+                    case cache_builder::Region::Kind::readOnly:
+                    case cache_builder::Region::Kind::linkedit:
+                    case cache_builder::Region::Kind::unmapped:
+                    case cache_builder::Region::Kind::dynamicConfig:
+                    case cache_builder::Region::Kind::codeSignature:
+                    case cache_builder::Region::Kind::numKinds:
+                        break;
+                }
+            }
+        }
+
+        // Also split the current file so that the read-only regions are in their own file
+        {
+            // Create a new subCache
+            newSubCaches.push_back(SubCache::makeSubCache(options));
+            SubCache& newSubCache = newSubCaches.back();
+
+            // Move the read-only chunks to the new subCache
+            for ( Region& oldRegion : subCache.regions ) {
+                if ( oldRegion.chunks.empty() )
+                    continue;
+
+                // Move the read-only region, leave the text and data regions where they are
+                switch ( oldRegion.kind ) {
+                    case cache_builder::Region::Kind::text:
+                    case cache_builder::Region::Kind::dataConst:
+                    case cache_builder::Region::Kind::tproConst:
+                    case cache_builder::Region::Kind::data:
+                    case cache_builder::Region::Kind::auth:
+                    case cache_builder::Region::Kind::authConst:
+                    case cache_builder::Region::Kind::tproAuthConst:
+                        // Nothing to do here
+                        break;
+                    case cache_builder::Region::Kind::readOnly:{
                         Region& newRegion = newSubCache.regions[(uint32_t)oldRegion.kind];
                         newRegion.chunks = std::move(oldRegion.chunks);
                         break;
@@ -2725,56 +3331,104 @@
 {
     SubCache* currentSubCache = firstSubCache;
 
-    // We'll add LINKEDIT at the end.  As the shared region is <= 4GB in size, we can fit
-    // all the LINKEDIT in the last subCache and still keep it in range of 32-bit offsets
-    bool allLinkeditInLastSubCache = this->config.layout.allLinkeditInLastSubCache;
+    std::string_view libObjcInstallName = "/usr/lib/libobjc.A.dylib";
+    if ( this->options.platform.isExclaveKit() )
+        libObjcInstallName = "/System/ExclaveKit/usr/lib/libobjc.A.dylib";
+
+    // If we have an objc dylib, then get its optimizations now so that we can work out their size first
+    // The subCache with libobjc gets the header info sections, which for now we assume is the first subcache
+    // Add all the objc tables.  This must be done before we add libobjc's __TEXT
+    for ( CacheDylib& cacheDylib : this->cacheDylibs ) {
+        if ( cacheDylib.installName == libObjcInstallName ) {
+            this->addObjCOptimizationsToSubCache(*currentSubCache);
+            break;
+        }
+    }
 
     // Walk all the dylibs, and create a new subCache every time we are about to cross
-    // the subCacheTextLimit
-    CacheVMSize subCacheTextSize(0ULL);
-    for ( CacheDylib& cacheDylib : this->cacheDylibs ) {
-        // Peek ahead to find the __TEXT size
-        CacheVMSize textSize(0ULL);
-        for ( DylibSegmentChunk& segmentInfo : cacheDylib.segments ) {
-            if ( segmentInfo.kind == DylibSegmentChunk::Kind::dylibText )
-                textSize += segmentInfo.cacheVMSize;
-        }
-
-        // If we exceed the current limit, then the current subCache is complete and we need
-        // to start a new one
-        if ( (subCacheTextSize + textSize) > this->config.layout.subCacheTextLimit ) {
-            // Create a new subCache
-            otherCaches.push_back(SubCache::makeSubCache(this->options));
-            currentSubCache = &otherCaches.back();
-
-            // Reset the limit for the next subCache
-            subCacheTextSize = CacheVMSize(0ULL);
-        }
-
-        subCacheTextSize += textSize;
-
-        // The subCache with libobjc gets the header info sections
-        // Add all the objc tables.  This must be done before we add libobjc's __TEXT
-        std::string_view libObjcInstallName = "/usr/lib/libobjc.A.dylib";
-        if ( dyld3::MachOFile::isExclaveKitPlatform(this->options.platform) )
-            libObjcInstallName = "/System/ExclaveKit/usr/lib/libobjc.A.dylib";
-        if ( cacheDylib.installName == libObjcInstallName )
-            this->addObjCOptimizationsToSubCache(*currentSubCache);
-
-        // We'll add LINKEDIT at the end.  As the shared region is <= 4GB in size, we can fit
-        // all the LINKEDIT in the last subCache and still keep it in range of 32-bit offsets
-        bool addLinkedit = !allLinkeditInLastSubCache;
-        currentSubCache->addDylib(cacheDylib, addLinkedit);
+    // the subCache size limit
+    if ( this->config.layout.discontiguous.has_value() ) {
+        const CacheVMSize subCacheTextLimit = this->config.layout.discontiguous.value().subCacheTextLimit;
+        CacheVMSize subCacheTextSize(0ULL);
+        for ( CacheDylib& cacheDylib : this->cacheDylibs ) {
+            // Peek ahead to find the __TEXT size
+            CacheVMSize textSize(0ULL);
+            for ( DylibSegmentChunk& segmentInfo : cacheDylib.segments ) {
+                if ( segmentInfo.kind == DylibSegmentChunk::Kind::dylibText )
+                    textSize += segmentInfo.cacheVMSize;
+            }
+
+            // If we exceed the current limit, then the current subCache is complete and we need
+            // to start a new one
+            if ( (subCacheTextSize + textSize) > subCacheTextLimit ) {
+                // Create a new subCache
+                otherCaches.push_back(SubCache::makeSubCache(this->options));
+                currentSubCache = &otherCaches.back();
+
+                // Reset the running __TEXT size for the next subCache
+                subCacheTextSize = CacheVMSize(0ULL);
+            }
+
+            subCacheTextSize += textSize;
+
+            currentSubCache->addDylib(this->config, cacheDylib);
+            currentSubCache->addLinkeditFromDylib(cacheDylib);
+        }
+    } else {
+        const CacheVMSize subCacheTextDataLimit = this->config.layout.contiguous.value().subCacheTextDataLimit;
+        const CacheVMSize regionPadding = this->config.layout.contiguous.value().subCachePadding;
+        CacheVMSize subCacheTextDataSize(regionPadding);
+
+        // If we added any objc optimizations, then take their size into account now
+        for ( const Region& region : currentSubCache->regions ) {
+            if ( region.kind == Region::Kind::linkedit )
+                continue;
+            if ( region.chunks.empty() )
+                continue;
+            for ( const Chunk* chunk : region.chunks ) {
+                // Assume the worst in terms of padding, ie, that every segment is preceded by the max
+                // possible number of padding bytes
+                subCacheTextDataSize += CacheVMSize(chunk->alignment()) + chunk->cacheVMSize;
+            }
+        }
+
+        for ( CacheDylib& cacheDylib : this->cacheDylibs ) {
+            // Peek ahead to find the combined size of all the non-LINKEDIT segments (text and data)
+            CacheVMSize textDataSize(0ULL);
+            for ( DylibSegmentChunk& segmentInfo : cacheDylib.segments ) {
+                if ( segmentInfo.kind == DylibSegmentChunk::Kind::dylibLinkedit )
+                    continue;
+
+                // Assume the worst in terms of padding, ie, that every segment is preceded by the max
+                // possible number of padding bytes
+                textDataSize += CacheVMSize(segmentInfo.alignment()) + segmentInfo.cacheVMSize;
+
+                // The One Cache stubs will also be included in this limit so account for them too
+                // See addStubsChunks()
+                if ( std::optional<Header::SectionInfo> sectInfo = getStubsSection(&segmentInfo) )
+                    textDataSize += CacheVMSize(sectInfo->size);
+            }
+
+            // If we exceed the current limit, then the current subCache is complete and we need
+            // to start a new one
+            if ( (subCacheTextDataSize + textDataSize) > subCacheTextDataLimit ) {
+                // Create a new subCache
+                otherCaches.push_back(SubCache::makeSubCache(this->options));
+                currentSubCache = &otherCaches.back();
+
+                // Reset the running size for the next subCache, starting again from the padding
+                subCacheTextDataSize = CacheVMSize(regionPadding);
+            }
+
+            subCacheTextDataSize += textDataSize;
+
+            // LINKEDIT is added by the separate addLinkeditFromDylib() call below
+            currentSubCache->addDylib(this->config, cacheDylib);
+            currentSubCache->addLinkeditFromDylib(cacheDylib);
+        }
     }
 
     // Add all the remaining content in to the final (current) subCache
-
-    // Add linkedit chunks from dylibs, if needed
-    if ( allLinkeditInLastSubCache ) {
-        for ( CacheDylib& cacheDylib : this->cacheDylibs )
-            currentSubCache->addLinkeditFromDylib(cacheDylib);
-    }
-
 
     // Add all the global optimizations
     this->addGlobalOptimizationsToSubCache(*currentSubCache);
@@ -3153,6 +3807,13 @@
                 sourceStringSize += symbolString.size() + 1;
                 ++sourceStringCount;
 
+                // rdar://129398821 (dyld cache builder add support for binds relative to dylib segments)
+                // skip synthetic dyld symbols
+                if ( symbolString.find("$dyld$") != std::string_view::npos ) {
+                    ++oldSymbolIndex;
+                    return;
+                }
+
                 auto itAndInserted = stringMap.insert({ symbolString, stringBufferSize });
                 // If we inserted the string, then account for the space
                 if ( itAndInserted.second )
@@ -3415,17 +4076,54 @@
     return Error();
 }
 
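+// Returns the install name of the dylib that actually exports symbolName: boundDylibName itself, or else the first of its
+// re-exported dylibs (searched recursively) that does; empty if none is found.  Sets the function-variant outputs if that export is a function variant.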
+static CString getActualSymbolImplDylib(CString symbolName, CString boundDylibName,
+                                        const std::unordered_map<std::string_view, CacheDylib*>& installNameToDylibMap,
+                                        bool& symbolImplIsFunctionVariant, uint16_t& functionVariantIndex)
+{
+    // find if bind target is really in another dylib or is a function variant
+    __block CString betterDylib;
+    const auto& posi = installNameToDylibMap.find(boundDylibName);
+    if ( posi != installNameToDylibMap.end() ) {
+        CacheDylib* targetDylib = posi->second;
+        Symbol      symbol;
+        if ( targetDylib->inputImage->hasExportsTrie() && targetDylib->inputImage->exportsTrie().hasExportedSymbol(symbolName.c_str(), symbol) ) {
+            betterDylib = (const char*)(targetDylib->installName.data()); // FIXME
+            uint32_t fvi;
+            if (symbol.isFunctionVariant(fvi) ) {
+                symbolImplIsFunctionVariant = true;
+                functionVariantIndex        = fvi;
+            }
+        }
+        else {
+            // check if symbol is from re-exported dylib
+            const Header* targetHdr = (Header*)targetDylib->inputMF;
+            targetHdr->forEachLinkedDylib(^(const char* loadPath, LinkedDylibAttributes kind, Version32 compatVersion, Version32 curVersion, bool synthesizedLink, bool &stop) {
+                if ( kind.reExport ) {
+                    betterDylib = getActualSymbolImplDylib(symbolName, loadPath, installNameToDylibMap, symbolImplIsFunctionVariant, functionVariantIndex);
+                    if ( !betterDylib.empty() ) {
+                        stop = true;
+                    }
+                }
+            });
+        }
+    }
+    return betterDylib;
+}
+
+
 static void parseGOTs(const CacheDylib* dylib, const DylibSegmentChunk* chunk,
                       std::string_view segmentName, std::string_view sectionName,
-                      DylibSectionCoalescer::OptimizedSection& dylibOptimizedSection)
-{
-    const MachOFile* mf = dylib->inputMF;
+                      const std::unordered_map<std::string_view, CacheDylib*>& installNameToDylibMap,
+                      OptimizedGOTSection& dylibOptimizedSection)
+{
     __block Diagnostics diag;
 
     const bool log = false;
 
     // Skip ineligible dylibs
-    if ( !mf->hasChainedFixups() )
+    if ( !dylib->inputMF->hasChainedFixups() )
         return;
 
     // Some dylibs have auth gots in segments other than __AUTH_CONST. Skip them for now
@@ -3433,7 +4131,7 @@
         return;
 
     __block bool supportsGOTUniquing = false;
-    mf->withFileLayout(diag, ^(const mach_o::Layout& layout) {
+    dylib->inputMF->withFileLayout(diag, ^(const mach_o::Layout& layout) {
         mach_o::SplitSeg splitSeg(layout);
 
         if ( splitSeg.isV2() )
@@ -3443,13 +4141,11 @@
     if ( !supportsGOTUniquing )
         return;
 
-    if ( mf->isArch("x86_64") || mf->isArch("x86_64h") ) {
+    if ( dylib->inputHdr->isArch("x86_64") || dylib->inputHdr->isArch("x86_64h") ) {
         __block bool oldLinker = false;
-        mf->forEachSupportedBuildTool(^(dyld3::Platform platform, uint32_t tool, uint32_t version) {
+        dylib->inputHdr->forEachBuildTool(^(Platform platform, uint32_t tool, uint32_t version) {
             uint32_t majorVersion = version >> 16;
-            // uint32_t minorVersion = (version >> 8) && 0xFF;
-            // uint32_t veryMinorVersion = version && 0xFF;
-
+            
             if ( tool == TOOL_LD ) {
                 if ( majorVersion < 803 )
                     oldLinker = true;
@@ -3461,37 +4157,37 @@
     }
 
     // rdar://89319146
-    if ( mf->isArch("x86_64") || mf->isArch("x86_64h") ) {
-        if ( !strcmp(mf->installName(), "/System/Library/Frameworks/Foundation.framework/Versions/C/Foundation") )
+    if ( dylib->inputHdr->isArch("x86_64") || dylib->inputHdr->isArch("x86_64h") ) {
+        if ( !strcmp(dylib->inputHdr->installName(), "/System/Library/Frameworks/Foundation.framework/Versions/C/Foundation") )
             return;
-        if ( !strcmp(mf->installName(), "/usr/lib/system/libdispatch.dylib") )
+        if ( !strcmp(dylib->inputHdr->installName(), "/usr/lib/system/libdispatch.dylib") )
             return;
     }
 
     // rdar://86911139
-    if ( mf->builtForPlatform(dyld3::Platform::iOS)
-        && !strcmp(mf->installName(), "/System/Library/PrivateFrameworks/CoreUI.framework/CoreUI") )
+    if ( dylib->inputHdr->builtForPlatform(Platform::iOS)
+        && !strcmp(dylib->inputHdr->installName(), "/System/Library/PrivateFrameworks/CoreUI.framework/CoreUI") )
         return;
 
     // Dylib segment is eligible.  Walk the GOTs
-    __block std::optional<dyld3::MachOAnalyzer::SectionInfo> gotSectionInfo;
+    __block std::optional<std::pair<Header::SegmentInfo, Header::SectionInfo>> gotSectionInfo;
     __block uint16_t chainedFixupFormat = 0;
-    mf->forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
-        if ( sectInfo.segInfo.segName != segmentName )
+    dylib->inputHdr->forEachSection(^(const Header::SegmentInfo& segInfo, const Header::SectionInfo& sectInfo, bool& stop) {
+        if ( sectInfo.segmentName != segmentName )
             return;
-        if ( sectInfo.sectName != sectionName)
+        if ( sectInfo.sectionName != sectionName)
             return;
-        gotSectionInfo = sectInfo;
+        gotSectionInfo = { segInfo, sectInfo };
 
         // As we found the section we want, also get its chained fixup format
-        mf->withFileLayout(diag, ^(const mach_o::Layout& layout) {
+        dylib->inputMF->withFileLayout(diag, ^(const mach_o::Layout& layout) {
             mach_o::Fixups fixups(layout);
 
             fixups.withChainStarts(diag, ^(const dyld_chained_starts_in_image* starts) {
                 MachOFile::forEachFixupChainSegment(diag, starts,
                                                     ^(const dyld_chained_starts_in_segment* segmentInfo,
                                                       uint32_t segIndex, bool& stopSegment) {
-                    if ( segIndex == sectInfo.segInfo.segIndex ) {
+                    if ( segIndex == sectInfo.segIndex ) {
                         chainedFixupFormat = segmentInfo->pointer_format;
                         stopSegment = true;
                     }
@@ -3510,7 +4206,7 @@
         return;
 
     __block std::vector<mach_o::Fixups::BindTargetInfo> bindTargets;
-    mf->withFileLayout(diag, ^(const mach_o::Layout& layout) {
+    dylib->inputMF->withFileLayout(diag, ^(const mach_o::Layout& layout) {
         mach_o::Fixups fixups(layout);
 
         fixups.forEachBindTarget(diag, false, 0, ^(const mach_o::Fixups::BindTargetInfo& info, bool& stop) {
@@ -3527,20 +4223,19 @@
         return;
 
     __block std::vector<const char*> dependents;
-    mf->forEachDependentDylib(^(const char *loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool &stop) {
+    dylib->inputMF->forEachDependentDylib(^(const char *loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool &stop) {
         dependents.push_back(loadPath);
     });
 
-    auto* cacheGotSection = (CoalescedGOTSection*)dylibOptimizedSection.subCacheSection;
-    DylibSectionCoalescer::DylibSectionOffsetToCacheSectionOffset& offsetMap = dylibOptimizedSection.offsetMap;
+    bool hasFunctionVariantFixups = dylib->inputHdr->hasFunctionVariantFixups();
 
     // Walk the entries in this section
     // File layout so just add the file offset
-    const uint8_t* content      = (const uint8_t*)mf + gotSectionInfo->sectFileOffset;
+    const uint8_t* content      = (const uint8_t*)dylib->inputHdr + gotSectionInfo->second.fileOffset;
     const uint8_t* pos          = content;
-    const uint8_t* end          = content + gotSectionInfo->sectSize;
-    uint32_t       pointerSize  = mf->pointerSize();
-    assert((gotSectionInfo->sectSize % pointerSize == 0));
+    const uint8_t* end          = content + gotSectionInfo->second.size;
+    uint32_t       pointerSize  = dylib->inputHdr->pointerSize();
+    assert((gotSectionInfo->second.size % pointerSize == 0));
     while ( pos != end ) {
         const dyld3::MachOLoaded::ChainedFixupPointerOnDisk* fixup = (const dyld3::MachOLoaded::ChainedFixupPointerOnDisk*)pos;
         pos += pointerSize;
@@ -3551,56 +4246,83 @@
         uint32_t sourceSectionOffset = (uint32_t)((uint64_t)fixup - (uint64_t)content);
 
         // Note down rebases, but otherwise skip them
+        __block bool     targetIsFunctionVariant  = false;
+        __block uint16_t functionVariantIndex     = 0;
         if ( !isBind ) {
-            dylibOptimizedSection.unoptimizedOffsets.insert(sourceSectionOffset);
-            continue;
-        }
-
-        // We don't support addends right now.  But hopefully GOTs don't need them anyway
-        if ( addend != 0 )
-            continue;
-
-        const mach_o::Fixups::BindTargetInfo& bindTarget = bindTargets[bindOrdinal];
-
-        // TODO: Weak GOTs.  See rdar://86510941
-        const char* targetInstallName = nullptr;
-        if ( (bindTarget.libOrdinal > 0) && ((unsigned)bindTarget.libOrdinal <= dependents.size()) ) {
-            targetInstallName = dependents[bindTarget.libOrdinal - 1];
-        } else {
-            dylibOptimizedSection.unoptimizedOffsets.insert(sourceSectionOffset);
-            continue;
-        }
-
-        MachOFile::PointerMetaData pmd(fixup, chainedFixupFormat);
-
-        typedef CoalescedGOTSection::GOTKey Key;
-        Key key = { bindTarget.symbolName, targetInstallName, pmd, bindTarget.weakImport };
-
-        int cacheSectionOffset = (int)(cacheGotSection->gotTargetsToOffsets.size() * pointerSize);
-        auto itAndInserted = cacheGotSection->gotTargetsToOffsets.insert({ key, cacheSectionOffset });
-        if ( itAndInserted.second ) {
-            // We inserted the element, so its offset is already valid.  Nothing else to do
-
+            if ( hasFunctionVariantFixups ) {
+                // see if this rebase is at the location of an internal function variant fixup
+                uint32_t segIndex = gotSectionInfo->first.segmentIndex;
+                dylib->inputImage->functionVariantFixups().forEachFixup(^(mach_o::FunctionVariantFixups::InternalFixup fixupInfo) {
+                    if ( fixupInfo.segIndex != segIndex )
+                        return;
+                    uint64_t segOffset = sourceSectionOffset + gotSectionInfo->second.address - gotSectionInfo->first.vmaddr;
+                    if ( fixupInfo.segOffset == segOffset ) {
+                        targetIsFunctionVariant = true;
+                        functionVariantIndex = fixupInfo.variantIndex;
+                    }
+                });
+            }
+            if ( !targetIsFunctionVariant ) {
+                dylibOptimizedSection.addUnoptimizedOffset(sourceSectionOffset);
+                continue;
+            }
+        }
+
+        const CacheDylib*           targetDylib          = nullptr;
+        CoalescedGOTSection::GOTKey key;
+        if ( targetIsFunctionVariant ) {
+            MachOFile::PointerMetaData pmd(fixup, chainedFixupFormat);
+            char* synthName;
+            asprintf(&synthName, "internal-function-variant#%d", functionVariantIndex);
+            key         = { synthName, dylib->installName, pmd, false, true };
+            targetDylib = dylib;
+        }
+        else {
+            // We don't support addends right now.  But hopefully GOTs don't need them anyway
+            if ( addend != 0 )
+                continue;
+
+            const mach_o::Fixups::BindTargetInfo& bindTarget = bindTargets[bindOrdinal];
+
+            // TODO: Weak GOTs.  See rdar://86510941
+            const char* targetInstallName = nullptr;
+            if ( (bindTarget.libOrdinal > 0) && ((unsigned)bindTarget.libOrdinal <= dependents.size()) ) {
+                targetInstallName = dependents[bindTarget.libOrdinal - 1];
+            } else {
+                dylibOptimizedSection.addUnoptimizedOffset(sourceSectionOffset);
+                continue;
+            }
+
+            // find if bind target is really in another dylib or is a function variant
+            CString betterDylib = getActualSymbolImplDylib(bindTarget.symbolName, targetInstallName, installNameToDylibMap, targetIsFunctionVariant, functionVariantIndex);
+            if ( !betterDylib.empty() && (betterDylib != targetInstallName) ) {
+                targetInstallName = betterDylib.c_str();
+                //    fprintf(stderr, "%s found in %s instead of %s\n", bindTarget.symbolName, betterDylib.c_str(), targetInstallName);
+            }
+
+            if ( targetIsFunctionVariant )
+                targetDylib = installNameToDylibMap.at(targetInstallName);
+
+            MachOFile::PointerMetaData pmd(fixup, chainedFixupFormat);
+            key = { bindTarget.symbolName, targetInstallName, pmd, bindTarget.weakImport, targetIsFunctionVariant };
+        }
+
+        if ( dylibOptimizedSection.addOptimizedOffset(sourceSectionOffset, pointerSize, key) ) {
             if (log) {
                 uint64_t gotOffset = ((uint64_t)pos - (uint64_t)content) - pointerSize;
                 printf("%s[%lld]: %s -> (%s, %s)\n",
-                       sectionName.data(), gotOffset, mf->installName(),
+                       sectionName.data(), gotOffset, dylib->inputHdr->installName(),
                        key.targetDylibName.data(), key.targetSymbolName.data());
             }
-        } else {
-            // Debugging only.  If we didn't include the GOT then we saved that many bytes
-            cacheGotSection->savedSpace += pointerSize;
-            cacheSectionOffset = itAndInserted.first->second;
-        }
-
-        // Now keep track of this offset in our source dylib as pointing to this offset
-        offsetMap[sourceSectionOffset] = cacheSectionOffset;
-    }
-
-    // Record which segment/section we just visited
-    uint32_t segmentIndex = gotSectionInfo->segInfo.segIndex;
-    dylibOptimizedSection.segmentIndex = segmentIndex;
-    dylibOptimizedSection.sectionVMOffsetInSegment = VMOffset(gotSectionInfo->sectAddr - gotSectionInfo->segInfo.vmAddr);
+        }
+
+        // store function-variant index in other map
+        if ( targetIsFunctionVariant )
+            dylibOptimizedSection.addFunctionVariantInfo(key, { targetDylib->cacheIndex, functionVariantIndex });
+    }
+
+    // Record which section we just visited
+    dylibOptimizedSection.setSourceSectionInfo(gotSectionInfo->second);
 }
 
 // This runs after we've assigned Chunk's to SubCache's, but before we've actually
@@ -3619,9 +4341,12 @@
 
     // DylibSegmentChunk's don't have a pointer to their cache dylib.  Make a map for them
     std::unordered_map<const InputFile*, CacheDylib*> fileToDylibMap;
+    std::unordered_map<std::string_view, CacheDylib*> installNameToDylibMap;
     fileToDylibMap.reserve(this->cacheDylibs.size());
-    for ( CacheDylib& dylib : this->cacheDylibs )
+    for ( CacheDylib& dylib : this->cacheDylibs ) {
         fileToDylibMap[dylib.inputFile] = &dylib;
+        installNameToDylibMap[dylib.installName] = &dylib;
+    }
 
     for ( SubCache& subCache : this->subCaches ) {
         // Find the DATA_CONST/AUTH_CONST in each SubCache, if it has any
@@ -3641,20 +4366,44 @@
         if ( (dataConstRegion == nullptr) && (authConstRegion == nullptr) )
             continue;
 
-        for ( bool auth : { false, true } ) {
-            if ( auth && (authConstRegion == nullptr) )
+        for ( UniquedGOTKind sectionKind : { UniquedGOTKind::regular, UniquedGOTKind::authGot, UniquedGOTKind::authPtr } ) {
+
+            Region* region = nullptr;
+            std::string_view segmentName;
+            std::string_view sectionName;
+            const char* kindName = nullptr;
+            CoalescedGOTSection* subCacheUniquedGOTs = nullptr;
+
+            // Skip sections if their segment doesn't exist
+            switch ( sectionKind ) {
+                case UniquedGOTKind::regular:
+                    region = dataConstRegion;
+                    segmentName = "__DATA_CONST";
+                    sectionName = "__got";
+                    kindName = "regular";
+                    subCacheUniquedGOTs = &subCache.uniquedGOTsOptimizer.regularGOTs;
+                    break;
+                case UniquedGOTKind::authGot:
+                    region = authConstRegion;
+                    segmentName = "__AUTH_CONST";
+                    sectionName = "__auth_got";
+                    kindName = "auth-gots";
+                    subCacheUniquedGOTs = &subCache.uniquedGOTsOptimizer.authGOTs;
+                    break;
+                case UniquedGOTKind::authPtr:
+                    region = authConstRegion;
+                    segmentName = "__AUTH_CONST";
+                    sectionName = "__auth_ptr";
+                    kindName = "auth-ptrs";
+                    subCacheUniquedGOTs = &subCache.uniquedGOTsOptimizer.authPtrs;
+                    break;
+            }
+            if ( region == nullptr )
                 continue;
-            if ( !auth && (dataConstRegion == nullptr) )
-                continue;
-
-            Region& region = auth ? *authConstRegion : *dataConstRegion;
-            std::string_view segmentName = auth ? "__AUTH_CONST" : "__DATA_CONST";
-            std::string_view sectionName = auth ? "__auth_got" : "__got";
-            CoalescedGOTSection& subCacheUniquedGOTs = auth ? subCache.uniquedGOTsOptimizer.authGOTs : subCache.uniquedGOTsOptimizer.regularGOTs;
-
-            std::vector<DylibSectionCoalescer::OptimizedSection*> dylibOptimizedSections;
-            dylibOptimizedSections.reserve(region.chunks.size());
-            for ( const Chunk* chunk : region.chunks ) {
+
+            std::vector<OptimizedSection*> dylibOptimizedSections;
+            dylibOptimizedSections.reserve(region->chunks.size());
+            for ( const Chunk* chunk : region->chunks ) {
                 const DylibSegmentChunk* segmentChunk = chunk->isDylibSegmentChunk();
                 if ( !segmentChunk )
                     continue;
@@ -3662,109 +4411,34 @@
                 if ( chunk->name() != segmentName )
                     continue;
 
-                CacheDylib*                 dylib = fileToDylibMap.at(segmentChunk->inputFile);
-                auto&                       dylibUniquedGOTs = auth ? dylib->optimizedSections.auth_gots : dylib->optimizedSections.gots;
+                CacheDylib* dylib = fileToDylibMap.at(segmentChunk->inputFile);
+                OptimizedGOTSection* dylibUniquedGOTs = nullptr;
+                switch ( sectionKind ) {
+                    case UniquedGOTKind::regular:
+                        dylibUniquedGOTs = &dylib->optimizedSections.gots;
+                        break;
+                    case UniquedGOTKind::authGot:
+                        dylibUniquedGOTs = &dylib->optimizedSections.auth_gots;
+                        break;
+                    case UniquedGOTKind::authPtr:
+                        dylibUniquedGOTs = &dylib->optimizedSections.auth_ptrs;
+                        break;
+                }
 
                 // Set the dylib GOTs to point to the subCache they'll be uniqued to
-                dylibUniquedGOTs.subCacheSection = &subCacheUniquedGOTs;
-                dylibOptimizedSections.push_back(&dylibUniquedGOTs);
-
-                parseGOTs(dylib, segmentChunk, segmentName, sectionName, dylibUniquedGOTs);
-            }
-
-            if ( subCacheUniquedGOTs.gotTargetsToOffsets.empty() )
+                dylibUniquedGOTs->setSubCacheSection(subCacheUniquedGOTs);
+
+                parseGOTs(dylib, segmentChunk, segmentName, sectionName, installNameToDylibMap, *dylibUniquedGOTs);
+            }
+
+            if ( subCacheUniquedGOTs->empty() )
                 continue;
 
-            // Sort the coalesced GOTs based on the target install name.  We find GOTs in the order we parse
-            // the fixups in the dylibs, but we want the final cache to keep all GOTs for the same target near
-            // each other
-            typedef CoalescedGOTSection::GOTKey Key;
-            std::vector<Key> sortedKeys;
-            sortedKeys.reserve(subCacheUniquedGOTs.gotTargetsToOffsets.size());
-            for ( const auto& keyAndValue : subCacheUniquedGOTs.gotTargetsToOffsets )
-                sortedKeys.push_back(keyAndValue.first);
-
-            std::sort(sortedKeys.begin(), sortedKeys.end(),
-                      [](const Key& a, const Key& b) {
-                // Put libSystem first, then all the /usr/lib/system dylibs
-                // That way any GOTs for re-exports from libsystem will be close to similar GOTs
-                bool isLibsystemA = a.targetDylibName.find("libSystem.B.dylib") != std::string_view::npos;
-                bool isLibsystemB = b.targetDylibName.find("libSystem.B.dylib") != std::string_view::npos;
-                if ( isLibsystemA != isLibsystemB )
-                    return isLibsystemA;
-
-                bool isLibsystemReexportA = a.targetDylibName.find("/usr/lib/system") != std::string_view::npos;
-                bool isLibsystemReexportB = b.targetDylibName.find("/usr/lib/system") != std::string_view::npos;
-                if ( isLibsystemReexportA != isLibsystemReexportB )
-                    return isLibsystemReexportA;
-
-                if ( a.targetDylibName != b.targetDylibName )
-                    return (a.targetDylibName < b.targetDylibName);
-
-                // Install names are the same.  Sort by symbol name
-                return a.targetSymbolName < b.targetSymbolName;
-            });
-
-            // Rewrite entries from their original offset to the new offset
-            std::unordered_map<uint32_t, uint32_t> oldToNewOffsetMap;
-            for ( uint32_t i = 0; i != sortedKeys.size(); ++i ) {
-                const Key& key = sortedKeys[i];
-                auto it = subCacheUniquedGOTs.gotTargetsToOffsets.find(key);
-                assert(it != subCacheUniquedGOTs.gotTargetsToOffsets.end());
-
-                uint32_t newCacheSectionOffset = i * pointerSize;
-
-                // Record the offset mapping for updating the dylibs
-                oldToNewOffsetMap[it->second] = newCacheSectionOffset;
-
-                const bool log = false;
-                if ( log ) {
-                    printf("%s[%d]: %s\n", sectionName.data(), newCacheSectionOffset, key.targetSymbolName.data());
-                }
-
-                it->second = newCacheSectionOffset;
-            }
-
-            // Also rewrite entries in each dylib
-            for ( DylibSectionCoalescer::OptimizedSection* dylibOptimizedSection : dylibOptimizedSections ) {
-                for ( auto& keyAndCacheOffset : dylibOptimizedSection->offsetMap ) {
-                    auto it = oldToNewOffsetMap.find(keyAndCacheOffset.second);
-                    assert(it != oldToNewOffsetMap.end());
-                    keyAndCacheOffset.second = it->second;
-                }
-            }
-
-            // Add the new chunks to the subCache
-            if ( auth ) {
-                subCache.uniquedAuthGOTs                    = std::make_unique<UniquedGOTsChunk>();
-                subCache.uniquedAuthGOTs->cacheVMSize       = CacheVMSize((uint64_t)subCacheUniquedGOTs.gotTargetsToOffsets.size() * pointerSize);
-                subCache.uniquedAuthGOTs->subCacheFileSize  = CacheFileSize((uint64_t)subCacheUniquedGOTs.gotTargetsToOffsets.size() * pointerSize);
-
-                region.chunks.push_back(subCache.uniquedAuthGOTs.get());
-
-                // FIXME: Do we need this. No-one seems to read it from here, or could get it from the subCache instead
-                subCache.uniquedGOTsOptimizer.authGOTsChunk = subCache.uniquedAuthGOTs.get();
-                subCache.uniquedGOTsOptimizer.authGOTs.cacheChunk = subCache.uniquedGOTsOptimizer.authGOTsChunk;
-            } else {
-                subCache.uniquedGOTs                    = std::make_unique<UniquedGOTsChunk>();
-                subCache.uniquedGOTs->cacheVMSize       = CacheVMSize((uint64_t)subCacheUniquedGOTs.gotTargetsToOffsets.size() * pointerSize);
-                subCache.uniquedGOTs->subCacheFileSize  = CacheFileSize((uint64_t)subCacheUniquedGOTs.gotTargetsToOffsets.size() * pointerSize);
-
-                region.chunks.push_back(subCache.uniquedGOTs.get());
-
-                // FIXME: Do we need this. No-one seems to read it from here, or could get it from the subCache instead
-                subCache.uniquedGOTsOptimizer.regularGOTsChunk = subCache.uniquedGOTs.get();
-                subCache.uniquedGOTsOptimizer.regularGOTs.cacheChunk = subCache.uniquedGOTsOptimizer.regularGOTsChunk;
-            }
+            subCacheUniquedGOTs->finalize(pointerSize, sectionName, config, subCache, *region);
 
             if ( this->config.log.printStats ) {
-                uint64_t totalSourceGOTs = 0;
-                for ( DylibSectionCoalescer::OptimizedSection* dylibOptimizedSection : dylibOptimizedSections ) {
-                    totalSourceGOTs += dylibOptimizedSection->offsetMap.size();
-                }
-                const char* kind = auth ? "auth" : "regular";
                 stats.add("  got uniquing: uniqued %lld %s GOTs to %lld GOTs\n",
-                          totalSourceGOTs, kind, (uint64_t)subCacheUniquedGOTs.gotTargetsToOffsets.size());
+                          subCacheUniquedGOTs->numSourceGOTs(), kindName, subCacheUniquedGOTs->numCacheGOTs());
             }
         }
     }
@@ -3797,7 +4471,8 @@
         const DylibSegmentChunk* segmentA = a->isDylibSegmentChunk();
         const DylibSegmentChunk* segmentB = b->isDylibSegmentChunk();
 
-        if ( segmentA->kind == DylibSegmentChunk::Kind::dylibDataDirty ) {
+        // There can be data chunks that aren't dylib segments, e.g. ObjCHeaderInfoReadWriteChunk.
+        if ( segmentA && segmentB && segmentA->kind == DylibSegmentChunk::Kind::dylibDataDirty ) {
             const auto& orderA = dirtyDataSegmentOrdering.find(segmentA->inputFile->path);
             const auto& orderB = dirtyDataSegmentOrdering.find(segmentB->inputFile->path);
             bool        foundA = (orderA != dirtyDataSegmentOrdering.end());
@@ -3813,13 +4488,25 @@
                 return false;
         }
 
+        const DylibSegmentChunk* dylibA = a->isTPROChunk();
+        const DylibSegmentChunk* dylibB = b->isTPROChunk();
+        // Note this shouldn't be possible, but best to be safe and avoid asserting
+        if ( dylibA && dylibB ) {
+            // Sort dyld last so that its allocator gets packed with TPRO from other dylibs
+            bool isDyldA = dylibA->inputFile->path == "/usr/lib/dyld";
+            bool isDyldB = dylibB->inputFile->path == "/usr/lib/dyld";
+            if ( isDyldA != isDyldB )
+                return !isDyldA;
+        }
+
         // Note we are using a stable sort, so if the kind's aren't different, return false
         // and we'll keep Section's in the order they were added to the vector
         return false;
     };
 
-    auto linkeditSortOrder = [](const Chunk* a, const Chunk* b) -> bool {
-        // Sort read-only segments before LINKEDIT
+    auto dataConstSortOrder = [](const Chunk* a, const Chunk* b) -> bool {
+        // Sort TPRO_CONST before DATA_CONST. This only happens on x86_64
+        // where we put TPRO_CONST and DATA_CONST in the same Region
         if ( a->sortOrder() != b->sortOrder() )
             return a->sortOrder() < b->sortOrder();
 
@@ -3828,19 +4515,93 @@
         return false;
     };
 
-    // Only sort data/auth.  Everything else is already in order
+    // Comparator handed to std::stable_sort for the tproConst/tproAuthConst regions.
+    // Orders by sortOrder() first; for two TPRO dylib chunks with the same sortOrder(),
+    // the chunk that comes from /usr/lib/dyld is placed after the other one.
+    auto tproConstSortOrder = [](const Chunk* a, const Chunk* b) -> bool {
+        // Sort uniqued GOTs before dylib TPRO_CONST.  This puts the uniqued
+        // GOTs before the libdyld TPRO_CONST, but should still be on the same page
+        const auto orderA = a->sortOrder();
+        const auto orderB = b->sortOrder();
+        if ( orderA != orderB )
+            return orderA < orderB;
+
+        // Both chunks are expected to be TPRO chunks here.  If either one is not,
+        // treat them as equivalent rather than asserting.
+        const DylibSegmentChunk* tproA = a->isTPROChunk();
+        const DylibSegmentChunk* tproB = b->isTPROChunk();
+        if ( (tproA == nullptr) || (tproB == nullptr) )
+            return false;
+
+        // Sort dyld last so that its allocator gets packed with TPRO from other dylibs
+        const bool aIsDyld = (tproA->inputFile->path == "/usr/lib/dyld");
+        const bool bIsDyld = (tproB->inputFile->path == "/usr/lib/dyld");
+
+        // 'a' goes first only when 'b' is dyld and 'a' is not.  In every other case
+        // (including both-dyld and neither-dyld) report "not less" so that the stable
+        // sort keeps the chunks in the order they were added to the vector.
+        return bIsDyld && !aIsDyld;
+    };
+
+    // Comparator handed to std::stable_sort for the readOnly and linkedit regions.
+    auto readOnlySortOrder = [](const Chunk* a, const Chunk* b) -> bool {
+        // Sort read-only segments before LINKEDIT.  Chunks whose sortOrder() values are
+        // equal compare as equivalent, and because a stable sort is used they stay in
+        // the order they were added to the vector.
+        return a->sortOrder() < b->sortOrder();
+    };
+
     for ( SubCache& subCache : this->subCaches ) {
         for ( Region& region : subCache.regions ) {
-            if ( region.kind == Region::Kind::text ) {
-                std::stable_sort(region.chunks.begin(), region.chunks.end(), textSortOrder);
-            }
-            else if ( (region.kind == Region::Kind::data) || (region.kind == Region::Kind::auth) ) {
-                std::stable_sort(region.chunks.begin(), region.chunks.end(), dataSortOrder);
-            }
-            else if ( region.kind == Region::Kind::linkedit ) {
-                std::stable_sort(region.chunks.begin(), region.chunks.end(), linkeditSortOrder);
-            }
-        }
+            switch ( region.kind ) {
+                case Region::Kind::text:
+                    std::stable_sort(region.chunks.begin(), region.chunks.end(), textSortOrder);
+                    break;
+                case Region::Kind::dataConst:
+                case Region::Kind::authConst:
+                    std::stable_sort(region.chunks.begin(), region.chunks.end(), dataConstSortOrder);
+                    break;
+                case Region::Kind::tproConst:
+                case Region::Kind::tproAuthConst:
+                    std::stable_sort(region.chunks.begin(), region.chunks.end(), tproConstSortOrder);
+                    break;
+                case Region::Kind::data:
+                case Region::Kind::auth:
+                    std::stable_sort(region.chunks.begin(), region.chunks.end(), dataSortOrder);
+                    break;
+                case Region::Kind::readOnly:
+                case Region::Kind::linkedit:
+                    std::stable_sort(region.chunks.begin(), region.chunks.end(), readOnlySortOrder);
+                    break;
+                case Region::Kind::unmapped:
+                case Region::Kind::dynamicConfig:
+                case Region::Kind::codeSignature:
+                case Region::Kind::numKinds:
+                    break;
+            }
+        }
+    }
+
+    // After sorting, we have to add alignment chunks before/after x86_64 TPRO
+    if ( this->config.layout.tproIsInData )
+        addAlignmentChunks();
+}
+
+// For every TPRO range reported by SubCache::forEachTPRORegionInData(), brackets the
+// range with two freshly created alignment chunks: one immediately before the first
+// chunk of the range and one immediately after the last chunk of the range.
+void SharedCacheBuilder::addAlignmentChunks()
+{
+    for ( SubCache& subCache : this->subCaches ) {
+        SubCache::forEachTPRORegionInData(&subCache, {}, ^(Region& region, const Chunk *firstChunk, const Chunk *lastChunk) {
+            auto& chunks = region.chunks;
+
+            // Leading alignment chunk: goes directly in front of firstChunk
+            auto headIt = std::find(chunks.begin(), chunks.end(), firstChunk);
+            assert(headIt != chunks.end());
+            chunks.insert(headIt, &region.alignmentChunks.emplace_back());
+
+            // Trailing alignment chunk: goes directly behind lastChunk.  The position is
+            // looked up again because the insert above changed the chunk list.
+            auto tailIt = std::find(chunks.begin(), chunks.end(), lastChunk);
+            assert(tailIt != chunks.end());
+            chunks.insert(tailIt + 1, &region.alignmentChunks.emplace_back());
+        });
     }
 }
 
@@ -3900,8 +4661,10 @@
     };
 
     for ( const SubCache& subCache : this->subCaches ) {
+        calculateRegionSlideInfoSize(this->config, Region::Kind::tproConst, subCache.regions, subCache.tproConstSlideInfo);
         calculateRegionSlideInfoSize(this->config, Region::Kind::data, subCache.regions, subCache.dataSlideInfo);
         calculateRegionSlideInfoSize(this->config, Region::Kind::dataConst, subCache.regions, subCache.dataConstSlideInfo);
+        calculateRegionSlideInfoSize(this->config, Region::Kind::tproAuthConst, subCache.regions, subCache.tproAuthConstSlideInfo);
         calculateRegionSlideInfoSize(this->config, Region::Kind::auth, subCache.regions, subCache.authSlideInfo);
         calculateRegionSlideInfoSize(this->config, Region::Kind::authConst, subCache.regions, subCache.authConstSlideInfo);
     }
@@ -3933,7 +4696,7 @@
 
 void SharedCacheBuilder::printSubCaches() const
 {
-    const bool printSegments = this->config.log.printDebug;
+    const bool printSegments = this->config.log.printDebugCacheLayout;
 
     if ( !this->config.log.printStats )
         return;
@@ -3952,11 +4715,20 @@
                 case Region::Kind::dataConst:
                     regionName = "dataConst";
                     break;
+                case Region::Kind::tproConst:
+                    regionName = "tproConst";
+                    break;
                 case Region::Kind::auth:
                     regionName = "auth";
                     break;
                 case Region::Kind::authConst:
                     regionName = "authConst";
+                    break;
+                case Region::Kind::tproAuthConst:
+                    regionName = "tproAuthConst";
+                    break;
+                case Region::Kind::readOnly:
+                    regionName = "readOnly";
                     break;
                 case Region::Kind::linkedit:
                     regionName = "linkedit";
@@ -4158,12 +4930,15 @@
                     case Region::Kind::codeSignature:
                     case Region::Kind::numKinds:
                         break;
+                    case Region::Kind::tproConst:
                     case Region::Kind::data:
                     case Region::Kind::dataConst:
+                    case Region::Kind::tproAuthConst:
                     case Region::Kind::auth:
                     case Region::Kind::authConst:
                         lastReadWriteRegion = &region;
                         break;
+                    case Region::Kind::readOnly:
                     case Region::Kind::dynamicConfig:
                     case Region::Kind::linkedit:
                         lastReadOnlyRegion = &region;
@@ -4418,7 +5193,6 @@
         const Region& lastRegion = subCache.regions.back();
         uint64_t      bufferSize = (lastRegion.subCacheFileOffset + lastRegion.subCacheFileSize).rawValue();
 
-#if SUPPORT_CACHE_BUILDER_MEMORY_BUFFERS
         // The MRM builder has no file system, so use an in-memory buffer
         vm_address_t fullAllocatedBuffer;
         if ( kern_return_t kr = vm_allocate(mach_task_self(), &fullAllocatedBuffer, bufferSize, VM_FLAGS_ANYWHERE); kr != 0 ) {
@@ -4428,47 +5202,6 @@
         uint8_t *buffer = (uint8_t*)fullAllocatedBuffer;
         subCache.buffer     = buffer;
         subCache.bufferSize = bufferSize;
-#else
-        char pathTemplate[] = "/tmp/temp.XXXXXX";
-        int  fd             = mkstemp(pathTemplate);
-        if ( fd == -1 ) {
-            // Failed to create the file
-            return Error("could not create shared cache file because: %s", strerror(errno));
-        }
-
-        // Resize the file
-        if ( int result = ftruncate(fd, bufferSize); result == -1 ) {
-            // Failed to resize to the space we need
-            return Error("could not truncate shared cache file because: %s", strerror(errno));
-        }
-
-        void* buffer = mmap(nullptr, (vm_size_t)bufferSize, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0);
-        if ( buffer == MAP_FAILED ) {
-            // Failed to mmap the file
-            return Error("could not mmap shared cache file because: %s", strerror(errno));
-        }
-
-        // TODO: It would be great to unlink the file, so that it won't be there on disk if the builder crashes
-#if 0
-        // Unlink the file.  This way, it'll be removed if we crash
-        if ( int result = unlinkat(fd, pathTemplate, 0) ) {
-            // Failed to unlink the file
-            return Error("could not unlink shared cache file because: %s", strerror(errno));
-        }
-
-        // Close the file as we don't need it now that we have the mmapped buffer
-        if ( int result = close(fd) ) {
-            // Failed to close the file
-            return Error("could not close shared cache file because: %s", strerror(errno));
-        }
-#endif
-
-        subCache.buffer     = (uint8_t*)buffer;
-        subCache.bufferSize = bufferSize;
-        subCache.fd         = fd;
-        subCache.tempPath   = pathTemplate;
-
-#endif // SUPPORT_CACHE_BUILDER_MEMORY_BUFFERS
 
         if ( log ) {
             this->config.log.log("SubCache[%d] allocated (%p..%p)\n",
@@ -4494,6 +5227,7 @@
         assert(!cacheDylib.segments.empty());
         assert(cacheDylib.segments[0].kind == cache_builder::DylibSegmentChunk::Kind::dylibText);
         cacheDylib.cacheMF          = (MachOFile*)cacheDylib.segments[0].subCacheBuffer;
+        cacheDylib.cacheHdr         = (const Header*)cacheDylib.segments[0].subCacheBuffer;
         cacheDylib.cacheLoadAddress = cacheDylib.segments[0].cacheVMAddress;
     }
 
@@ -4509,7 +5243,7 @@
     }
 
     // Add a watchpoint for anything we need to debug
-#if DEBUG
+#if 0
     {
         CacheVMAddress vmAddrToWatch(0x00007FFB40FB4D58ULL);
         for ( const SubCache& subCache : this->subCaches ) {
@@ -4531,6 +5265,31 @@
     }
 #endif
 
+
+    // Add a watchpoint for anything we need to debug
+#if 0
+    {
+        CacheFileOffset fileOffsetToWatch(0x0012bc0ULL);
+        std::string_view suffixToWatch = ".01";
+        for ( const SubCache& subCache : this->subCaches ) {
+            if ( subCache.fileSuffix != suffixToWatch )
+                continue;
+            for ( const Region& region : subCache.regions ) {
+                for ( const Chunk* chunk : region.chunks ) {
+                    if ( fileOffsetToWatch < chunk->subCacheFileOffset )
+                        continue;
+                    if ( fileOffsetToWatch >= (chunk->subCacheFileOffset + chunk->subCacheFileSize) )
+                        continue;
+                    uint64_t offsetInChunk = fileOffsetToWatch.rawValue() - chunk->subCacheFileOffset.rawValue();
+                    uint8_t* addrToWatch = chunk->subCacheBuffer + offsetInChunk;
+                    printf("watchpoint set expression -w w -s 8 -- %p\n", addrToWatch);
+                    printf("");
+                }
+            }
+        }
+    }
+#endif
+
     return Error();
 }
 
@@ -4581,7 +5340,6 @@
 
                 assert(segment.subCacheFileSize.rawValue() == 0);
                 segment.subCacheFileSize = region->subCacheFileSize;
-
             }
         }
 
@@ -4638,7 +5396,10 @@
                 case Chunk::Kind::linkeditExportTrie:
                     movedLinkedit.kind = MovedLinkedit::Kind::exportTrie;
                     break;
-                default:
+                case Chunk::Kind::linkeditFunctionVariants:
+                    movedLinkedit.kind = MovedLinkedit::Kind::functionVariants;
+                    break;
+               default:
                     assert(0);
                     break;
             }
@@ -4685,6 +5446,40 @@
     });
 
     assert(!err.hasError());
+
+    // Now that we have an adjustor, use it to adjust the bindTargets
+    err = parallel::forEach(this->cacheDylibs, ^(size_t index, CacheDylib& cacheDylib) {
+        for ( CacheDylib::BindTarget& bindTarget : cacheDylib.bindTargets ) {
+            // Adjust the bind target.  We have a runtime offset for the target input dylib, but we need to know where that runtime Offset will
+            // map to in the target cache dylib
+            switch ( bindTarget.kind ) {
+                case CacheDylib::BindTarget::Kind::absolute:
+                    // Skip these.  They won't change due to shifting the input dylib in to the cache
+                    break;
+                case CacheDylib::BindTarget::Kind::inputImage: {
+                    // Convert from an input dylib offset to the cache dylib offset
+                    CacheDylib::BindTarget::InputImage inputImage = bindTarget.inputImage;
+                    InputDylibVMAddress    targetInputVMAddr = inputImage.targetDylib->inputLoadAddress + inputImage.targetRuntimeOffset;
+                    CacheVMAddress         targetCacheVMAddr = inputImage.targetDylib->adjustor->adjustVMAddr(targetInputVMAddr);
+
+                    // Actually change the bindTarget to reflect the new type
+                    bindTarget.kind = CacheDylib::BindTarget::Kind::cacheImage;
+                    bindTarget.inputImage.~InputImage();
+                    bindTarget.cacheImage = (CacheDylib::BindTarget::CacheImage) {
+                        VMOffset(targetCacheVMAddr - inputImage.targetDylib->cacheLoadAddress),
+                        inputImage.targetDylib, inputImage.isWeakDef,
+                        inputImage.isFunctionVariant, inputImage.functionVariantTableIndex
+                    };
+                    break;
+                }
+                case CacheDylib::BindTarget::Kind::cacheImage:
+                    return Error("Shouldn't see cacheImage fixups at this point");
+            }
+        }
+        return Error();
+    });
+
+    assert(!err.hasError());
 }
 
 void SharedCacheBuilder::adjustObjCClasses()
@@ -4793,11 +5588,34 @@
     }
 }
 
+// Prepares the patch table builder from the cache dylibs and their patch infos, then
+// records the resulting table size as both the VM size and the file size of the patch
+// table chunk.  Returns the error from PatchTableBuilder::prepare() if it fails,
+// otherwise an empty Error.
+Error SharedCacheBuilder::calculatePatchTableSize()
+{
+    Stats        stats(this->config);
+    Timer::Scope timedScope(this->config, "calculatePatchTableSize time");
+
+    PatchTableBuilder& builder = this->patchTableOptimizer.builder;
+    if ( Error err = builder.prepare(this->cacheDylibs, this->patchTableOptimizer.patchInfos); err.hasError() )
+        return err;
+
+    // The chunk occupies the same number of bytes in the file as it does in VM
+    const uint64_t tableBytes = builder.getPatchTableSize();
+    auto*          chunk      = this->patchTableOptimizer.patchTableChunk;
+    chunk->subCacheFileSize = CacheFileSize(tableBytes);
+    chunk->cacheVMSize      = CacheVMSize(tableBytes);
+
+    if ( this->config.log.printStats )
+        stats.add("  patch table: used %lld bytes\n", tableBytes);
+
+    return Error();
+}
+
 Error SharedCacheBuilder::emitPatchTable()
 {
     Stats        stats(this->config);
     Timer::Scope timedScope(this->config, "emitPatchTable time");
-    
+
     // Skip this optimization on simulator until we've qualified it there
     __block PatchTableBuilder::PatchableClassesSet      patchableObjCClasses;
     __block PatchTableBuilder::PatchableSingletonsSet   patchableCFObj2;
@@ -4809,38 +5627,204 @@
                 patchableObjCClasses.insert(cacheDylib.adjustor->adjustVMAddr(inputVMAddr));
             });
 
-            // Note we have a diagnostic object here, but we don't care if it fails.  Then we'll
-            // just skip singleton patching on this dylib
-            Diagnostics diag;
-            cacheDylib.cacheMF->forEachSingletonPatch(diag, ^(MachOFile::SingletonPatchKind kind, uint64_t runtimeOffset) {
+            cacheDylib.cacheHdr->forEachSingletonPatch(^(uint64_t runtimeOffset) {
                 patchableCFObj2.insert(cacheDylib.cacheLoadAddress + VMOffset(runtimeOffset));
             });
         }
     }
-    
-    PatchTableBuilder builder;
-    Error err = builder.build(this->cacheDylibs, this->patchTableOptimizer.patchInfos,
-                              patchableObjCClasses,
-                              patchableCFObj2,
-                              this->config.layout.cacheBaseAddress);
-    if ( err.hasError() )
-        return err;
-    
-    auto* patchTableChunk = this->patchTableOptimizer.patchTableChunk;
+
+    const auto* patchTableChunk = this->patchTableOptimizer.patchTableChunk;
     uint8_t* buffer = patchTableChunk->subCacheBuffer;
     uint64_t bufferSize = patchTableChunk->subCacheFileSize.rawValue();
     uint64_t patchInfoAddr = patchTableChunk->cacheVMAddress.rawValue();
+
+    PatchTableBuilder& builder = this->patchTableOptimizer.builder;
+    if ( Error error = builder.build(this->cacheDylibs, this->patchTableOptimizer.patchInfos, patchableObjCClasses, patchableCFObj2, this->config.layout.cacheBaseAddress) )
+        return error;
     if ( Error error = builder.write(buffer, bufferSize, patchInfoAddr); error.hasError() )
         return error;
-    
+
     // We don't need the patchInfos, so clear it to save memory
     this->patchTableOptimizer.patchInfos.clear();
 
-    if ( this->config.log.printStats ) {
-        uint64_t patchInfoSize = builder.getPatchTableSize();
-        stats.add("  patch table: used %lld out of %lld bytes of buffer\n", patchInfoSize, bufferSize);
-    }
-    
+    return Error();
+}
+
+// Builds the prewarming table from the JSON held in options.prewarmingOptimizations and
+// writes it into the prewarming chunk's buffer.
+//
+// Every JSON location ("name" plus "offset", inside a dylib selected by "install_name")
+// is resolved to a cache VM offset, rounded down to a DYLD_CACHE_PREWARMING_DATA_PAGE_SIZE
+// boundary, and the resulting pages are sorted and coalesced into runs of consecutive pages.
+// JSON entries that lack the expected keys, name a dylib that is not in the cache, or
+// resolve to a non-regular export are skipped.
+//
+// Returns an empty Error on success (which includes "no JSON supplied" and "JSON root is
+// not an array").  Returns a populated Error if the JSON fails to parse, if reading a
+// dylib's layout/symbols or parsing an "offset" reports a diagnostic, or if the coalesced
+// entries do not fit in the chunk.
+Error SharedCacheBuilder::emitPrewarmingData()
+{
+    // Skip everything if the JSON file is empty
+    if ( this->options.prewarmingOptimizations.empty() )
+        return Error::none();
+
+    using json::Node;
+    using json::NodeValueType;
+
+    Stats        stats(this->config);
+    Timer::Scope timedScope(this->config, "emitPrewarmingData time");
+
+    __block Diagnostics diag;
+    Node rootNode = json::readJSON(diag, this->options.prewarmingOptimizations.data(),
+                                   this->options.prewarmingOptimizations.size(),
+                                   false /* useJSON5 */);
+    if ( diag.hasError() )
+        return Error("%s", diag.errorMessageCStr());
+
+    std::unordered_map<std::string_view, const CacheDylib*> dylibMap;
+    for ( const CacheDylib& cacheDylib : cacheDylibs )
+        dylibMap[cacheDylib.installName] = &cacheDylib;
+
+    // Add the dylib aliases too, just in case dylibs are moving.  insert() does not
+    // replace existing keys, so a real install name always wins over an alias.
+    dylibMap.insert(this->dylibAliases.begin(), this->dylibAliases.end());
+
+    // Format is something like:
+    // [
+    //     {
+    //         "install_name": "..."
+    //         "locations": [
+    //             {
+    //                 "name": "symbol name",
+    //                 "offset": 0
+    //             }
+    //         ]
+    //     },
+    // ...
+    // ]
+    if ( rootNode.type != NodeValueType::Array )
+        return Error::none();
+
+    std::vector<dyld_prewarming_entry> prewarmingEntries;
+
+    const uint64_t PrewarmingPageSize = DYLD_CACHE_PREWARMING_DATA_PAGE_SIZE;
+    const uint64_t PrewarmingPageMask = ~(PrewarmingPageSize - 1);
+
+    // FIXME: Do this in parallel if needed, or even in CacheDylib when it goes parallel
+    for ( const Node& fileNode : rootNode.array ) {
+        // Find the install name and locations array
+        auto installNameIt = fileNode.map.find("install_name");
+        auto locationsIt = fileNode.map.find("locations");
+        if ( (installNameIt == fileNode.map.end()) || (locationsIt == fileNode.map.end()) )
+            continue;
+
+        const Node& installNameNode = installNameIt->second;
+        const Node& locationsNode = locationsIt->second;
+
+        // Find the cache dylib for this install name
+        const CacheDylib* cacheDylib = nullptr;
+        if ( auto it = dylibMap.find(installNameNode.value); it != dylibMap.end() )
+            cacheDylib = it->second;
+        else
+            continue;
+
+        // Get the exports trie
+        __block const uint8_t* trieStart = nullptr;
+        __block size_t trieSize = 0;
+        cacheDylib->inputMF->withFileLayout(diag, ^(const mach_o::Layout &layout) {
+            if ( layout.linkedit.exportsTrie.hasValue() ) {
+                trieStart = layout.linkedit.exportsTrie.buffer;
+                trieSize  = layout.linkedit.exportsTrie.bufferSize;
+            }
+        });
+        if ( diag.hasError() )
+            return Error("%s", diag.errorMessageCStr());
+
+        if ( trieStart == nullptr )
+            continue;
+
+        ExportsTrie exportsTrie(trieStart, trieSize);
+
+        // Lazily make a map of local symbols if we need it.  An explicit flag tracks
+        // whether the symbol table has been walked: the map itself can legitimately stay
+        // empty, and using emptiness as the flag would re-walk the table on every miss.
+        __block std::unordered_map<std::string_view, uint64_t> localsMap;
+        bool didPopulateLocals = false;
+        auto populateLocals = ^() {
+            cacheDylib->inputMF->withFileLayout(diag, ^(const mach_o::Layout& layout) {
+                mach_o::SymbolTable symbolTable(layout);
+
+                symbolTable.forEachLocalSymbol(diag, ^(const char *symbolName, uint64_t n_value, uint8_t n_type,
+                                                       uint8_t n_sect, uint16_t n_desc, bool& stop) {
+                    // Only section-defined, non-stab symbols; stored as an offset from the load address
+                    if ( ((n_type & N_TYPE) == N_SECT) && ((n_type & N_STAB) == 0) ) {
+                        localsMap[symbolName] = (n_value - cacheDylib->inputLoadAddress.rawValue());
+                    }
+                });
+            });
+        };
+
+        // Walk all locations, looking them up in exports/locals
+        for ( const Node& locationNode : locationsNode.array ) {
+            auto nameIt = locationNode.map.find("name");
+            auto offsetIt = locationNode.map.find("offset");
+            if ( (nameIt == locationNode.map.end()) || (offsetIt == locationNode.map.end()) )
+                continue;
+
+            uint64_t offset = parseRequiredInt(diag, offsetIt->second);
+            if ( diag.hasError() )
+                return Error("%s", diag.errorMessageCStr());
+
+            // Check for an export first, then a local
+            uint64_t implOffset = 0;
+            mach_o::Symbol symbol;
+            if ( exportsTrie.hasExportedSymbol(nameIt->second.value.c_str(), symbol) ) {
+                if ( !symbol.isRegular(implOffset) )
+                    continue;
+            } else {
+                if ( !didPopulateLocals ) {
+                    populateLocals();
+                    didPopulateLocals = true;
+                    if ( diag.hasError() )
+                        return Error("%s", diag.errorMessageCStr());
+                }
+
+                // NOTE(review): if the name is in neither the exports nor the locals,
+                // implOffset stays 0 and the entry is computed relative to the dylib's
+                // load address.  Confirm whether that is intended or whether unknown
+                // names should be skipped.
+                if ( auto localsIt = localsMap.find(nameIt->second.value); localsIt != localsMap.end() )
+                    implOffset = localsIt->second;
+            }
+
+            InputDylibVMAddress inputVMAddr(cacheDylib->inputLoadAddress + VMOffset(implOffset));
+            CacheVMAddress cacheVMAddr = cacheDylib->adjustor->adjustVMAddr(inputVMAddr);
+            cacheVMAddr += VMOffset(offset);
+
+            VMOffset cacheVMOffset = cacheVMAddr - this->config.layout.cacheBaseAddress;
+
+            // One page per location, rounded down to the start of its prewarming page
+            prewarmingEntries.push_back({ cacheVMOffset.rawValue() & PrewarmingPageMask, 1 });
+        }
+    }
+
+    PrewarmingOptimizer& opt = this->prewarmingOptimizer;
+    dyld_prewarming_header* header = (dyld_prewarming_header*)opt.prewarmingChunk->subCacheBuffer;
+    const void* bufferEnd = opt.prewarmingChunk->subCacheBuffer + opt.prewarmingChunk->subCacheFileSize.rawValue();
+
+    if ( prewarmingEntries.empty() ) {
+        header->version = 1;
+        header->count = 0;
+        return Error::none();
+    }
+
+    // Sort by ascending VM addresses
+    std::sort(prewarmingEntries.begin(), prewarmingEntries.end(),
+              [](const dyld_prewarming_entry& a, const dyld_prewarming_entry& b) {
+        return a.cacheVMOffset < b.cacheVMOffset;
+    });
+
+    // Combine adjacent entries.  Entries are sorted and page aligned, so each one is
+    // either inside the current run (duplicate page), exactly at the end of the current
+    // run (extends it by one page), or past it (starts a new run).  The end of the run
+    // must take numPages into account: comparing against only the run's first page would
+    // count duplicates of later pages again and would stop merging after two pages.
+    std::vector<dyld_prewarming_entry> combinedEntries;
+    combinedEntries.push_back(prewarmingEntries.front());
+    for ( const dyld_prewarming_entry& entry : std::span(prewarmingEntries).subspan(1) ) {
+        dyld_prewarming_entry& last = combinedEntries.back();
+        const uint64_t runEnd = (uint64_t)last.cacheVMOffset + ((uint64_t)last.numPages * PrewarmingPageSize);
+        if ( entry.cacheVMOffset < runEnd )
+            continue;
+        if ( entry.cacheVMOffset == runEnd ) {
+            ++last.numPages;
+        } else {
+            combinedEntries.push_back(entry);
+        }
+    }
+
+    // write the results
+    dyld_prewarming_entry* firstOutputEntry = &header->entries[0];
+    dyld_prewarming_entry* lastOutputEntry = &header->entries[combinedEntries.size()];
+    if ( lastOutputEntry > bufferEnd )
+        return Error("overflow in prewarming optimizer (%p vs %p)", lastOutputEntry, bufferEnd);
+
+    header->version = 1;
+    header->count = (uint32_t)combinedEntries.size();
+    memcpy(firstOutputEntry, combinedEntries.data(), sizeof(dyld_prewarming_entry) * header->count);
+
     return Error();
 }
 
@@ -4870,7 +5854,7 @@
         }
     } else {
         const char* binPath = "/usr/bin/";
-        if ( options.platform == dyld3::Platform::driverKit )
+        if ( options.platform == Platform::driverKit )
             binPath = "/System/Library/DriverExtensions/";
         for ( const InputFile* exeFile : executableFiles ) {
             if ( startsWith(exeFile->path, binPath) )
@@ -4928,8 +5912,8 @@
             }
 
             // The cache segments don't have the permissions.  Get that from the load commands
-            cacheDylib.cacheMF->forEachSegment(^(const MachOFile::SegmentInfo& info, bool& stop) {
-                segments[info.segIndex].protections = info.protections;
+            cacheDylib.cacheHdr->forEachSegment(^(const Header::SegmentInfo& info, bool& stop) {
+                segments[info.segmentIndex].protections = info.initProt;
             });
             this->dylibSegmentLayout.push_back(std::move(segments));
         }
@@ -4956,6 +5940,11 @@
                         linkeditLayout.exportsTrie.entryCount  = 0; // Not needed here
                         linkeditLayout.exportsTrie.hasLinkedit = true;
                         break;
+                    case MovedLinkedit::Kind::functionVariants:
+                        linkeditLayout.functionVariants.buffer      = kindAndLinkdit.second.cacheLocation;
+                        linkeditLayout.functionVariants.bufferSize  = (uint32_t)kindAndLinkdit.second.dataSize.rawValue();
+                        linkeditLayout.functionVariants.hasLinkedit = true;
+                        break;
                     case MovedLinkedit::Kind::numKinds:
                         // This should never happen
                         assert(false);
@@ -4981,21 +5970,21 @@
         this->executableSegmentLayout.reserve(executableFiles.size());
         for ( const InputFile* executableFile : executableFiles ) {
             __block std::vector<mach_o::SegmentLayout> segments;
-            executableFile->mf->forEachSegment(^(const MachOFile::SegmentInfo& info, bool& stop) {
+            ((const Header*)executableFile->mf)->forEachSegment(^(const Header::SegmentInfo& info, bool& stop) {
                 // Note file layout here, not VM layout
                 uint8_t*              segmentBuffer = (uint8_t*)executableFile->mf + info.fileOffset;
                 mach_o::SegmentLayout segment;
-                segment.vmAddr      = info.vmAddr;
-                segment.vmSize      = info.vmSize;
+                segment.vmAddr      = info.vmaddr;
+                segment.vmSize      = info.vmsize;
                 segment.fileOffset  = info.fileOffset;
                 segment.fileSize    = info.fileSize;
                 segment.buffer      = segmentBuffer;
-                segment.protections = info.protections;
+                segment.protections = info.initProt;
 
                 segment.kind        = mach_o::SegmentLayout::Kind::unknown;
-                if ( !strcmp(info.segName, "__TEXT") ) {
+                if ( info.segmentName == "__TEXT" ) {
                     segment.kind    = mach_o::SegmentLayout::Kind::text;
-                } else if ( !strcmp(info.segName, "__LINKEDIT") ) {
+                } else if ( info.segmentName == "__LINKEDIT" ) {
                     segment.kind    = mach_o::SegmentLayout::Kind::linkedit;
                 }
 
@@ -5087,7 +6076,7 @@
         }
     }
 
-    Loader::LoadOptions::Finder loaderFinder = ^(Diagnostics& loadDiag, dyld3::Platform, const char* loadPath, const dyld4::Loader::LoadOptions& options) {
+    Loader::LoadOptions::Finder loaderFinder = ^(Diagnostics& loadDiag, Platform, const char* loadPath, const dyld4::Loader::LoadOptions& options) {
         auto pos = loadersMap.find(loadPath);
         if ( pos != loadersMap.end() ) {
             return (const Loader*)pos->second;
@@ -5116,7 +6105,7 @@
         Diagnostics loadDiag;
         ((Loader*)ldr)->loadDependents(loadDiag, state, options);
         if ( loadDiag.hasError() ) {
-            return Error("%s, loading dependents of %s", loadDiag.errorMessageCStr(), ldr->path());
+            return Error("%s, loading dependents of %s", loadDiag.errorMessageCStr(), ldr->path(state));
         }
     }
 
@@ -5148,7 +6137,7 @@
         return Error("Could not find a main executable for building cache loaders");
 
     const LayoutBuilder  layoutBuilder(cacheDylibs, { });
-    EphemeralAllocator   processConfigAlloc;
+    STACK_ALLOCATOR(processConfigAlloc, 0);
     __block dyld4::Vector<ProcessConfig::DyldCache::CacheDylib> processConfigDylibs(processConfigAlloc);
 
     for ( uint32_t dylibIndex = 0; dylibIndex != this->cacheDylibs.size(); ++dylibIndex ) {
@@ -5171,9 +6160,9 @@
     }
 
     // build PrebuiltLoaderSet of all dylibs in cache
+    STACK_ALLOCATOR(alloc, 0);
     KernelArgs         kernArgs(mainExecutable, { "test.exe" }, {}, {});
     SyscallDelegate    osDelegate;
-    EphemeralAllocator alloc;
     ProcessConfig      processConfig(&kernArgs, osDelegate, alloc);
     RuntimeLocks       locks;
     RuntimeState       state(processConfig, locks, alloc);
@@ -5236,14 +6225,14 @@
             __block InputDylibVMAddress inputOptPtrsVMAddress;
             __block uint64_t            sectionSize = 0;
             __block bool                found       = false;
-            cacheDylib->inputMF->forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
-                if ( (strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0) && (strncmp(sectInfo.segInfo.segName, "__AUTH", 6) != 0) )
+            cacheDylib->inputHdr->forEachSection(^(const Header::SectionInfo& sectInfo, bool& stop) {
+                if ( !sectInfo.segmentName.starts_with("__DATA") && !sectInfo.segmentName.starts_with("__AUTH") )
                     return;
-                if ( strcmp(sectInfo.sectName, "__objc_opt_ptrs") != 0 )
+                if ( sectInfo.sectionName != "__objc_opt_ptrs" )
                     return;
 
-                inputOptPtrsVMAddress = InputDylibVMAddress(sectInfo.sectAddr);
-                sectionSize           = sectInfo.sectSize;
+                inputOptPtrsVMAddress = InputDylibVMAddress(sectInfo.address);
+                sectionSize           = sectInfo.size;
 
                 found = true;
                 stop  = true;
@@ -5260,7 +6249,7 @@
             //     T protocolClass;
             // };
             // typedef struct objc_opt_pointerlist_tt<uintptr_t> objc_opt_pointerlist_t;
-            if ( sectionSize < cacheDylib->inputMF->pointerSize() ) {
+            if ( sectionSize < cacheDylib->inputHdr->pointerSize() ) {
                 return Error("libobjc's pointer list section is too small (metadata not optimized)");
             }
 
@@ -5322,7 +6311,7 @@
 
     const LayoutBuilder  layoutBuilder(cacheDylibs, this->exeInputFiles);
     const LayoutBuilder* layoutBuilderPtr = &layoutBuilder;
-    EphemeralAllocator   processConfigAlloc;
+    STACK_ALLOCATOR(processConfigAlloc, 0);
     dyld4::Vector<ProcessConfig::DyldCache::CacheDylib> processConfigDylibsOwner(processConfigAlloc);
     auto& processConfigDylibs = processConfigDylibsOwner;
 
@@ -5354,8 +6343,8 @@
         }
 
         // Assume last segment file size is the overall file size
-        __block uint64_t fileSize = 0;
-        inputFile->mf->forEachSegment(^(const MachOFile::SegmentInfo& info, bool& stop) {
+        __block uint32_t fileSize = 0;
+        ((const Header*)inputFile->mf)->forEachSegment(^(const Header::SegmentInfo& info, bool& stop) {
             fileSize = std::max(fileSize, info.fileOffset + info.fileSize);
         });
         otherMapping[inputFile->path] = { inputFile->mf, fileSize };
@@ -5390,16 +6379,16 @@
         KernelArgs       kernArgs(mainMF, { "test.exe" }, {}, {});
         SyscallDelegate  osDelegate;
         osDelegate._mappedOtherDylibs = otherMapping;
-        osDelegate._gradedArchs       = &this->options.archs;
+        osDelegate._gradedArchs       = &this->options.gradedArchs;
         //osDelegate._dyldCache           = dyldCache;
-        EphemeralAllocator alloc;
+        STACK_ALLOCATOR(alloc, 0);
         ProcessConfig      processConfig(&kernArgs, osDelegate, alloc);
         RuntimeLocks       locks;
         RuntimeState       state(processConfig, locks, alloc);
         RuntimeState*      statePtr = &state;
         Diagnostics        launchDiag;
 
-        processConfig.reset(mainMF, exeFile->path.c_str(), nullptr);
+        processConfig.reset(mainMF, exeFile->path.c_str(), exeFile->size, nullptr);
         state.resetCachedDylibsArrays(cachedDylibsLoaderSet);
 
         // FIXME: This is terrible and needs to be a real reset method
@@ -5407,7 +6396,9 @@
         processConfig.dyldCache.dylibsExpectedOnDisk = !this->options.dylibsRemovedFromDisk;
         processConfig.dyldCache.development = isDevelopmentSharedCache(this->options);
 
-        if ( !this->objcOptimizer.objcDylibs.empty() ) {
+        // Disable objc optimizations from EK shared cache
+        bool emitObjcOpts = !this->options.platform.isExclaveKit();
+        if ( !this->objcOptimizer.objcDylibs.empty() && emitObjcOpts ) {
             processConfig.dyldCache.objcClassHashTable = (const objc::ClassHashTable*)this->objcClassOptimizer.classHashTableChunk->subCacheBuffer;
             processConfig.dyldCache.objcSelectorHashTable = (const objc::SelectorHashTable*)this->objcSelectorOptimizer.selectorHashTableChunk->subCacheBuffer;
             processConfig.dyldCache.objcProtocolHashTable = (const objc::ProtocolHashTable*)this->objcProtocolOptimizer.protocolHashTableChunk->subCacheBuffer;
@@ -5418,18 +6409,24 @@
             processConfig.dyldCache.unslidLoadAddress = config.layout.cacheBaseAddress.rawValue();
         }
 
-        Loader::LoadOptions::Finder loaderFinder = ^(Diagnostics& diag, dyld3::Platform plat, const char* loadPath, const dyld4::Loader::LoadOptions& loadOptions) {
+        // check if main executable is valid mach-o
+        if ( mach_o::Error lerr = state.config.process.mainExecutableHdr->valid(state.config.process.mainExecutableSliceSize) ) {
+            fprintf(stderr, "warning: can't build PrebuiltLoaderSet for '%s': %s\n", exeFile->path.c_str(), lerr.message());
+            return Error();
+        }
+
+        Loader::LoadOptions::Finder loaderFinder = ^(Diagnostics& diag, Platform plat, const char* loadPath, const dyld4::Loader::LoadOptions& loadOptions) {
             // when building macOS cache, there may be some incorrect catalyst paths
-            if ( (plat == dyld3::Platform::iOSMac) && (strncmp(loadPath, "/System/iOSSupport/", 19) != 0) ) {
+            if ( (plat == Platform::macCatalyst) && (strncmp(loadPath, "/System/iOSSupport/", 19) != 0) ) {
                 char altPath[PATH_MAX];
                 strlcpy(altPath, "/System/iOSSupport", PATH_MAX);
                 strlcat(altPath, loadPath, PATH_MAX);
-                if ( const dyld4::PrebuiltLoader* ldr = cachedDylibsLoaderSet->findLoader(altPath) )
+                if ( const dyld4::PrebuiltLoader* ldr = cachedDylibsLoaderSet->findLoader(*statePtr, altPath) )
                     return (const Loader*)ldr;
             }
 
             // check if path is a dylib in the dyld cache, then use its PrebuiltLoader
-            if ( const dyld4::PrebuiltLoader* ldr = cachedDylibsLoaderSet->findLoader(loadPath) )
+            if ( const dyld4::PrebuiltLoader* ldr = cachedDylibsLoaderSet->findLoader(*statePtr, loadPath) )
                 return (const Loader*)ldr;
 
             // call through to getLoader() which will expand @paths
@@ -5458,6 +6455,23 @@
                 // FIXME: Propagate errors
                 return Error();
             }
+
+            // Set dylibs to be fixedUp before we partition delay init, as it uses this state
+            for ( const Loader* ldr : state.loaded ) {
+                if ( const PrebuiltLoader* prebuiltLdr = ldr->isPrebuiltLoader() )
+                    prebuiltLdr->setFixedUp(state);
+            }
+
+            // split off delay loaded dylibs into delayLoaded vector
+            // We have to do this before making the PrebuiltLoaderSet as objc in the closure needs
+            // to know which shared cache dylibs are delay or not.
+            STACK_ALLOC_ARRAY(const Loader*, loadersTemp, state.loaded.size());
+            for (const Loader* ldr : state.loaded)
+                loadersTemp.push_back(ldr);
+            std::span<const Loader*> allLoaders(&loadersTemp[0], (size_t)loadersTemp.count());
+            std::span<const Loader*> topLoaders = allLoaders.subspan(0, 1);
+            state.partitionDelayLoads(allLoaders, topLoaders);
+
             state.setMainLoader(mainLoader);
             const dyld4::PrebuiltLoaderSet* prebuiltAppSet = dyld4::PrebuiltLoaderSet::makeLaunchSet(launchDiag, state, missingPaths);
             if ( launchDiag.hasError() ) {
@@ -5483,40 +6497,39 @@
         return Error();
     });
 
+    const PrebuiltLoaderChunk* loaderChunk = this->prebuiltLoaderBuilder.executablesLoaderChunk;
+    const uint64_t loaderBufferSize = loaderChunk->subCacheFileSize.rawValue();
+
     assert(!err.hasError());
-
     std::map<std::string_view, const dyld4::PrebuiltLoaderSet*> prebuiltsMap;
     uint64_t prebuiltsSpace = 0;
+    uint64_t requiredPrebuiltsSpace = 0;
     for ( uint64_t i = 0; i != this->exeInputFiles.size(); ++i ) {
         const InputFile*                exeFile   = this->exeInputFiles[i];
         const dyld4::PrebuiltLoaderSet* loaderSet = executableLoaders[i];
         if ( loaderSet == nullptr )
             continue;
 
+        uint64_t requiredSpace = alignTo(loaderSet->size(), alignof(dyld4::PrebuiltLoaderSet));
+        requiredPrebuiltsSpace += requiredSpace;
+        if ( (prebuiltsSpace + requiredSpace) > loaderBufferSize ) {
+            // skip this loader if it doesn't fit
+            this->warning("cache buffer too small to hold executable PrebuiltLoaderSet '%s' (size=%lldKB)",
+                          exeFile->path.c_str(), requiredSpace / 1024);
+            continue;
+        }
+
         prebuiltsMap[exeFile->path.c_str()] = loaderSet;
-        prebuiltsSpace += alignTo(loaderSet->size(), 8);
-    }
-
-    const PrebuiltLoaderChunk* loaderChunk = this->prebuiltLoaderBuilder.executablesLoaderChunk;
-    uint64_t loaderBufferSize = loaderChunk->subCacheFileSize.rawValue();
+        prebuiltsSpace += requiredSpace;
+    }
+
+    if ( requiredPrebuiltsSpace != prebuiltsSpace ) {
+        this->warning("cache buffer too small to hold executable PrebuiltLoaderSet (prebuiltLoaderSet size=%lluKB, buffer size=%lldKB)",
+                      requiredPrebuiltsSpace / 1024, loaderBufferSize / 1024);
+    }
 
     if ( this->config.log.printStats ) {
         stats.add("  dyld4 executable Loader's : used %lld out of %lld bytes of buffer\n", prebuiltsSpace, loaderBufferSize);
-    }
-
-    if ( prebuiltsSpace > loaderBufferSize ) {
-        if ( dylibHasMissingDependency ) {
-            // At least one dylib was evicted.  If it was soemthing common, like UIKit/AppKit, then its going to
-            // end up being included in every executable loader and the buffer will overflow
-            this->warning("cache buffer too small to hold executable PrebuiltLoaderSet (prebuiltLoaderSet size=%lluKB, buffer size=%lldKB)",
-                          prebuiltsSpace / 1024, loaderBufferSize / 1024);
-
-            // For now, just empty the map.  That'll let us emit an empty Trie and PBLS
-            prebuiltsMap.clear();
-        } else {
-            return Error("cache buffer too small to hold executable PrebuiltLoaderSet (prebuiltLoaderSet size=%lluKB, buffer size=%lldKB)",
-                         prebuiltsSpace / 1024, loaderBufferSize / 1024);
-        }
     }
 
     // copy all PrebuiltLoaderSets into cache
@@ -5554,7 +6567,7 @@
 
         size_t size = pbls->size();
         ::memcpy(poolBase + currentPoolOffset, pbls, size);
-        currentPoolOffset += alignTo(size, 8);
+        currentPoolOffset += alignTo(size, alignof(dyld4::PrebuiltLoaderSet));
     }
 
     const CacheTrieChunk* trieChunk = this->prebuiltLoaderBuilder.executableTrieChunk;
@@ -5869,7 +6882,7 @@
 
         // Get the PrebuiltLoader* for this cache dylib
         const PrebuiltLoader* ldr = cachedDylibsLoaderSet->atIndex(cacheDylib.cacheIndex);
-        assert(ldr->path() == cacheDylib.installName);
+        //assert(ldr->path(state) == cacheDylib.installName); // can't do assert because state is not passed to this method
 
         CacheVMAddress ldrVMAddr = getVMAddressInSection(*this->prebuiltLoaderBuilder.cacheDylibsLoaderChunk, ldr);
 
@@ -5998,26 +7011,25 @@
         if ( cacheDylib.installName != "/usr/lib/libobjc.A.dylib" )
             continue;
 
-        cacheDylib.cacheMF->forEachSection(^(const dyld3::MachOFile::SectionInfo &sectInfo,
-                                             bool malformedSectionRange, bool &stop) {
-            if (strcmp(sectInfo.segInfo.segName, "__TEXT") != 0)
+        cacheDylib.cacheHdr->forEachSection(^(const Header::SegmentInfo &segInfo, const Header::SectionInfo &sectInfo, bool &stop) {
+            if ( sectInfo.segmentName != "__TEXT" )
                 return;
-            if (strcmp(sectInfo.sectName, "__objc_opt_ro") != 0)
+            if ( sectInfo.sectionName != "__objc_opt_ro" )
                 return;
 
             // Find the buffer for the section
             stop = true;
 
-            const DylibSegmentChunk& segment = cacheDylib.segments[sectInfo.segInfo.segIndex];
-
-            VMAddress sectionVMAddr(sectInfo.sectAddr);
-            VMAddress segmentVMAddr(sectInfo.segInfo.vmAddr);
+            const DylibSegmentChunk& segment = cacheDylib.segments[sectInfo.segIndex];
+
+            VMAddress sectionVMAddr(sectInfo.address);
+            VMAddress segmentVMAddr(segInfo.vmaddr);
             VMOffset sectionOffsetInSegment = sectionVMAddr - segmentVMAddr;
             uint8_t* sectionBuffer = segment.subCacheBuffer + sectionOffsetInSegment.rawValue();
 
             // All fields in the old header are offsets from the header.  This is how much to
             // shift them by
-            uint64_t headerCacheOffset = sectInfo.sectAddr - this->config.layout.cacheBaseAddress.rawValue();
+            uint64_t headerCacheOffset = sectInfo.address - this->config.layout.cacheBaseAddress.rawValue();
 
             // Found the section, now write the content
             objc_opt::objc_opt_t* libROHeader = (objc_opt::objc_opt_t *)sectionBuffer;
@@ -6063,22 +7075,12 @@
     }
 }
 
-// Struct matching dyld4::LibdyldDyld4Section to be used with a variable pointer size.
-// This is so we can use it in the shared cache builder, which is always
-// 64-bit but can emit 32-bit structs
-template <typename P>
-struct FixedSizeLibdyldDyld4Section {
-    P apis;
-    P allImageInfos;
-    P defaultVars[5];
-    P dyldLookupFuncAddr;
-    P tlv_get_addrAddr;
-};
-
-static_assert(sizeof(FixedSizeLibdyldDyld4Section<intptr_t>) == sizeof(dyld4::LibdyldDyld4Section));
-
 void SharedCacheBuilder::optimizeTLVs()
 {
+    // driverkit does not support thread-locals, all other platforms do
+    if ( options.platform == Platform::driverKit )
+        return;
+
     Stats        stats(this->config);
     Timer::Scope timedScope(this->config, "optimizeTLVs time");
 
@@ -6105,60 +7107,20 @@
         return;
     }
 
-    // Find the tlv_get_addrAddr from inside the __dyld4 section
-    __block CacheVMAddress getAddrVMAddr;
-    __block bool foundTLVGetAddr = false;
-    libdyldDylib->cacheMF->forEachSection(^(const MachOFile::SectionInfo &sectInfo,
-                                            bool malformedSectionRange, bool &stop) {
-        if ( strcmp(sectInfo.sectName, "__dyld4") != 0 )
-            return;
-
-        if ( (strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0)
-            && (strncmp(sectInfo.segInfo.segName, "__AUTH", 6) != 0) )
-            return;
-
-        // Found the section we need.  Now to check if its valid
-        stop = true;
-
-        const DylibSegmentChunk& segment = libdyldDylib->segments[sectInfo.segInfo.segIndex];
-
-        VMAddress sectionVMAddr(sectInfo.sectAddr);
-        VMAddress segmentVMAddr(sectInfo.segInfo.vmAddr);
-        VMOffset sectionOffsetInSegment = sectionVMAddr - segmentVMAddr;
-        uint8_t* sectionBuffer = segment.subCacheBuffer + sectionOffsetInSegment.rawValue();
-
-
-        if ( this->config.layout.is64 ) {
-            typedef FixedSizeLibdyldDyld4Section<uint64_t> dyld4_section_t;
-            if ( sectInfo.sectSize < sizeof(dyld4_section_t) ) {
-                // Old libdyld without the field we need
-                return;
-            }
-
-            const dyld4_section_t* dyldSection = (dyld4_section_t*)sectionBuffer;
-            CacheVMAddress cacheBaseAddress = this->config.layout.cacheBaseAddress;
-            getAddrVMAddr = Fixup::Cache64::getCacheVMAddressFromLocation(cacheBaseAddress,
-                                                                          &dyldSection->tlv_get_addrAddr);
-        } else {
-            typedef FixedSizeLibdyldDyld4Section<uint32_t> dyld4_section_t;
-            if ( sectInfo.sectSize < sizeof(dyld4_section_t) ) {
-                // Old libdyld without the field we need
-                return;
-            }
-
-            const dyld4_section_t* dyldSection = (dyld4_section_t*)sectionBuffer;
-            CacheVMAddress cacheBaseAddress = this->config.layout.cacheBaseAddress;
-            getAddrVMAddr = Fixup::Cache32::getCacheVMAddressFromLocation(cacheBaseAddress,
-                                                                          &dyldSection->tlv_get_addrAddr);
-        }
-
-        foundTLVGetAddr = true;
-    });
-
-    if ( !foundTLVGetAddr ) {
-        this->warning("Could not find tlv_get_addr (TLVs not optimized)");
+    // Find _tlv_get_addr function in libdyld.dylib
+    const Image& libdyldImg = *libdyldDylib->inputImage;
+
+    Symbol tlvBootstrapSymbol;
+    if ( !libdyldImg.hasExportsTrie() || !libdyldImg.exportsTrie().hasExportedSymbol("__tlv_bootstrap", tlvBootstrapSymbol) ) {
+        this->warning("Could not find '_tlv_get_addr' in libdyld");
         return;
     }
+    // NOTE:  magic here:
+    // To cleanly error out if TLVs are not set up, the on-disk thunks bind to _tlv_bootstrap, which aborts if invoked.
+    // The thunks, when set up, are changed to point to _tlv_get_addr, which is not an exported symbol.  Therefore
+    // the cache builder has no way to find it. The fix/hack is that _tlv_bootstrap and _tlv_get_addr are written
+    // in assembly and always 8 bytes apart.
+    CacheVMAddress getAddrVMAddr(libdyldDylib->cacheLoadAddress.rawValue() + tlvBootstrapSymbol.implOffset() + 8);
 
     // We read the value for this symbol to know the first key we can allocate for TLVs
     // We then have to stop optimizing if and when we reach "end", that's the
@@ -6210,7 +7172,7 @@
             return;
         }
 
-        if ( !cacheDylib.cacheMF->hasThreadLocalVariables() )
+        if ( !cacheDylib.cacheHdr->hasThreadLocalVariables() )
             continue;
 
         // Get the next available key (one key per dylib)
@@ -6227,69 +7189,117 @@
             return;
         }
 
-        cacheDylib.cacheMF->forEachSection(^(const MachOFile::SectionInfo& sectInfo,
-                                             bool malformedSectionRange, bool& stop) {
-            if ( (sectInfo.sectFlags & SECTION_TYPE) != S_THREAD_LOCAL_VARIABLES )
+        // find initial content for all thread locals in this dylib
+        __block bool        initialContentNonZero = false;
+        __block uint64_t    initialContentAddr    = 0;
+        __block size_t      initialContentSize    = 0;
+        cacheDylib.cacheHdr->forEachSection(^(const Header::SectionInfo& sectInfo, bool& stop) {
+            switch ( sectInfo.flags & SECTION_TYPE ) {
+                case S_THREAD_LOCAL_REGULAR:
+                    initialContentNonZero = true;
+                    [[clang::fallthrough]];
+                case S_THREAD_LOCAL_ZEROFILL:
+                    if ( initialContentSize == 0 ) {
+                        // first of N contiguous TLV template sections, record as if this was only section
+                        initialContentAddr = sectInfo.address;
+                        initialContentSize = sectInfo.size;
+                    }
+                    else {
+                        // non-first of N contiguous TLV template sections, accumulate values
+                        initialContentSize = sectInfo.address + sectInfo.size - initialContentAddr;
+                    }
+                    break;
+            }
+        });
+
+        cacheDylib.cacheHdr->forEachSection(^(const Header::SegmentInfo &segInfo, const Header::SectionInfo& sectInfo, bool& stop) {
+            if ( (sectInfo.flags & SECTION_TYPE) != S_THREAD_LOCAL_VARIABLES )
                 return;
 
-            DylibSegmentChunk& segment = cacheDylib.segments[sectInfo.segInfo.segIndex];
-
-            VMAddress sectionVMAddr(sectInfo.sectAddr);
-            VMAddress segmentVMAddr(sectInfo.segInfo.vmAddr);
+            DylibSegmentChunk& segment = cacheDylib.segments[sectInfo.segIndex];
+
+            VMAddress sectionVMAddr(sectInfo.address);
+            VMAddress segmentVMAddr(segInfo.vmaddr);
             VMOffset sectionOffsetInSegment = sectionVMAddr - segmentVMAddr;
             uint8_t* sectionBuffer = segment.subCacheBuffer + sectionOffsetInSegment.rawValue();
 
             if ( this->config.layout.is64 ) {
-                struct tlv_thunk_t
+                struct tlv_disk_thunk64_t
                 {
-                    uint64_t thunk;
+                    uint64_t func;
                     uint64_t key;
                     uint64_t offset;
                 };
 
-                uint32_t count = (uint32_t)(sectInfo.sectSize / sizeof(tlv_thunk_t));
-                tlv_thunk_t* thunkBuffer = (tlv_thunk_t*)sectionBuffer;
-                for ( uint32_t i = 0; i != count; ++i ) {
-                    tlv_thunk_t& tlvThunk = thunkBuffer[i];
-
-                    // Set the key to the next available key
-                    tlvThunk.key = key;
+                uint32_t            count       = (uint32_t)(sectInfo.size / sizeof(tlv_disk_thunk64_t));
+                tlv_disk_thunk64_t* thunkBuffer = (tlv_disk_thunk64_t*)sectionBuffer;
+                for ( uint32_t i = 0; i < count; ++i ) {
+                    size_t                                      offset   = thunkBuffer[i].offset; // only value needed from disk format
+                    dyld::ThreadLocalVariables::TLV_Thunkv2&   newThunk = *((dyld::ThreadLocalVariables::TLV_Thunkv2*)&thunkBuffer[i]);
+
+                    // Reset the fields using TLV_Thunkv2 format
+                    newThunk.key                 = key;
+                    newThunk.offset              = (uint32_t)offset;
+                    newThunk.initialContentSize  = (uint32_t)initialContentSize;
+                    newThunk.initialContentDelta = 0;
+                    if ( initialContentNonZero ) {
+                        uint64_t newThunkDeltaFieldAddr = (sectInfo.address + i*sizeof(tlv_disk_thunk64_t) + offsetof(dyld::ThreadLocalVariables::TLV_Thunkv2,initialContentDelta));
+                        newThunk.initialContentDelta = (int32_t)(initialContentAddr - newThunkDeltaFieldAddr);
+                    }
 
                     // Set the thunk to tlv_get_addr()
-                    uint8_t high8 = 0;
-                    uint16_t authDiversity = 0;
-                    bool authHasAddrDiv = false;
-                    uint8_t authKey = ptrauth_key_asia;
-                    bool isAuth = this->config.layout.hasAuthRegion;
-                    Fixup::Cache64::setLocation(this->config.layout.cacheBaseAddress, &tlvThunk.thunk,
+                    uint8_t  high8          = 0;
+                    uint16_t authDiversity  = 0;
+                    bool     authHasAddrDiv = false;
+                    uint8_t  authKey        = ptrauth_key_asia;
+                    bool     isAuth         = this->config.layout.hasAuthRegion;
+                    Fixup::Cache64::setLocation(this->config.layout.cacheBaseAddress, &newThunk.func,
                                                 getAddrVMAddr, high8,
                                                 authDiversity, authHasAddrDiv, authKey, isAuth);
 
                     // Add to ASLR tracker
-                    segment.tracker.add(&tlvThunk.thunk);
+                    segment.tracker.add(&newThunk.func);
                 }
-            } else {
-                struct tlv_thunk_t
+            } 
+            else {
+                struct tlv_disk_thunk32_t
                 {
-                    uint32_t thunk;
+                    uint32_t func;
                     uint32_t key;
                     uint32_t offset;
                 };
-
-                uint32_t count = (uint32_t)(sectInfo.sectSize / sizeof(tlv_thunk_t));
-                tlv_thunk_t* thunkBuffer = (tlv_thunk_t*)sectionBuffer;
-                for ( uint32_t i = 0; i != count; ++i ) {
-                    tlv_thunk_t& tlvThunk = thunkBuffer[i];
-
-                    // Set the key to the next available key
-                    tlvThunk.key = key;
+                struct tlv_cache_thunk32_t
+                {
+                    uint32_t     func;
+                    uint16_t     key;
+                    uint16_t     offset;
+                    int32_t      machHeaderDelta; // if < 0, content is found by walking load commands. If > 0, then it is size and content is all zeros
+                };
+                static_assert(sizeof(tlv_disk_thunk32_t) == sizeof(tlv_cache_thunk32_t));
+                uint32_t            loadAddr    = (uint32_t)(cacheDylib.cacheHdr->preferredLoadAddress());
+                uint32_t            count       = (uint32_t)(sectInfo.size / sizeof(tlv_disk_thunk32_t));
+                tlv_disk_thunk32_t* thunkBuffer = (tlv_disk_thunk32_t*)sectionBuffer;
+                for ( uint32_t i = 0; i < count; ++i ) {
+                    uint32_t               offset   = thunkBuffer[i].offset; // only value needed from disk format
+                    tlv_cache_thunk32_t&   newThunk = *((tlv_cache_thunk32_t*)&thunkBuffer[i]);
+
+                    if ( offset > 0xFFFF )
+                        this->warning("thread-local too large (%u max 65535) in %s", offset, cacheDylib.inputFile->path.c_str());
+
+                    // Reset the fields using the tlv_cache_thunk32_t format
+                    newThunk.key                 = (uint16_t)key;
+                    newThunk.offset              = (uint16_t)offset;    // FIXME: error if offset > 0xFFFF
+                    newThunk.machHeaderDelta     = (uint32_t)initialContentSize;
+                    if ( initialContentNonZero ) {
+                        uint64_t newThunkDeltaFieldAddr = (sectInfo.address + i*sizeof(tlv_disk_thunk32_t) + offsetof(tlv_cache_thunk32_t,machHeaderDelta));
+                        newThunk.machHeaderDelta = (int32_t)(loadAddr - newThunkDeltaFieldAddr);
+                    }
 
                     // Set the thunk to tlv_get_addr()
-                    Fixup::Cache32::setLocation(this->config.layout.cacheBaseAddress, &tlvThunk.thunk,
-                                                getAddrVMAddr);
+                    Fixup::Cache32::setLocation(this->config.layout.cacheBaseAddress, &newThunk.func, getAddrVMAddr);
 
                     // Add to ASLR tracker
-                    segment.tracker.add(&tlvThunk.thunk);
+                    segment.tracker.add(&newThunk.func);
                 }
             }
         });
@@ -6329,22 +7339,36 @@
         if ( (dataConstRegion == nullptr) && (authConstRegion == nullptr) )
             continue;
 
-        for ( bool auth : { false, true } ) {
-            if ( auth && (authConstRegion == nullptr) )
+        for ( UniquedGOTKind sectionKind : { UniquedGOTKind::regular, UniquedGOTKind::authGot, UniquedGOTKind::authPtr } ) {
+
+            Region* region = nullptr;
+            CoalescedGOTSection* subCacheUniquedGOTs = nullptr;
+
+            // Skip sections if their segment doesn't exist
+            switch ( sectionKind ) {
+                case UniquedGOTKind::regular:
+                    region = dataConstRegion;
+                    subCacheUniquedGOTs = &subCache.uniquedGOTsOptimizer.regularGOTs;
+                    break;
+                case UniquedGOTKind::authGot:
+                    region = authConstRegion;
+                    subCacheUniquedGOTs = &subCache.uniquedGOTsOptimizer.authGOTs;
+                    break;
+                case UniquedGOTKind::authPtr:
+                    region = authConstRegion;
+                    subCacheUniquedGOTs = &subCache.uniquedGOTsOptimizer.authPtrs;
+                    break;
+            }
+
+            if ( region == nullptr )
                 continue;
-            if ( !auth && (dataConstRegion == nullptr) )
+
+            if ( subCacheUniquedGOTs->empty() )
                 continue;
-
-            Region& region = auth ? *authConstRegion : *dataConstRegion;
-            CoalescedGOTSection& subCacheUniquedGOTs = auth ? subCache.uniquedGOTsOptimizer.authGOTs : subCache.uniquedGOTsOptimizer.regularGOTs;
-            if ( subCacheUniquedGOTs.cacheChunk == nullptr )
-                continue;
-
-            UniquedGOTsChunk* subCacheGOTChunk = subCacheUniquedGOTs.cacheChunk->isUniquedGOTsChunk();
 
             std::set<const void*> seenFixups;
             std::vector<PatchInfo::GOTInfo> gots;
-            for ( const Chunk* chunk : region.chunks ) {
+            for ( const Chunk* chunk : region->chunks ) {
                 const DylibSegmentChunk* segmentChunk = chunk->isDylibSegmentChunk();
                 if ( !segmentChunk )
                     continue;
@@ -6355,46 +7379,55 @@
                 // Walk all the binds in this dylib, looking for GOT uses of the bind
                 assert(cacheDylib->bindTargets.size() == dylibPatchInfo.bindGOTUses.size());
                 assert(cacheDylib->bindTargets.size() == dylibPatchInfo.bindAuthGOTUses.size());
+                assert(cacheDylib->bindTargets.size() == dylibPatchInfo.bindAuthPtrUses.size());
                 for ( uint32_t bindIndex = 0; bindIndex != cacheDylib->bindTargets.size(); ++bindIndex ) {
                     const CacheDylib::BindTarget& bindTarget = cacheDylib->bindTargets[bindIndex];
 
-                    std::vector<PatchInfo::GOTInfo>* bindUses = nullptr;
-                    if ( auth ) {
-                        bindUses = &dylibPatchInfo.bindAuthGOTUses[bindIndex];
-                    } else {
-                        bindUses = &dylibPatchInfo.bindGOTUses[bindIndex];
+                    std::span<PatchInfo::GOTInfo> bindUses;
+                    switch ( sectionKind ) {
+                        case UniquedGOTKind::regular:
+                            bindUses = dylibPatchInfo.bindGOTUses[bindIndex];
+                            break;
+                        case UniquedGOTKind::authGot:
+                            bindUses = dylibPatchInfo.bindAuthGOTUses[bindIndex];
+                            break;
+                        case UniquedGOTKind::authPtr:
+                            bindUses = dylibPatchInfo.bindAuthPtrUses[bindIndex];
+                            break;
                     }
+
+                    if ( bindUses.empty() )
+                        continue;
 
                     // For absolute binds, just set the pointers and move on
                     if ( bindTarget.kind == CacheDylib::BindTarget::Kind::absolute ) {
-                        for ( const PatchInfo::GOTInfo& got : *bindUses ) {
-                            CacheVMAddress gotVMAddr = got.patchInfo.cacheVMAddr;
-                            assert(gotVMAddr >= subCacheGOTChunk->cacheVMAddress);
-                            assert(gotVMAddr < (subCacheGOTChunk->cacheVMAddress + subCacheGOTChunk->cacheVMSize));
-                            VMOffset cacheSectionVMOffset = gotVMAddr - subCacheGOTChunk->cacheVMAddress;
-
-                            const void* fixupLoc = subCacheGOTChunk->subCacheBuffer + cacheSectionVMOffset.rawValue();
+                        for ( const PatchInfo::GOTInfo& got : bindUses ) {
+                            CacheVMAddress gotVMAddr = got.useLocation.clientGOT->cacheVMAddress + got.useLocation.clientGOTOffset;
+                            const void* fixupLoc = subCacheUniquedGOTs->gotLocation(gotVMAddr);
+                            uint64_t targetValue = std::get<uint64_t>(got.targetValue);
                             if ( this->config.layout.is64 ) {
-                                *(uint64_t*)fixupLoc = got.targetValue.rawValue();
+                                *(uint64_t*)fixupLoc = targetValue;
                             } else {
-                                *(uint32_t*)fixupLoc = (uint32_t)got.targetValue.rawValue();
+                                *(uint32_t*)fixupLoc = (uint32_t)targetValue;
                             }
                         }
                         continue;
                     }
 
-                    gots.insert(gots.end(), bindUses->begin(), bindUses->end());
+                    assert(bindTarget.kind == CacheDylib::BindTarget::Kind::cacheImage);
+                    gots.insert(gots.end(), bindUses.begin(), bindUses.end());
                 }
             }
 
             // Found all the GOTs/authGOTS for this subCache.  Now we need to emit them
             for ( const PatchInfo::GOTInfo& got : gots ) {
-                CacheVMAddress gotVMAddr = got.patchInfo.cacheVMAddr;
-                assert(gotVMAddr >= subCacheGOTChunk->cacheVMAddress);
-                assert(gotVMAddr < (subCacheGOTChunk->cacheVMAddress + subCacheGOTChunk->cacheVMSize));
-                VMOffset cacheSectionVMOffset = gotVMAddr - subCacheGOTChunk->cacheVMAddress;
-
-                void* rawFixupLoc = subCacheGOTChunk->subCacheBuffer + cacheSectionVMOffset.rawValue();
+                CacheVMAddress gotVMAddr = got.useLocation.clientGOT->cacheVMAddress + got.useLocation.clientGOTOffset;
+
+                // Function variants don't get emitted right now
+                if ( !subCacheUniquedGOTs->shouldEmitGOT(gotVMAddr) )
+                    continue;
+
+                void* rawFixupLoc = subCacheUniquedGOTs->gotLocation(gotVMAddr);
 
                 // Ignore dupes
                 if ( seenFixups.count(rawFixupLoc) )
@@ -6402,11 +7435,13 @@
 
                 seenFixups.insert(rawFixupLoc);
 
-                CacheVMAddress targetVMAddr = config.layout.cacheBaseAddress + got.targetValue;
+                const DylibOffset& dylibOffset = std::get<DylibOffset>(got.targetValue);
+                InputDylibVMAddress inputVMAddr = dylibOffset.cacheDylib->inputLoadAddress + dylibOffset.vmOffset;
+                CacheVMAddress targetVMAddr = dylibOffset.cacheDylib->adjustor->adjustVMAddr(inputVMAddr);
                 if ( this->config.layout.is64 ) {
                     uint64_t high8 = 0;
                     uint64_t finalVMAddr = targetVMAddr.rawValue();
-                    if ( !got.patchInfo.authenticated ) {
+                    if ( !got.useLocation.authenticated ) {
                         high8 = (finalVMAddr >> 56);
                         if ( high8 != 0 ) {
                             // Remove high8 from the vmAddr
@@ -6417,16 +7452,16 @@
                                                 rawFixupLoc,
                                                 CacheVMAddress(finalVMAddr),
                                                 high8,
-                                                got.patchInfo.discriminator,
-                                                got.patchInfo.usesAddressDiversity, got.patchInfo.key,
-                                                got.patchInfo.authenticated);
+                                                got.useLocation.discriminator,
+                                                got.useLocation.usesAddressDiversity, got.useLocation.key,
+                                                got.useLocation.authenticated);
                 } else {
                     Fixup::Cache32::setLocation(this->config.layout.cacheBaseAddress,
                                                 rawFixupLoc,
                                                 CacheVMAddress(targetVMAddr));
                 }
 
-                subCacheGOTChunk->tracker.add(rawFixupLoc);
+                subCacheUniquedGOTs->trackFixup(rawFixupLoc);
             }
         }
     }
@@ -6483,7 +7518,7 @@
         objc_visitor::Visitor& objcVisitor = objcVisitors[dylibObjCIndex];
 
         if ( log ) {
-            printf("  at 0x%llx in %s\n", protocolVMAddr, objcVisitor.mf()->installName());
+            printf("  at 0x%llx in %s\n", protocolVMAddr, objcVisitor.hdr()->installName());
         }
 
         __block bool foundProtocol = false;
@@ -6641,7 +7676,7 @@
                         error = Error("Superclass of class '%s' is weak-import"
                                       "and missing.  Referenced in %s",
                                       objcClass.getName(objcVisitor),
-                                      objcVisitor.mf()->installName());
+                                      objcVisitor.hdr()->installName());
                     stopClass = true;
                     this->objcOptimizer.foundMissingWeakSuperclass = true;
                 }
@@ -6674,7 +7709,7 @@
                 classMap[classVMAddr] = (uint32_t)classInfos.size();
 
             if ( log ) {
-                printf("%s: [0x%08llx] %s%s\n", objcVisitor.mf()->installName(), classVMAddr.rawValue(),
+                printf("%s: [0x%08llx] %s%s\n", objcVisitor.hdr()->installName(), classVMAddr.rawValue(),
                        objcClass.getName(objcVisitor), objcClass.isMetaClass ? " (meta)" : "");
             }
 
@@ -6729,7 +7764,7 @@
         worklist.insert(worklist.end(), classInfo->subClasses.begin(), classInfo->subClasses.end());
         bool elidedSomething = false;
         const objc_visitor::Class& objcClass = classInfo->classPos;
-
+        const bool isSwiftClass = objcClass.isSwift(*classInfo->objcVisitor);
         auto& map = objcClass.isMetaClass ? metaclassMap : classMap;
 
         std::optional<VMAddress> superclassVMAddr = objcClass.getSuperclassVMAddr(*classInfo->objcVisitor);
@@ -6770,7 +7805,7 @@
                     continue;
 
                 // skip ivars that swiftc has optimized away
-                if ( ivar.elided(*classInfo->objcVisitor) ) {
+                if ( isSwiftClass && ivar.elided(*classInfo->objcVisitor) ) {
                     if ( log ) {
                         if ( !elidedSomething )
                             printf("adjusting ivars for %s\n", objcClass.getName(*classInfo->objcVisitor));
@@ -7160,7 +8195,7 @@
     size_t objcIndex = 0;
     for (size_t cacheIndex = 0; cacheIndex < this->cacheDylibs.size(); cacheIndex++) {
         CacheDylib& cacheDylib = this->cacheDylibs[cacheIndex];
-        if ( !cacheDylib.inputMF->hasObjC() )
+        if ( !cacheDylib.inputHdr->hasObjC() )
             continue;
 
         this->objcCategoryOptimizer.preAttachedDylibs.insert(objcIndex);
@@ -7217,13 +8252,65 @@
                          this->objcOptimizer.headerInfoReadOnlyChunk->subCacheBuffer,
                          this->objcOptimizer.headerInfoReadWriteChunk->subCacheBuffer,
                          this->objcOptimizer.headerInfoReadOnlyChunk->cacheVMAddress,
-                         this->swiftProtocolConformanceOptimizer);
-
+                         swiftPrespecializedDylib,
+                         this->swiftOptimizer);
     if ( diag.hasError() )
         return Error("Couldn't build Swift protocol opts because: %s", diag.errorMessageCStr());
 
     return Error();
 }
+
+// Scans the dylib's LINKEDIT chunks for the function-variants table and reports its cache
+// VM address and VM size through the out-parameters.  Both come back as 0 when the dylib has
+// no such chunk.  If more than one chunk qualifies, the last one visited wins.
+static void getFunctionVariantTable(const CacheDylib& cacheDylib, uint64_t& vmAddr, uint64_t& vmSize)
+{
+    uint64_t tableAddr = 0;
+    uint64_t tableSize = 0;
+    for ( const LinkeditDataChunk& linkeditChunk : cacheDylib.linkeditChunks ) {
+        if ( !linkeditChunk.isFunctionVariantsTable() )
+            continue;
+        tableAddr = linkeditChunk.cacheVMAddress.rawValue();
+        tableSize = linkeditChunk.cacheVMSize.rawValue();
+    }
+    vmAddr = tableAddr;
+    vmSize = tableSize;
+}
+
+// Builds the function-variant info table.  One dyld_cache_function_variant_entry is recorded
+// for every uniqued GOT slot, in every subcache, that points at a function variant.  The
+// collected entries are then written into the functionVariantsOptimizer chunk's buffer.
+void SharedCacheBuilder::emitFunctionVariants()
+{
+    // find uniqued GOT slots in each subcache that point to function variants
+    for ( SubCache& subCache : this->subCaches ) {
+        subCache.uniquedGOTsOptimizer.forEachFunctionVariant(^(const CoalescedGOTSection::FunctionVariantInfo& fv, uint64_t gotVMAddr,
+                                                               dyld3::MachOFile::PointerMetaData pmd) {
+            uint32_t targetDylibIndex = fv.dylibIndex;
+            uint64_t fvTableVmAddr    = 0;
+            uint64_t fvTableVmSize    = 0;
+            getFunctionVariantTable(this->cacheDylibs[targetDylibIndex], fvTableVmAddr, fvTableVmSize);
+
+            // Value-initialize the entry: its bytes are later copied verbatim into the cache
+            // buffer, so any bits not explicitly assigned below must be zero rather than
+            // indeterminate stack contents.
+            dyld_cache_function_variant_entry entry = {};
+            entry.fixupLocVmAddr               = gotVMAddr;
+            entry.functionVariantTableVmAddr   = fvTableVmAddr;
+            entry.functionVariantTableSizeDiv4 = fvTableVmSize/4;
+            entry.dylibHeaderVmAddr            = this->cacheDylibs[targetDylibIndex].cacheLoadAddress.rawValue();
+            entry.variantIndex                 = fv.variantIndex;
+            entry.pacAuth                      = pmd.authenticated;
+            entry.pacAddress                   = pmd.usesAddrDiversity;
+            entry.pacKey                       = pmd.key;
+            entry.pacDiversity                 = pmd.diversity;
+            entry.targetDylibIndex             = targetDylibIndex;
+            // the stored index must round-trip, i.e. it was not truncated by the entry's field
+            assert(entry.variantIndex == fv.variantIndex);
+            this->functionVariantsOptimizer.infos.push_back(entry);
+        });
+    }
+
+    // make sure it fits in estimated size
+    size_t actualSize = offsetof(dyld_cache_function_variant_info, entries[this->functionVariantsOptimizer.infos.size()]);
+    assert(actualSize < this->functionVariantsOptimizer.fvInfoTotalByteSize);
+    this->functionVariantsOptimizer.fvInfoTotalByteSize = actualSize;
+
+    // write table to LINKEDIT
+    dyld_cache_function_variant_info* table = (dyld_cache_function_variant_info*)(this->functionVariantsOptimizer.chunk->subCacheBuffer);
+    table->version = 1;
+    table->count   = (uint32_t)this->functionVariantsOptimizer.infos.size();
+    if ( table->count > 0 )
+        memcpy(table->entries, &this->functionVariantsOptimizer.infos[0], sizeof(dyld_cache_function_variant_entry)*this->functionVariantsOptimizer.infos.size());
+}
+
 
 void SharedCacheBuilder::emitCacheDylibsTrie()
 {
@@ -7275,11 +8362,14 @@
                 switch ( region.kind ) {
                     case Region::Kind::text:
                     case Region::Kind::dynamicConfig:
+                    case Region::Kind::readOnly:
                     case Region::Kind::linkedit:
                         maxSlide = std::min(maxSlide, subCacheLimit - region.subCacheVMSize);
                         break;
+                    case Region::Kind::tproConst:
                     case Region::Kind::data:
                     case Region::Kind::dataConst:
+                    case Region::Kind::tproAuthConst:
                     case Region::Kind::auth:
                     case Region::Kind::authConst:
                         if ( firstDataRegion == nullptr )
@@ -7304,6 +8394,11 @@
     // We must be a largeContiguous cache. Others were dealt with above in the x86_64 and/or sim cases
     assert(this->config.layout.contiguous.has_value());
 
+    // Some caches have a fixed max slide
+    if ( this->config.layout.cacheFixedSlide.has_value() ) {
+        return this->config.layout.cacheFixedSlide.value();
+    }
+
     // Start off making sure we can't slide past the end of the cache
     CacheVMAddress maxVMAddress(0ULL);
     for ( const Region& region : this->subCaches.back().regions ) {
@@ -7359,30 +8454,223 @@
     }
 }
 
+// Adjusts the linked-dylib list of `cacheDylib` for the Swift prespecialized dylib.
+// Does nothing when no Swift prespecialized dylib is present.  Any problem reported
+// through the Diagnostics object is returned as an Error.
+Error SharedCacheBuilder::patchLinkedDylibs(CacheDylib& cacheDylib)
+{
+    // Nothing to patch without a Swift prespecialized dylib
+    if ( swiftPrespecializedDylib == nullptr )
+        return Error::none();
+
+    Diagnostics diag;
+    const bool isPrespecializedDylib = (&cacheDylib == swiftPrespecializedDylib);
+    if ( isPrespecializedDylib ) {
+        // remove all but libSystem
+        cacheDylib.removeLinkedDylibs(diag);
+    }
+    else {
+        const bool isSwiftCore = (cacheDylib.installName.find("libswiftCore.dylib") != std::string_view::npos);
+        if ( isSwiftCore ) {
+            // libswiftCore gains a dependency on the Swift prespecialized dylib
+            cacheDylib.addLinkedDylib(diag, *swiftPrespecializedDylib);
+        }
+    }
+
+    if ( !diag.hasError() )
+        return Error::none();
+
+    return Error("%s", diag.errorMessageCStr());
+}
+
+// Appends one dictionary describing `subCache` to the atlas `files` array: its file name,
+// UUID, offset from `cacheVMAddress`, buffer size, VM address, every mapping listed in the
+// subcache header, and the span from the header's sharedRegionStart to the end of the
+// highest mapping.
+void SharedCacheBuilder::addSubCacheFileInfo(uint64_t cacheVMAddress, PropertyList::Array &files, const SubCache &subCache) {
+    using Array         = PropertyList::Array;
+    using Integer       = PropertyList::Integer;
+    using String        = PropertyList::String;
+    using Dictionary    = PropertyList::Dictionary;
+
+    // Zero-initialize so a failed parse yields a null UUID rather than indeterminate bytes
+    uuid_t subcacheUUID = {};
+    uuid_parse(subCache.uuidString, subcacheUUID);
+    auto& subCacheFile = files.addObject<Dictionary>();
+
+    subCacheFile.addObjectForKey<String>("name", options.mainCacheFileName + subCache.fileSuffix);
+    subCacheFile.addObjectForKey<PropertyList::UUID>(kDyldAtlasSharedCacheUUIDKey,subcacheUUID);
+    subCacheFile.addObjectForKey<Integer>("voff",subCache.subCacheVMAddress.rawValue()-cacheVMAddress);
+    subCacheFile.addObjectForKey<Integer>("fsze", subCache.bufferSize);
+    subCacheFile.addObjectForKey<Integer>("padr", subCache.subCacheVMAddress.rawValue());
+    auto& mappingsArray = subCacheFile.addObjectForKey<Array>(kDyldAtlasSharedCacheMappingArrayKey);
+
+    const dyld_cache_header* subCacheHeader = (const dyld_cache_header*)subCache.buffer;
+    const auto* mappings                    = (const dyld_cache_mapping_info*)((const uint8_t*)subCacheHeader + subCacheHeader->mappingOffset);
+
+    // Track the end of the highest mapping while emitting them.  The index is unsigned to
+    // match the header's mapping count.
+    uint64_t lastAddress = 0;
+    for ( uint32_t i = 0; i < subCacheHeader->mappingCount; ++i ) {
+        auto& mapping = mappingsArray.addObject<Dictionary>();
+        mapping.addObjectForKey<Integer>(kDyldAtlasSharedCacheMappingsSizeKey, mappings[i].size);
+        mapping.addObjectForKey<Integer>(kDyldAtlasSharedCacheMappingsPreferredLoadAddressKey, mappings[i].address);
+        mapping.addObjectForKey<Integer>(kDyldAtlasSharedCacheMappingsFileOffsetKey, mappings[i].fileOffset);
+        mapping.addObjectForKey<Integer>(kDyldAtlasSharedCacheMappingsMaxProtKey, mappings[i].maxProt);
+        const uint64_t mappingEnd = mappings[i].address + mappings[i].size;
+        if ( mappingEnd > lastAddress ) {
+            lastAddress = mappingEnd;
+        }
+    }
+    subCacheFile.addObjectForKey<Integer>("size", lastAddress-subCacheHeader->sharedRegionStart);
+}
+
+// Fills `cacheAtlas` with the description of one main cache: its UUID, preferred load
+// address, total VM size, optional symbols file, the "dscs" array (the main cache file
+// followed by each of its subcaches) and the pointer size.
+void SharedCacheBuilder::addCacheAtlasInfo(PropertyList::Dictionary *cacheAtlas, const SubCache &mainCache) {
+    using Array         = PropertyList::Array;
+    using Integer       = PropertyList::Integer;
+    using String        = PropertyList::String;
+
+    uint64_t cacheVMAddress = mainCache.subCacheVMAddress.rawValue();
+    // Zero-initialize so a failed parse yields a null UUID rather than indeterminate bytes
+    uuid_t cacheUUID = {};
+    uuid_parse(mainCache.uuidString, cacheUUID);
+    cacheAtlas->addObjectForKey<PropertyList::UUID>(kDyldAtlasSharedCacheUUIDKey, cacheUUID);
+    cacheAtlas->addObjectForKey<Integer>(kDyldAtlasSharedCachePreferredLoadAddressKey, mainCache.subCacheVMAddress.rawValue());
+    cacheAtlas->addObjectForKey<Integer>(kDyldAtlasSharedCacheVMSizeKey, this->totalVMSize.rawValue());
+    dyld_cache_header*   cacheHeader        = (dyld_cache_header*)mainCache.buffer;
+
+    if (!uuid_is_null(cacheHeader->symbolFileUUID)) {
+        cacheAtlas->addObjectForKey<String>(kDyldAtlasSharedCacheSymbolFileName, options.mainCacheFileName + ".symbols");
+        // NOTE(review): the symbol file UUID is stored under the same key as the symbol file
+        // name above.  This looks like it should use a dedicated UUID key - confirm against
+        // the atlas key definitions before changing.
+        cacheAtlas->addObjectForKey<PropertyList::UUID>(kDyldAtlasSharedCacheSymbolFileName, cacheHeader->symbolFileUUID);
+    }
+
+    auto& files = cacheAtlas->addObjectForKey<Array>("dscs");
+    addSubCacheFileInfo(cacheVMAddress, files, mainCache);
+    for (auto& subCache : mainCache.subCaches) {
+        addSubCacheFileInfo(cacheVMAddress, files, *subCache);
+    }
+
+    // FIXME: Remove once deprecate legacy APIs that need this
+    // "psze" is the pointer size in bytes.  Only the arm64_32 cache gets 4 byte pointers
+    // here; every other magic is reported as 8.
+    if (strncmp(cacheHeader->magic, "dyld_v1arm64_32", 15) == 0) {
+        cacheAtlas->addObjectForKey<Integer>("psze", 4);
+    } else {
+        cacheAtlas->addObjectForKey<Integer>("psze", 8);
+    }
+}
+
+// This generates the atlas binary plist included with the shared cache.
+// The plist describes the main customer and/or development cache (keyed both by UUID and by
+// file name) plus the list of images with their segments.  It is then wrapped, together with
+// by-name symlinks, in an archive produced by AAREncoder, and the archive bytes are appended
+// to atlasData.
+void SharedCacheBuilder::buildAtlas() {
+    STACK_ALLOCATOR(allocator, 0);
+    using Array         = PropertyList::Array;
+    using Dictionary    = PropertyList::Dictionary;
+    using Integer       = PropertyList::Integer;
+    using String        = PropertyList::String;
+    auto propertyListEncoder            = PropertyList(allocator);
+    auto& rootDictionary                = propertyListEncoder.rootDictionary();
+    // The same plist contains both the customer and shared cache data, since they share layouts
+    // We include dictionaries at the root so they can be lookup by leaf name or UUID
+    auto& byUuidDictionary              = rootDictionary.addObjectForKey<Dictionary>("uuids");
+    auto& byNameDictionary              = rootDictionary.addObjectForKey<Dictionary>("names");
+    Dictionary* customerCacheAtlas      = nullptr;
+    Dictionary* devCacheAtlas           = nullptr;
+    Array* images                       = nullptr;
+    uuid_string_t customerCacheUUID     = {0};
+    uuid_string_t devCacheUUID          = {0};
+    std::string   customerCacheName;
+    std::string   devCacheName;
+
+    for ( const SubCache& subCache : this->subCaches ) {
+        if ( subCache.isMainCustomerCache() ) {
+            customerCacheName = options.mainCacheFileName + subCache.fileSuffix;
+            customerCacheAtlas = &byUuidDictionary.addObjectForKey<Dictionary>(subCache.uuidString);
+            byNameDictionary.insertObjectForKey(customerCacheName, *customerCacheAtlas);
+            addCacheAtlasInfo(customerCacheAtlas, subCache);
+            strcpy(customerCacheUUID, subCache.uuidString);
+        } else if ( subCache.isMainDevelopmentCache() ) {
+            devCacheName = options.mainCacheFileName + subCache.fileSuffix;
+            devCacheAtlas = &byUuidDictionary.addObjectForKey<Dictionary>(subCache.uuidString);
+            byNameDictionary.insertObjectForKey(devCacheName, *devCacheAtlas);
+            addCacheAtlasInfo(devCacheAtlas, subCache);
+            strcpy(devCacheUUID, subCache.uuidString);
+        }
+    }
+    assert((customerCacheAtlas != nullptr) || (devCacheAtlas != nullptr));
+
+    // The bplist00 format supports uniquing objects, but uniquing collections is slow. Since we know a priori the images for both cache variants
+    // will be the same we create it in the first cache atlas, and if there is a second we insert a reference to the already existing one.
+    if (customerCacheAtlas) {
+        images = &customerCacheAtlas->addObjectForKey<Array>(kDyldAtlasSharedCacheImageArrayKey);
+        if (devCacheAtlas) {
+            devCacheAtlas->insertObjectForKey(kDyldAtlasSharedCacheImageArrayKey, *images);
+        }
+    } else {
+        images = &devCacheAtlas->addObjectForKey<Array>(kDyldAtlasSharedCacheImageArrayKey);
+    }
+
+    for ( const CacheDylib& cacheDylib : this->cacheDylibs ) {
+        auto& image = images->addObject<Dictionary>();
+        auto& segments = image.addObjectForKey<Array>(kDyldAtlasImageSegmentArrayKey);
+        image.addObjectForKey<String>(kDyldAtlasImageInstallnameKey, cacheDylib.installName);
+        image.addObjectForKey<Integer>(kDyldAtlasImagePreferredLoadAddressKey, cacheDylib.cacheLoadAddress.rawValue());
+        // Read the UUID outside of assert(): the call has a required side effect (it fills in
+        // `uuid`) and would be compiled out entirely if NDEBUG were defined.
+        uuid_t uuid = {};
+        const bool hasUUID = cacheDylib.cacheHdr->getUuid(uuid);
+        assert(hasUUID);
+        (void)hasUUID;
+        image.addObjectForKey<PropertyList::UUID>(kDyldAtlasImageUUIDKey, uuid);
+
+        for ( const DylibSegmentChunk& segmentChunk : cacheDylib.segments ) {
+            auto& segment = segments.addObject<Dictionary>();
+            segment.addObjectForKey<String>(kDyldAtlasSegmentNameKey, segmentChunk.name());
+            segment.addObjectForKey<Integer>(kDyldAtlasSegmentPreferredLoadAddressKey, segmentChunk.cacheVMAddress.rawValue());
+            segment.addObjectForKey<Integer>(kDyldAtlasSegmentSizeKey, segmentChunk.cacheVMSize.rawValue());
+            segment.addObjectForKey<Integer>(kDyldAtlasSegmentFileOffsetKey, segmentChunk.subCacheFileOffset.rawValue());
+            segment.addObjectForKey<Integer>(kDyldAtlasSegmentFileSizeKey, segmentChunk.subCacheFileSize.rawValue());
+            // Permissions are derived from the segment name only
+            if ( strcmp("__TEXT", segmentChunk.name()) == 0 ) {
+                segment.addObjectForKey<Integer>(kDyldAtlasSegmentPermissionsKey, VM_PROT_READ | VM_PROT_EXECUTE);
+            } else if ( strcmp("__LINKEDIT", segmentChunk.name()) == 0 ) {
+                segment.addObjectForKey<Integer>(kDyldAtlasSegmentPermissionsKey, VM_PROT_READ);
+            } else {
+                segment.addObjectForKey<Integer>(kDyldAtlasSegmentPermissionsKey, VM_PROT_READ | VM_PROT_WRITE);
+            }
+        }
+    }
+
+    ByteStream fileStream(allocator);
+    propertyListEncoder.encode(fileStream);
+    AAREncoder aarEncoder(allocator);
+
+    if (customerCacheUUID[0]) {
+        // The plist is stored once under the customer cache UUID; everything else is a symlink to it
+        std::string plistPath = std::string("caches/uuids/") + customerCacheUUID + ".plist";
+        std::string symlinkTarget = std::string("../uuids/") + customerCacheUUID + ".plist";
+        std::string symlinkSource = std::string("caches/names/") + customerCacheName + ".plist";
+
+        aarEncoder.addFile(plistPath, fileStream.span());
+        aarEncoder.addSymLink(symlinkSource, symlinkTarget);
+        if (devCacheUUID[0]) {
+            std::string devPlistPath = std::string("caches/uuids/") + devCacheUUID + ".plist";
+            std::string devSymlinkSource = std::string("caches/names/") + devCacheName + ".plist";
+            aarEncoder.addSymLink(devPlistPath, symlinkTarget);
+            aarEncoder.addSymLink(devSymlinkSource, symlinkTarget);
+        }
+    } else {
+        // Development-only build: the plist is stored under the development cache UUID
+        assert(devCacheUUID[0] != 0);
+        std::string plistPath = std::string("caches/uuids/") + devCacheUUID + ".plist";
+        std::string symlinkTarget = std::string("../uuids/") + devCacheUUID + ".plist";
+        std::string symlinkSource = std::string("caches/names/") + devCacheName + ".plist";
+
+        aarEncoder.addFile(plistPath, fileStream.span());
+        aarEncoder.addSymLink(symlinkSource, symlinkTarget);
+    }
+
+    ByteStream outputStream(allocator);
+    aarEncoder.encode(outputStream);
+    std::copy(outputStream.begin(), outputStream.end(), std::back_insert_iterator(atlasData));
+}
+
 void SharedCacheBuilder::computeCacheHeaders()
 {
     Timer::Scope timedScope(this->config, "computeCacheHeaders time");
 
-    for ( SubCache& subCache : this->subCaches )
-        subCache.writeCacheHeader(this->options, this->config, this->cacheDylibs);
-
-    // Content for the first (main) subCache only
+    // Content for all subcaches
     __block uint32_t osVersion                  = 0;
     __block uint32_t altPlatform                = 0;
     __block uint32_t altOsVersion               = 0;
+
+    // Content for the first (main) subCache only
     CacheVMAddress   dyldInCacheUnslidAddr      = CacheVMAddress(0ULL);
     CacheVMAddress   dyldInCacheEntryUnslidAddr = CacheVMAddress(0ULL);
+
     {
         // look for libdyld.dylib and record OS verson info into cache header
         for ( const CacheDylib& cacheDylib : this->cacheDylibs ) {
             if ( endsWith(cacheDylib.installName, "/libdyld.dylib") ) {
-                cacheDylib.inputMF->forEachSupportedPlatform(^(dyld3::Platform platform, uint32_t minOS, uint32_t sdk) {
-                    if ( platform == options.platform ) {
-                        osVersion = minOS;
+                cacheDylib.inputHdr->platformAndVersions().unzip(^(mach_o::PlatformAndVersions pvs) {
+                    if ( pvs.platform == options.platform ) {
+                        osVersion = pvs.minOS.value();
                     }
                     else {
-                        altPlatform  = (uint32_t)platform;
-                        altOsVersion = minOS;
+                        altPlatform = pvs.platform.value();
+                        altOsVersion = pvs.minOS.value();
                     }
                 });
             }
@@ -7391,13 +8679,17 @@
                 dyldInCacheUnslidAddr = cacheDylib.cacheLoadAddress;
                 uint64_t dyldEntryOffset;
                 bool     usesCRT;
-                if ( cacheDylib.cacheMF->getEntry(dyldEntryOffset, usesCRT) ) {
+                if ( cacheDylib.cacheHdr->getEntry(dyldEntryOffset, usesCRT) ) {
                     // the "pc" value in the LC_UNIXTHREAD was adjusted when dyld was placed in the cache
                     dyldInCacheEntryUnslidAddr = dyldInCacheUnslidAddr + VMOffset(dyldEntryOffset);
                 }
             }
         }
     }
+
+    for ( SubCache& subCache : this->subCaches )
+        subCache.writeCacheHeader(this->options, this->config, this->cacheDylibs,
+                                  osVersion, altPlatform, altOsVersion);
 
     // Fill in info for the main caches.  This must be after addCacheHeaderImageInfo().
     for ( SubCache& subCache : this->subCaches ) {
@@ -7405,11 +8697,11 @@
             subCache.addMainCacheHeaderInfo(this->options, this->config,
                                             this->cacheDylibs,
                                             this->totalVMSize, getMaxSlide(),
-                                            osVersion, altPlatform, altOsVersion,
                                             dyldInCacheUnslidAddr, dyldInCacheEntryUnslidAddr,
                                             this->dylibTrieOptimizer,
-                                            this->objcOptimizer, this->swiftProtocolConformanceOptimizer,
-                                            this->patchTableOptimizer, this->prebuiltLoaderBuilder);
+                                            this->objcOptimizer, this->swiftOptimizer,
+                                            this->patchTableOptimizer, this->functionVariantsOptimizer,
+                                            this->prebuiltLoaderBuilder, this->prewarmingOptimizer);
             continue;
         }
 
@@ -7492,7 +8784,12 @@
     return this->evictedDylibs;
 }
 
-void SharedCacheBuilder::getResults(std::vector<CacheBuffer>& results) const
+// Accessor for the stored Swift prespecialized dylib build error text.
+std::string_view SharedCacheBuilder::getSwiftPrespecializedDylibBuildError() const
+{
+    return this->swiftPrespecializedDylibBuildError;
+}
+
+void SharedCacheBuilder::getResults(std::vector<CacheBuffer>& results, std::vector<std::byte>& atlas) const
 {
     for ( const SubCache& subCache : this->subCaches ) {
         CacheBuffer buffer;
@@ -7500,6 +8797,8 @@
         buffer.bufferSize = subCache.bufferSize;
 
         buffer.cdHash = cdHashToString(subCache.cdHash);
+        if ( this->config.codeSign.mode == CodeSign::Mode::agile )
+            buffer.agilecdHash = cdHashToString(subCache.agilecdHash);
         buffer.uuid   = subCache.uuidString;
 
         buffer.cacheFileSuffix = subCache.fileSuffix;
@@ -7507,13 +8806,8 @@
         buffer.usedByCustomerConfig = subCache.shouldKeepCache(false, true);
         buffer.usedByDevelopmentConfig = subCache.shouldKeepCache(true, false);
 
-        // The builder executable also passes back the fd.  This should typically be used instead of the data buffer
-#if !SUPPORT_CACHE_BUILDER_MEMORY_BUFFERS
-        buffer.fd       = subCache.fd;
-        buffer.tempPath = subCache.tempPath;
-#endif
-
         results.push_back(std::move(buffer));
+        atlas = std::move(this->atlasData);
     }
 }
 
@@ -7529,12 +8823,15 @@
                 case Region::Kind::text:
                     prot = "EX";
                     break;
+                case Region::Kind::tproConst:
                 case Region::Kind::data:
                 case Region::Kind::dataConst:
+                case Region::Kind::tproAuthConst:
                 case Region::Kind::auth:
                 case Region::Kind::authConst:
                     prot = "RW";
                     break;
+                case Region::Kind::readOnly:
                 case Region::Kind::linkedit:
                     prot = "RO";
                     break;
@@ -7584,31 +8881,32 @@
 
     assert(mainSubCache.isMainCache());
 
-    dyld3::json::Node cacheNode;
+    json::Node cacheNode;
 
     cacheNode.map["version"].value = "1";
     cacheNode.map["disposition"].value = disposition;
-    cacheNode.map["base-address"].value = dyld3::json::hex(baseAddress);
+    cacheNode.map["arch"].value = this->options.arch.name();
+    cacheNode.map["platform"].value = mach_o::Platform(this->options.platform).name();
+    cacheNode.map["base-address"].value = json::hex(baseAddress);
     cacheNode.map["uuid"].value = mainSubCache.uuidString;
 
-    dyld3::json::Node imagesNode;
+    json::Node imagesNode;
     for ( const CacheDylib& cacheDylib : this->cacheDylibs ) {
-        dyld3::json::Node imageNode;
+        json::Node imageNode;
         imageNode.map["path"].value = cacheDylib.installName;
-        const dyld3::MachOFile* mf = cacheDylib.cacheMF;
         uuid_t uuid;
-        if ( mf->getUuid(uuid) ) {
+        if ( cacheDylib.cacheHdr->getUuid(uuid) ) {
             uuid_string_t uuidStr;
             uuid_unparse(uuid, uuidStr);
             imageNode.map["uuid"].value = uuidStr;
         }
 
-        __block dyld3::json::Node segmentsNode;
-        mf->forEachSegment(^(const dyld3::MachOAnalyzer::SegmentInfo &info, bool &stop) {
-            dyld3::json::Node segmentNode;
-            segmentNode.map["name"].value = info.segName;
-            segmentNode.map["start-vmaddr"].value = dyld3::json::hex(info.vmAddr);
-            segmentNode.map["end-vmaddr"].value = dyld3::json::hex(info.vmAddr + info.vmSize);
+        __block json::Node segmentsNode;
+        cacheDylib.cacheHdr->forEachSegment(^(const Header::SegmentInfo &info, bool &stop) {
+            json::Node segmentNode;
+            segmentNode.map["name"].value = info.segmentName;
+            segmentNode.map["start-vmaddr"].value = json::hex(info.vmaddr);
+            segmentNode.map["end-vmaddr"].value = json::hex(info.vmaddr + info.vmsize);
 
             // Add sections in verbose mode
             segmentsNode.array.push_back(segmentNode);
@@ -7689,9 +8987,9 @@
 {
     va_list list;
     va_start(list, format);
-    void* buffer = _simple_salloc();
+    _SIMPLE_STRING buffer = _simple_salloc();
     _simple_vsprintf(buffer, format, list);
-    this->warnings.push_back((const char*)buffer);
+    this->warnings.push_back(_simple_string(buffer));
     _simple_sfree(buffer);
     va_end(list);
 
@@ -7715,3 +9013,219 @@
 
     fprintf(stderr, "Didn't find a dylib with install name: %s\n", installName);
 }
+
+// Install name given to the placeholder InputFile and CacheDylib reserved for the Swift prespecialized dylib (this fixes its position in the library order); it is also how that placeholder is located again later.
+const std::string_view swiftPrespecializedDylibInstallName = "/usr/lib/libswiftPrespecialized.dylib";
+
+// Decides whether this cache build should produce the Swift prespecialized dylib.
+// Returns false for driverKit and ExclaveKit platforms, when no Swift generic
+// metadata file was supplied, or (outside unit-test and simulator builders) when
+// the external metadata builder entry point is null.
+bool SharedCacheBuilder::shouldBuildSwiftPrespecializedDylib()
+{
+    // the prespecialized dylib is not built for these platforms
+    const bool excludedPlatform = (options.platform == Platform::driverKit) || options.platform.isExclaveKit();
+    if ( excludedPlatform )
+        return false;
+
+    // nothing to build from unless the generic metadata file is defined
+    if ( options.swiftGenericMetadataFile.empty() )
+        return false;
+
+#if !BUILDING_CACHE_BUILDER_UNIT_TESTS && !BUILDING_SIM_CACHE_BUILDER
+    // only build when the metadata builder is actually available
+    return swift_externalMetadataBuilder_create != nullptr;
+#else
+    return true;
+#endif
+}
+
+// Runs the Swift external metadata builder over the cache dylibs and stores the
+// JSON it produces in swiftPrespecializedDylibJSON.
+//
+// Returns an Error naming the builder call that failed, or Error::none() on success.
+// For unit-test and simulator builders the body is compiled out and the function
+// simply returns Error::none().
+Error SharedCacheBuilder::buildSwiftPrespecializedDylibJSON()
+{
+#if !BUILDING_CACHE_BUILDER_UNIT_TESTS && !BUILDING_SIM_CACHE_BUILDER
+    Timer::Scope timedScope(this->config, "buildSwiftPrespecializedDylibJSON time");
+
+    {
+        SwiftExternalMetadataBuilder* builder = swift_externalMetadataBuilder_create(options.platform.value(), options.arch.name());
+        if ( !builder )
+            return Error("swift_externalMetadataBuilder_create failed");
+
+        // Destroy the builder on every exit from this scope, including the error returns below.
+        // A returned Error is fully constructed before this destructor runs, so the builder's
+        // message strings are still alive while they are being formatted.
+        struct BuilderReleaser {
+            SwiftExternalMetadataBuilder* builder;
+            ~BuilderReleaser() { swift_externalMetadataBuilder_destroy(builder); }
+        } builderReleaser{ builder };
+
+        for ( const CacheDylib& dylib : this->cacheDylibs ) {
+            // placeholder dylibs have no input mach-o yet
+            if ( dylib.inputHdr == nullptr ) continue;
+
+            // TODO: rdar://132262275 (dyld shared cache builder should tell Swift Metadata builder also about dyld)
+            if ( dylib.inputHdr->isDylinker() ) continue;
+
+            if ( const char* err = swift_externalMetadataBuilder_addDylib(builder, dylib.inputHdr->installName(),
+                    (const struct mach_header*)dylib.inputHdr, dylib.inputFile->size) )
+                return Error("swift_externalMetadataBuilder_addDylib failed: %s", err);
+        }
+
+        if ( const char* err = swift_externalMetadataBuilder_readNamesJSON(builder, options.swiftGenericMetadataFile.c_str()) )
+            return Error("swift_externalMetadataBuilder_readNamesJSON failed: %s", err);
+
+        if ( const char* err = swift_externalMetadataBuilder_buildMetadata(builder) )
+            return Error("swift_externalMetadataBuilder_buildMetadata failed: %s", err);
+
+        // copy the JSON into our own string before the builder goes away
+        if ( const char* json = swift_externalMetadataBuilder_getMetadataJSON(builder) )
+            swiftPrespecializedDylibJSON = json;
+        else
+            return Error("swift_externalMetadataBuilder_getMetadataJSON returned an empty JSON");
+    }
+
+    const std::string_view placeholderVersion = R"("platformVersion": "1.0")";
+    // Patch platformVersion if it's 1.0 until rdar://122585868 is fixed
+    if ( auto pos = swiftPrespecializedDylibJSON.find(placeholderVersion);
+            pos != swiftPrespecializedDylibJSON.npos ) {
+
+        __block mach_o::Version32 newMinOS;
+        // determine new deployment target based on dyld's version
+        for ( const InputFile& inputFile : allInputFiles ) {
+            if ( !inputFile.mf )
+                continue;
+
+            if ( !endsWith(inputFile.path, "dyld") )
+                continue;
+
+            ((Header*)inputFile.mf)->platformAndVersions().unzip(^(mach_o::PlatformAndVersions pvs) {
+                if ( pvs.platform == options.platform )
+                    newMinOS = pvs.minOS;
+            });
+            // only the first matching input file is consulted
+            break;
+        }
+
+        // leave the placeholder alone unless a newer version was found
+        if ( newMinOS > mach_o::Version32(1, 0) ) {
+            char verStr[32];
+            newMinOS.toString(verStr);
+            std::string newVersion = "\"platformVersion\": \"";
+            newVersion += verStr;
+            newVersion += "\"";
+
+            swiftPrespecializedDylibJSON.replace(pos, placeholderVersion.size(), newVersion);
+        }
+    }
+
+    if ( options.debug ) {
+        // dump the JSON to a uniquely named file in TMPDIR (or /tmp) for inspection
+        std::string path;
+        if ( const char* dir = getenv("TMPDIR") )
+            path = dir;
+        if ( path.empty() )
+            path = "/tmp";
+        path += "/swift-prespecialized.json-XXXXXX";
+
+        int outFileFd = mkstemp(path.data());
+        if ( outFileFd != -1 ) {
+            // best-effort debug dump: a short or failed write is deliberately not an error
+            (void)write(outFileFd, swiftPrespecializedDylibJSON.data(), swiftPrespecializedDylibJSON.size());
+            close(outFileFd);
+        }
+    }
+#endif // !BUILDING_CACHE_BUILDER_UNIT_TESTS && !BUILDING_SIM_CACHE_BUILDER
+
+    return Error::none();
+}
+
+// Reserves a placeholder input file and a placeholder cache dylib, both named by
+// swiftPrespecializedDylibInstallName, when the Swift prespecialized dylib is
+// going to be built. Returns true if the placeholders were added.
+bool SharedCacheBuilder::reserveSwiftPrespecializedInputFile()
+{
+    const bool reserveSlot = shouldBuildSwiftPrespecializedDylib();
+    if ( reserveSlot ) {
+        // no mach-o content or file identity yet, only the path is known
+        InputFile placeholder;
+        placeholder.path  = swiftPrespecializedDylibInstallName;
+        placeholder.mf    = nullptr;
+        placeholder.inode = 0;
+        placeholder.mtime = 0;
+
+        allInputFiles.push_back(std::move(placeholder));
+        cacheDylibs.push_back(CacheDylib(swiftPrespecializedDylibInstallName));
+    }
+    return reserveSlot;
+}
+
+// Builds the Swift prespecialized dylib and installs it into the placeholder slots
+// named by swiftPrespecializedDylibInstallName.
+//
+// Steps: build the metadata JSON, link it into a dylib written to a temporary file,
+// map that file, select the compatible slice, and re-create the placeholder
+// CacheDylib from the now-populated input file (keeping its cache index).
+//
+// Returns Error::none() when the dylib is not needed or was created successfully;
+// otherwise an Error describing the step that failed. The mapped dylib stays mapped
+// because the input file's mach-o pointer refers into it.
+Error SharedCacheBuilder::createSwiftPrespecializedDylib()
+{
+    if ( !shouldBuildSwiftPrespecializedDylib() )
+        return Error::none();
+
+    if ( Error err = buildSwiftPrespecializedDylibJSON() )
+        return err;
+
+    Timer::Scope timedScope(this->config, "createSwiftPrespecializedDylib time");
+
+    // the placeholder is expected to be the last input file
+    if ( allInputFiles.empty() || allInputFiles.back().path != swiftPrespecializedDylibInstallName )
+        return Error("missing input file placeholder for Swift prespecialized dylib");
+    InputFile* inputFile = &allInputFiles.back();
+
+    // TODO: support in-memory file buffer
+    std::string path;
+    if ( const char* dir = getenv("TMPDIR") )
+        path = dir;
+    if ( path.empty() )
+        path = "/tmp";
+    path += "/libswiftPrespecialized.dylib-XXXXXX";
+
+    int outFileFd = mkstemp(path.data());
+    if ( outFileFd == -1 )
+        return Error("couldn't create a temporary file for Swift prespecialized dylib: %s", (const char*)strerror(errno));
+
+    // only the unique name is needed here, the linker writes the file by path
+    close(outFileFd);
+
+    // Remove the temporary file on every exit path so builds don't accumulate files in TMPDIR.
+    // An existing mapping of the file stays valid after unlink. The file is kept in debug mode
+    // so it can be inspected.
+    const bool keepTempFile = options.debug;
+    struct TempFileRemover {
+        const char* path;
+        bool        keep;
+        ~TempFileRemover() { if ( !keep ) unlink(path); }
+    } tempFileRemover{ path.c_str(), keepTempFile };
+
+    std::vector<const char*> dylibsList;
+    // the dylib list needs to be in order of objc dylibs
+    for ( const CacheDylib* dylib : this->objcOptimizer.objcDylibs )
+        dylibsList.push_back(CString::dup(dylib->installName).c_str());
+
+    // capture the linker's message before releasing anything it was given
+    Error linkError = Error::none();
+    if ( const char* err = ldMakeDylibFromJSON(swiftPrespecializedDylibJSON, dylibsList, path.c_str()) )
+        linkError = Error("%s", err);
+
+    // cleanup dylibs list, whether or not linking succeeded
+    for ( const char* str : dylibsList )
+        free((void*)str);
+
+    if ( linkError )
+        return linkError;
+
+    // re-open output file
+    outFileFd = open(path.c_str(), O_RDONLY);
+    if ( outFileFd < 0 )
+        return Error("could not open swift dylib file because: %s", (const char*)strerror(errno));
+
+    struct stat stat_buf;
+    if (  fstat(outFileFd, &stat_buf) == -1 ) {
+        // save errno first, close() may overwrite it
+        const int statErrno = errno;
+        close(outFileFd);
+        return Error("could not stat swift dylib file because: %s", (const char*)strerror(statErrno));
+    }
+
+    vm_size_t bufferSize = stat_buf.st_size;
+    void* buffer = mmap(nullptr, bufferSize, PROT_READ, MAP_FILE | MAP_SHARED, outFileFd, 0);
+    const int mmapErrno = errno;
+    // the descriptor is not needed once the mapping exists (or has failed)
+    close(outFileFd);
+    if ( buffer == MAP_FAILED ) {
+        // Failed to mmap the file
+        return Error("could not mmap swift dylib file because: %s", (const char*)strerror(mmapErrno));
+    }
+
+    Diagnostics diag;
+    uint64_t    sliceOffset = 0;
+    inputFile->mf = MachOFile::compatibleSlice(diag, sliceOffset, inputFile->size, buffer, bufferSize, path.data(),
+                                               this->options.platform, /* isOSBinary */ false,
+                                               this->options.gradedArchs);
+    if ( diag.hasError() )
+        return Error("%s", diag.errorMessageCStr());
+    // guard the dereferences below in case no slice was returned without an error being recorded
+    if ( inputFile->mf == nullptr )
+        return Error("could not find a compatible slice in Swift prespecialized dylib");
+
+    // recreate cache dylib at the reserved slot
+    auto cacheDylibIt = std::find_if(cacheDylibs.begin(), cacheDylibs.end(), [](CacheDylib& dylib) {
+            return dylib.inputHdr == nullptr && dylib.installName == swiftPrespecializedDylibInstallName;
+    });
+    if ( cacheDylibIt == cacheDylibs.end() )
+        return Error("missing cache dylib slot for Swift prespecialized dylib");
+
+    // save previously computed cache index
+    uint32_t index = cacheDylibIt->cacheIndex;
+    // recreate cache dylib with the updated input file
+    *cacheDylibIt = CacheDylib(*inputFile);
+    cacheDylibIt->cacheIndex = index;
+    // rdar://122906481 (Shared cache builder - explicitly model dylibs without a need for a patch table)
+    cacheDylibIt->needsPatchTable = false;
+    this->swiftPrespecializedDylib = &*cacheDylibIt;
+
+    // sanity check Swift dylib compatibility
+    __block Error err = Error::none();
+    inputFile->mf->withFileLayout(diag, ^(const mach_o::Layout& layout) {
+        mach_o::SplitSeg splitSeg(layout);
+
+        if ( !splitSeg.isV2() )
+            err = Error("Swift prespecialized dylib must use split seg V2");
+    });
+    // a layout failure would otherwise skip the split seg check silently
+    if ( diag.hasError() )
+        err = Error("%s", diag.errorMessageCStr());
+    // as before, the chained fixups message takes precedence when several checks fail
+    if ( !inputFile->mf->hasChainedFixups() )
+        err = Error("Swift prespecialized dylib must use chained fixups");
+
+    return std::move(err);
+}