Loading...
cache_builder/NewSharedCacheBuilder.cpp dyld-1122.1 dyld-1162
--- dyld/dyld-1122.1/cache_builder/NewSharedCacheBuilder.cpp
+++ dyld/dyld-1162/cache_builder/NewSharedCacheBuilder.cpp
@@ -112,7 +112,7 @@
 }
 
 void SharedCacheBuilder::addFile(const void* buffer, size_t bufferSize, std::string_view path,
-                                 uint64_t inode, uint64_t modTime)
+                                 uint64_t inode, uint64_t modTime, bool forceNotCacheEligible)
 {
     Diagnostics diag;
     const bool  isOSBinary = false;
@@ -120,10 +120,11 @@
                                                           this->options.platform, isOSBinary,
                                                           this->options.archs) ) {
         InputFile inputFile;
-        inputFile.mf        = mf;
-        inputFile.inode     = inode;
-        inputFile.mtime     = modTime;
-        inputFile.path      = path;
+        inputFile.mf                    = mf;
+        inputFile.inode                 = inode;
+        inputFile.mtime                 = modTime;
+        inputFile.path                  = path;
+        inputFile.forceNotCacheEligible = forceNotCacheEligible;
         allInputFiles.push_back(std::move(inputFile));
         return;
     }
@@ -135,10 +136,11 @@
                                                               dyld3::Platform::iOSMac, isOSBinary,
                                                               this->options.archs) ) {
             InputFile inputFile;
-            inputFile.mf        = mf;
-            inputFile.inode     = inode;
-            inputFile.mtime     = modTime;
-            inputFile.path      = path;
+            inputFile.mf                    = mf;
+            inputFile.inode                 = inode;
+            inputFile.mtime                 = modTime;
+            inputFile.path                  = path;
+            inputFile.forceNotCacheEligible = forceNotCacheEligible;
             allInputFiles.push_back(std::move(inputFile));
             return;
         }
@@ -491,8 +493,14 @@
 
     for ( InputFile& inputFile : this->allInputFiles ) {
         if ( inputFile.mf->isDylib() || inputFile.mf->isDyld() ) {
-            auto failureHandler = ^(const char* reason) {
-                inputFile.setError(Error("%s", reason));
+            auto failureHandler = ^(const char* format, ...) __attribute__((format(printf, 1, 2))) {
+                char*   output_string;
+                va_list list;
+                va_start(list, format);
+                vasprintf(&output_string, format, list);
+                va_end(list);
+                inputFile.setError(Error("%s", (const char*)output_string));
+                free(output_string);
             };
 
             std::string_view installName = inputFile.mf->installName();
@@ -517,7 +525,7 @@
                 }
             }
 
-            if ( inputFile.mf->canBePlacedInDyldCache(dylibPath.data(), failureHandler) ) {
+            if ( !inputFile.forceNotCacheEligible && inputFile.mf->canBePlacedInDyldCache(dylibPath.data(), failureHandler) ) {
                 CacheDylib cacheDylib(inputFile);
                 this->cacheDylibs.push_back(std::move(cacheDylib));
             }
@@ -949,14 +957,16 @@
 
 void SharedCacheBuilder::estimateIMPCaches()
 {
+    // Only LP64 is supported by the runtime
     if ( !this->config.layout.is64 )
         return;
 
     if ( this->config.layout.cacheSize.rawValue() > 0x100000000 )
         return;
 
-    // Only iOS for now
-    if ( this->options.platform != dyld3::Platform::iOS )
+    // Only arm64* architectures are supported by the runtime
+    std::string_view archName = this->options.archs.name();
+    if ( archName != "arm64e" && archName != "arm64")
         return;
 
     // Skip everything if the JSON file is empty
@@ -978,6 +988,27 @@
             // diag.warning("libobjc's magical IMP caches shared cache offsets list section missing (metadata not optimized)");
             return;
         }
+
+        // Also find the _objc_opt_preopt_caches_version symbol, which has the IMP caches version
+        __block Diagnostics diag;
+        cacheDylib.inputMF->withFileLayout(diag, ^(const mach_o::Layout &layout) {
+            mach_o::Layout::FoundSymbol foundInfo;
+            if ( !layout.findExportedSymbol(diag, "_objc_opt_preopt_caches_version", false, foundInfo) )
+                return;
+
+            // We only support header offsets in this dylib, as we are looking for self binds
+            // which are likely only to classes
+            if ( foundInfo.kind != mach_o::Layout::FoundSymbol::Kind::headerOffset )
+                return;
+
+            uint64_t vmAddr = layout.textUnslidVMAddr() + foundInfo.value;
+
+            __block objc_visitor::Visitor objcVisitor = makeInputDylibObjCVisitor(cacheDylib);
+            metadata_visitor::ResolvedValue value = objcVisitor.getValueFor(VMAddress(vmAddr));
+            this->objcIMPCachesOptimizer.libobjcImpCachesVersion = *(int*)value.value();
+        });
+        if ( diag.hasError() )
+            return;
     }
 
     // Find all the objc dylibs, classes, categories
@@ -2287,7 +2318,7 @@
             size += cacheDylib.inputFile->path.size() + 1;
             size = alignTo(size, alignof(dyld4::Loader::LoaderRef));
             size += sizeof(dyld4::Loader::LoaderRef) * cacheDylib.dependents.size();
-            size += sizeof(Loader::DependentKind) * cacheDylib.dependents.size();
+            size += sizeof(Loader::DependentDylibAttributes) * cacheDylib.dependents.size();
             size += sizeof(Loader::FileValidationInfo);
             size += sizeof(Loader::Region) * cacheDylib.segments.size();
 
@@ -2402,16 +2433,7 @@
 void SharedCacheBuilder::computeSubCaches()
 {
     Timer::Scope timedScope(this->config, "computeSubCaches time");
-
-    // We have 3 different kinds of caches.
-    // - regular: put everything in a single file
-    // - large: A file is (TEXT, DATA, LINKEDIT), and we might have > 1 file
-    // - split: A file is TEXT/DATA/LINKEDIT, and we've have 1 or more TEXT, and exactly 1 DATA and LINKEDIT
-    if ( config.layout.large.has_value() ) {
-        computeLargeSubCache();
-    } else {
-        computeRegularSubCache();
-    }
+    computeLargeSubCache();
 }
 
 // ObjC/Swift optimizations produce arrays, hash tables, string sections, etc.
@@ -2501,30 +2523,6 @@
 
     // Finalize the SubCache, by removing any unused regions
     subCache.removeEmptyRegions();
-}
-
-void SharedCacheBuilder::computeRegularSubCache()
-{
-    // Put everything into a single file.
-    SubCache subCache = SubCache::makeMainCache(this->options, true);
-
-    // Add all the objc tables.  This must be done before we add libobjc's __TEXT
-    this->addObjCOptimizationsToSubCache(subCache);
-
-    for ( CacheDylib& cacheDylib : this->cacheDylibs ) {
-        bool addLinkedit = true;
-        subCache.addDylib(cacheDylib, addLinkedit);
-    }
-
-    // Add all the global optimizations
-    this->addGlobalOptimizationsToSubCache(subCache);
-
-    // Reserve space in the last sub cache for dynamic config data
-    subCache.addDynamicConfigChunk();
-
-    this->addFinalChunksToSubCache(subCache);
-
-    this->subCaches.push_back(std::move(subCache));
 }
 
 // Add stubs Chunk's for every stubs section in the given text subCache
@@ -2744,7 +2742,7 @@
 
         // If we exceed the current limit, then the current subCache is complete and we need
         // to start a new one
-        if ( (subCacheTextSize + textSize) > this->config.layout.large->subCacheTextLimit ) {
+        if ( (subCacheTextSize + textSize) > this->config.layout.subCacheTextLimit ) {
             // Create a new subCache
             otherCaches.push_back(SubCache::makeSubCache(this->options));
             currentSubCache = &otherCaches.back();
@@ -2757,7 +2755,10 @@
 
         // The subCache with libobjc gets the header info sections
         // Add all the objc tables.  This must be done before we add libobjc's __TEXT
-        if ( cacheDylib.installName == "/usr/lib/libobjc.A.dylib" )
+        std::string_view libObjcInstallName = "/usr/lib/libobjc.A.dylib";
+        if ( dyld3::MachOFile::isExclaveKitPlatform(this->options.platform) )
+            libObjcInstallName = "/System/ExclaveKit/usr/lib/libobjc.A.dylib";
+        if ( cacheDylib.installName == libObjcInstallName )
             this->addObjCOptimizationsToSubCache(*currentSubCache);
 
         // We'll add LINKEDIT at the end.  As the shared region is <= 4GB in size, we can fit
@@ -3887,6 +3888,9 @@
                 break;
             case cache_builder::SlideInfo::SlideInfoFormat::v3:
                 slideInfoSize += sizeof(dyld_cache_slide_info3);
+                break;
+            case cache_builder::SlideInfo::SlideInfoFormat::v5:
+                slideInfoSize += sizeof(dyld_cache_slide_info5);
                 break;
         }
         slideInfoSize += pagesToSlide * builderConfig.slideInfo.slideInfoBytesPerDataPage;
@@ -4107,106 +4111,6 @@
         assert(this->totalVMSize == totalCustomerCacheSize);
     }
 
-    if ( this->totalVMSize > this->config.layout.cacheSize ) {
-        return Error("Cache overflow (0x%llx > 0x%llx)",
-                     this->totalVMSize.rawValue(),
-                     this->config.layout.cacheSize.rawValue());
-    }
-
-    return Error();
-}
-
-// This is the x86_64 sim layout, where each of TEXT/DATA/LINKEDIT has its own fixed address
-Error SharedCacheBuilder::computeSubCacheDiscontiguousSimVMLayout()
-{
-    // Add padding between each region, and set the Region VMAddr's
-    CacheVMAddress maxVMAddress = this->config.layout.cacheBaseAddress;
-    assert(this->subCaches.size() == 1);
-    SubCache& subCache = this->subCaches.front();
-    subCache.subCacheVMAddress = this->config.layout.cacheBaseAddress;
-
-    bool seenText = false;
-    bool seenData = false;
-    bool seenLinkedit = false;
-    bool seenDynamicConfig = false;
-    CacheVMAddress lastDataEnd;
-    CacheVMAddress linkEditEnd;
-    for ( Region& region : subCache.regions ) {
-        switch ( region.kind ) {
-            case Region::Kind::text:
-                assert(!seenText);
-                seenText = true;
-                region.subCacheVMAddress = this->config.layout.discontiguous->simTextBaseAddress;
-
-                // Check for overflow
-                if ( region.subCacheVMSize > this->config.layout.discontiguous->simTextSize ) {
-                    return Error("Overflow in text (0x%llx > 0x%llx)",
-                                 region.subCacheVMSize.rawValue(),
-                                 this->config.layout.discontiguous->simTextSize.rawValue());
-                }
-                break;
-            case Region::Kind::dataConst:
-            case Region::Kind::data:
-            case Region::Kind::auth:
-            case Region::Kind::authConst:
-                if ( seenData ) {
-                    // This data follows from the previous one
-                    region.subCacheVMAddress = lastDataEnd;
-                } else {
-                    seenData = true;
-                    region.subCacheVMAddress = this->config.layout.discontiguous->simDataBaseAddress;
-                }
-                lastDataEnd = region.subCacheVMAddress + region.subCacheVMSize;
-                break;
-            case Region::Kind::linkedit:
-                assert(!seenLinkedit);
-                seenLinkedit = true;
-                region.subCacheVMAddress = this->config.layout.discontiguous->simLinkeditBaseAddress;
-
-                // Check for overflow
-                if ( region.subCacheVMSize > this->config.layout.discontiguous->simLinkeditSize ) {
-                    return Error("Overflow in linkedit (0x%llx > 0x%llx)",
-                                 region.subCacheVMSize.rawValue(),
-                                 this->config.layout.discontiguous->simLinkeditSize.rawValue());
-                }
-                linkEditEnd = region.subCacheVMAddress + region.subCacheVMSize;
-                break;
-            case Region::Kind::dynamicConfig:
-                assert(!seenDynamicConfig);
-                seenDynamicConfig = true;
-                // Grab space right after the linkedit
-                region.subCacheVMAddress = linkEditEnd;
-                // Check for overflow
-                if ( region.subCacheVMSize > this->config.layout.discontiguous->simLinkeditSize ) {
-                    return Error("Overflow in dynamicConfig (0x%llx > 0x%llx)",
-                                 region.subCacheVMSize.rawValue(),
-                                 this->config.layout.discontiguous->simLinkeditSize.rawValue());
-                }
-                break;
-            case Region::Kind::unmapped:
-            case Region::Kind::codeSignature:
-                break;
-            case Region::Kind::numKinds:
-                assert(0);
-                break;
-        }
-
-        if ( seenData ) {
-            // Check for overflow
-            CacheVMSize dataSize(lastDataEnd.rawValue() - this->config.layout.discontiguous->simDataBaseAddress.rawValue());
-            if ( dataSize > this->config.layout.discontiguous->simDataSize ) {
-                return Error("Overflow in data (0x%llx > 0x%llx)",
-                             dataSize.rawValue(),
-                             this->config.layout.discontiguous->simDataSize.rawValue());
-            }
-        }
-
-        if ( region.needsSharedCacheReserveAddressSpace() )
-            maxVMAddress = region.subCacheVMAddress + region.subCacheVMSize;
-    }
-
-    this->totalVMSize = CacheVMSize((maxVMAddress - this->config.layout.cacheBaseAddress).rawValue());
-
     return Error();
 }
 
@@ -4296,14 +4200,105 @@
     }
 
     this->totalVMSize = CacheVMSize((vmAddress - this->config.layout.cacheBaseAddress).rawValue());
+
+    return Error();
+}
+
+void SharedCacheBuilder::evictLeafDylibs(CacheVMSize reductionTarget)
+{
+    // build a reverse map of all dylib dependencies
+    std::unordered_map<std::string_view, std::unordered_set<std::string_view>> references;
+    // Ensure we have an entry (even if it is empty)
+    for ( const CacheDylib& cacheDylib : cacheDylibs )
+        references[cacheDylib.installName] = { };
     
-    if ( this->totalVMSize > this->config.layout.cacheSize ) {
-        return Error("Cache overflow (0x%llx > 0x%llx)",
-                     this->totalVMSize.rawValue(),
-                     this->config.layout.cacheSize.rawValue());
-    }
-
-    return Error();
+    for ( const CacheDylib& cacheDylib : cacheDylibs ) {
+        for ( const CacheDylib::DependentDylib& depDylib : cacheDylib.dependents ) {
+            // Skip missing weak links
+            if ( depDylib.dylib == nullptr )
+                continue;
+            references[depDylib.dylib->installName].insert(cacheDylib.installName);
+        }
+    }
+
+    struct DylibAndSize
+    {
+        CacheDylib* dylib;
+        CacheVMSize size;
+    };
+
+    // Find the sizes of all the dylibs
+    std::vector<DylibAndSize> dylibsToSort;
+    for ( CacheDylib& cacheDylib : cacheDylibs ) {
+        CacheVMSize segsSize = CacheVMSize(0ULL);
+        for ( const DylibSegmentChunk& segment : cacheDylib.segments ) {
+            if ( segment.segmentName == "__LINKEDIT" )
+                continue;
+
+            segsSize += segment.cacheVMSize;
+        }
+        dylibsToSort.push_back({ &cacheDylib, segsSize });
+    }
+
+    // Build an ordered list of what to remove. At each step we do the following:
+    // 1) Find all dylibs that nothing else depends on
+    // 2a) If any of those dylibs are not in the order file, select the largest one of them
+    // 2b) If all the leaf dylibs are in the order file, select the dylib that appears last in the order file
+    // 3) Remove all references to the selected dylib from the reverse dependency map
+    // 4) Go back to step 1 and repeat until there are no more evictable dylibs
+    // This results in us always choosing the locally optimal selection, and then taking into account how that impacts
+    // the dependency graph for subsequent selections
+
+    std::vector<DylibAndSize> sortedDylibs;
+    bool candidateFound = true;
+    while ( candidateFound ) {
+        candidateFound = false;
+        DylibAndSize candidate;
+        uint64_t candidateOrder = 0;
+        for( const auto& dylib : dylibsToSort ) {
+            const auto& dylibRefs = references.at(dylib.dylib->installName);
+            if ( !dylibRefs.empty())
+                continue;
+
+            const auto& j = options.dylibOrdering.find(std::string(dylib.dylib->installName));
+            uint64_t order = 0;
+            if ( j != options.dylibOrdering.end() ) {
+                order = j->second;
+            } else {
+                // Not in the order file, set order so it goes to the front of the list
+                order = UINT64_MAX;
+            }
+            if ( order > candidateOrder || (order == UINT64_MAX && candidate.size < dylib.size) ) {
+                // The new file is either a lower priority in the order file
+                // or the same priority as the candidate but larger
+                candidate = dylib;
+                candidateOrder = order;
+                candidateFound = true;
+            }
+        }
+        if (candidateFound) {
+            sortedDylibs.push_back(candidate);
+            references.erase(candidate.dylib->installName);
+            for (auto& dependent : references) {
+                (void)dependent.second.erase(candidate.dylib->installName);
+            }
+            auto j = std::find_if(dylibsToSort.begin(), dylibsToSort.end(),
+                                  [&candidate](const DylibAndSize& dylib) {
+                return candidate.dylib->installName == dylib.dylib->installName;
+            });
+            if ( j != dylibsToSort.end() ) {
+                dylibsToSort.erase(j);
+            }
+        }
+    }
+
+    // Build the set of dylibs that, if removed, will allow the cache to build
+    for ( DylibAndSize& dylib : sortedDylibs ) {
+        this->evictedDylibs.push_back(dylib.dylib->inputFile->path);
+        if ( dylib.size > reductionTarget )
+            break;
+        reductionTarget -= dylib.size;
+    }
 }
 
 // In file layout, we need each Region to start page-aligned.  Within a Region, we can pack pages
@@ -4382,13 +4377,15 @@
         if ( Error error = computeSubCacheContiguousVMLayout(); error.hasError() )
             return error;
     } else {
-        if ( this->options.isSimulator() ) {
-            if ( Error error = computeSubCacheDiscontiguousSimVMLayout(); error.hasError() )
-                return error;
-        } else {
-            if ( Error error = computeSubCacheDiscontiguousVMLayout(); error.hasError() )
-                return error;
-        }
+        if ( Error error = computeSubCacheDiscontiguousVMLayout(); error.hasError() )
+            return error;
+    }
+    
+    if ( this->totalVMSize > this->config.layout.cacheSize ) {
+        evictLeafDylibs(this->totalVMSize - this->config.layout.cacheSize);
+        return Error("Cache overflow (0x%llx > 0x%llx)",
+                     this->totalVMSize.rawValue(),
+                     this->config.layout.cacheSize.rawValue());
     }
 
     // Update Section VMAddr's now that we know where all the Region's are in memory
@@ -4403,12 +4400,6 @@
                 }
             }
         }
-    }
-
-    if ( this->totalVMSize > this->config.layout.cacheSize ) {
-        return Error("Cache overflow (0x%llx > 0x%llx)",
-                     this->totalVMSize.rawValue(),
-                     this->config.layout.cacheSize.rawValue());
     }
 
     return Error();
@@ -4856,13 +4847,21 @@
 // dyld4 needs a fake "main.exe" to set up the state.
 // On macOS this *has* to come from an actual executable, as choosing a zippered
 // dylib may incorrectly lead to setting up the ProcessConfig as iOSMac.
-// Simulators don't have executables yet so choose a dylib there
+// Simulators and ExclaveKit don't have executables yet so choose a dylib there
 static const MachOFile* getFakeMainExecutable(const BuilderOptions& options,
                                               std::span<CacheDylib> cacheDylibs,
                                               std::span<InputFile*> executableFiles)
 {
     if ( options.isSimulator() ) {
         std::string_view installName = "/usr/lib/libSystem.B.dylib";
+        for ( const CacheDylib& cacheDylib : cacheDylibs ) {
+            if ( cacheDylib.installName == installName ) {
+                assert(cacheDylib.cacheMF != nullptr);
+                return cacheDylib.cacheMF;
+            }
+        }
+    } else if (options.isExclaveKit() ) {
+        std::string_view installName = "/System/ExclaveKit/usr/lib/libSystem.dylib";
         for ( const CacheDylib& cacheDylib : cacheDylibs ) {
             if ( cacheDylib.installName == installName ) {
                 assert(cacheDylib.cacheMF != nullptr);
@@ -5233,7 +5232,7 @@
                                VMAddress& protocolClassVMAddr, MachOFile::PointerMetaData& protocolClassPMD)
 {
     for ( CacheDylib* cacheDylib : objcDylibs ) {
-        if ( cacheDylib->installName == "/usr/lib/libobjc.A.dylib" ) {
+        if ( cacheDylib->installName.ends_with("/usr/lib/libobjc.A.dylib" )) {
             __block InputDylibVMAddress inputOptPtrsVMAddress;
             __block uint64_t            sectionSize = 0;
             __block bool                found       = false;
@@ -6442,7 +6441,7 @@
 
     Timer::Scope timedScope(this->config, "emitCanonicalObjCProtocols time");
 
-    const bool log = this->options.debug;
+    const bool log = false;
 
     // We need to find the Protocol class from libojc
     VMAddress                  protocolClassVMAddr;
@@ -6618,7 +6617,7 @@
 
     Timer::Scope timedScope(this->config, "computeObjCClassLayout time");
 
-    const bool log = this->options.debug;
+    const bool log = false;
 
     // We need to walk all classes in all dylibs.  Each dylib needs its own objc visitor object
     std::vector<objc_visitor::Visitor> objcVisitors;
@@ -7213,7 +7212,7 @@
 
     Diagnostics diag;
     auto objcClassOpt = (objc::ClassHashTable*)this->objcClassOptimizer.classHashTableChunk->subCacheBuffer;
-    buildSwiftHashTables(this->config, diag, this->cacheDylibs,
+    buildSwiftHashTables(this->config, diag, this->objcOptimizer.objcDylibs,
                          extraRegions, objcClassOpt,
                          this->objcOptimizer.headerInfoReadOnlyChunk->subCacheBuffer,
                          this->objcOptimizer.headerInfoReadWriteChunk->subCacheBuffer,
@@ -7335,6 +7334,10 @@
     if ( sizeUpToTextEnd <= twoGB )
         maxSlide = CacheVMSize(twoGB - sizeUpToTextEnd);
 
+    if ( this->config.layout.cacheMaxSlide.has_value() ) {
+        maxSlide = std::min(maxSlide, CacheVMSize(this->config.layout.cacheMaxSlide.value()));
+    }
+
     return maxSlide.rawValue();
 }
 
@@ -7482,6 +7485,11 @@
     for (int i = 0; i < 20; ++i)
         snprintf(&buff[2*i], sizeof(buff), "%2.2x", hash[i]);
     return buff;
+}
+
+std::span<const std::string_view> SharedCacheBuilder::getEvictedDylibs() const
+{
+    return this->evictedDylibs;
 }
 
 void SharedCacheBuilder::getResults(std::vector<CacheBuffer>& results) const