Loading...
libdyld/ThreadLocalVariables.cpp /dev/null dyld-1284.13
--- /dev/null
+++ dyld/dyld-1284.13/libdyld/ThreadLocalVariables.cpp
@@ -0,0 +1,414 @@
+/*
+ * Copyright (c) 2024 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <stdint.h>
+
+#include "Defines.h"
+#include "Header.h"
+#include "Error.h"
+#include "LibSystemHelpers.h"
+#include "ThreadLocalVariables.h"
+#include "DyldSharedCache.h"
+
+using dyld4::LibSystemHelpers;
+using mach_o::Error;
+
+extern "C" void* _tlv_get_addr(dyld::ThreadLocalVariables::Thunk*);
+
+namespace dyld {
+
+
+
+// called by dyld via libSystemHelpers->setUpThreadLocals() at launch and during dlopen()
+Error ThreadLocalVariables::setUpImage(const DyldSharedCache* cache, const Header* hdr)
+{
+// driverkit and the main OS share the same dyld, but driverkit processes do not support thread locals
+#if __has_feature(tls)
+    // images inside the dyld shared cache have their thunks checked/patched in place,
+    // everything else has its thunks built from the on-disk layout
+    if ( hdr->inDyldCache() )
+        return this->initializeThunksInDyldCache(cache, hdr);
+    return this->initializeThunksFromDisk(hdr);
+#else
+    // no thread-local support in this build: nothing to set up
+    return Error::none();
+#endif // __has_feature(tls)
+}
+
+#if __has_feature(tls)
+void ThreadLocalVariables::findInitialContent(const Header* hdr, std::span<const uint8_t>& initialContent, bool& allZeroFill)
+{
+    // Outputs:
+    //   initialContent - span covering the image's thread-local template sections (first through last seen)
+    //   allZeroFill    - stays true unless an S_THREAD_LOCAL_REGULAR section is found
+    allZeroFill = true;
+#if BUILDING_UNIT_TESTS
+    // unit tests supply the template through setMock()
+    allZeroFill    = _allZeroFillContent;
+    initialContent = _initialContent;
+#else
+    const intptr_t slide = (intptr_t)hdr->getSlide();
+    hdr->forEachSection(^(const Header::SectionInfo& sectInfo, bool& stop) {
+        const auto sectType   = (sectInfo.flags & SECTION_TYPE);
+        const bool isRegular  = (sectType == S_THREAD_LOCAL_REGULAR);
+        const bool isZeroFill = (sectType == S_THREAD_LOCAL_ZEROFILL);
+        if ( !isRegular && !isZeroFill )
+            return; // not a TLV template section
+
+        // a regular (non-zerofill) section means the template has real bytes to copy
+        if ( isRegular )
+            allZeroFill = false;
+
+        if ( initialContent.empty() ) {
+            // first of N contiguous TLV template sections, record it as if it were the only one
+            initialContent = std::span<const uint8_t>((const uint8_t*)(sectInfo.address + slide), (size_t)sectInfo.size);
+            return;
+        }
+
+        // non-first of N contiguous TLV template sections, accumulate: grow the span so it ends where this section ends
+        const size_t grownSize = (uintptr_t)sectInfo.address + (uintptr_t)sectInfo.size + slide - (uintptr_t)initialContent.data();
+        initialContent = std::span<const uint8_t>(initialContent.data(), grownSize);
+    });
+#endif
+}
+
+// most images have just one __thread_vars section, but some have one in __DATA and one in __DATA_DIRTY
+Error ThreadLocalVariables::forEachThunkSpan(const Header* hdr, Error (^visit)(std::span<Thunk>))
+{
+    // Calls `visit` once per S_THREAD_LOCAL_VARIABLES section of `hdr`, passing that section's
+    // contents as a span of Thunk. Returns the first error produced, either by a malformed
+    // section size or by `visit` itself; iteration stops at that point.
+#if BUILDING_UNIT_TESTS
+    // unit tests supply the thunks through setMock()
+    return visit(_thunks);
+#else
+    __block Error setUpErr;
+    // find each section holding an array of TLV thunks
+    intptr_t slide = (intptr_t)hdr->getSlide();
+    hdr->forEachSection(^(const Header::SectionInfo& sectInfo, bool& stop) {
+        if ( (sectInfo.flags & SECTION_TYPE) != S_THREAD_LOCAL_VARIABLES )
+            return;
+        if ( sectInfo.size % sizeof(Thunk) != 0 ) {
+            setUpErr = Error("size (%llu) of thread-locals section %.*s is not a multiple of %lu",
+                             sectInfo.size, (int)sectInfo.sectionName.size(), sectInfo.sectionName.data(), sizeof(Thunk));
+            stop = true;
+            return;
+        }
+        if ( sectInfo.size >= sizeof(Thunk) ) {
+            std::span<Thunk> thunks = std::span<Thunk>((Thunk*)(sectInfo.address + slide), (size_t)(sectInfo.size/sizeof(Thunk)));
+            setUpErr = visit(thunks);
+            // stop on failure, otherwise a later section's successful visit would
+            // overwrite setUpErr and the error would be silently dropped
+            if ( setUpErr.hasError() )
+                stop = true;
+        }
+    });
+    return std::move(setUpErr);
+#endif
+}
+
+static void finalizeListTLV(void* list)
+{
+    // Destructor callback handed to dyld_thread_key_create() by ThreadLocalVariables::initialize().
+#if BUILDING_LIBDYLD
+    // libc/pthreads calls this when the current thread is going away;
+    // forward the thread's terminator list to the global instance
+    sThreadLocalVariables.finalizeList(list);
+#endif // BUILDING_LIBDYLD
+}
+
+
+// This is called during libSystem initialization, which passes libSystemHelpers from libdyld down to dyld.
+// _libSystem_initialize() -> _dyld_initialize() -> APIs::_libdyld_initialize() -> ThreadLocalVariables::initialize()
+void ThreadLocalVariables::initialize()
+{
+    // Create the thread key that holds each thread's list of terminators.
+    // finalizeListTLV is registered as the key's destructor, so when a thread is
+    // terminated its list is cleaned up through finalizeList().
+    dyld_thread_key_create(&_terminatorsKey, finalizeListTLV);
+}
+
+Error ThreadLocalVariables::initializeThunksFromDisk(const Header* hdr)
+{
+    // Rewrites every TLV thunk of `hdr` in place from its on-disk layout (Thunk) to the
+    // runtime layout (TLV_Thunkv2 / TLV_Thunkv2_32), filling in the accessor function,
+    // the thread key and where the initial content lives.
+    // Returns an error if a thunk is malformed or a value does not fit the runtime layout.
+
+    // each dylib gets a new key used for all thread-locals in that dylib
+    dyld_thread_key_t key;
+#if BUILDING_UNIT_TESTS
+    key = _key;
+#else
+    if ( dyld_thread_key_create(&key, &::free) )
+        return Error("pthread_key_create() failed");
+#endif
+
+    // find initial content for all TLVs in image
+    std::span<const uint8_t>  initialContent;
+    bool                      allZeroFill;
+    findInitialContent(hdr, initialContent, allZeroFill);
+
+    // set the thunk function pointer and key for every thread local variable
+    Error err = forEachThunkSpan(hdr, ^(std::span<Thunk> thunks) {
+        for ( Thunk& thunk : thunks ) {
+            // read the on-disk offset first: the thunk is overwritten through a different layout below
+            uintptr_t    offset  = thunk.offset;
+            if ( offset > initialContent.size() )
+                return Error("malformed thread-local, offset=0x%lX is larger than total size=0x%lX", thunk.offset, initialContent.size() );
+#if __LP64__
+            // key, offset and size are all stored as uint32_t in the 64-bit runtime thunk
+            if ( initialContent.size() > 0xFFFFFFFFUL )
+                return Error("unsupported thread-local, larger than 4GB");
+            if ( key > 0xFFFFFFFFUL )
+                return Error("thread_key_t %lu, larger than uint32_t", key);
+            TLV_Thunkv2* thunkv2 = (TLV_Thunkv2*)&thunk;
+            thunkv2->func                = (void*)&_tlv_get_addr;
+            thunkv2->key                 = (uint32_t)key;
+            thunkv2->offset              = (uint32_t)offset;
+            // delta is measured from the address of the initialContentDelta field itself
+            thunkv2->initialContentDelta = (uint32_t)(initialContent.data() - (uint8_t*)(&thunkv2->initialContentDelta));
+            thunkv2->initialContentSize  = (uint32_t)initialContent.size();
+            // if initial content is all zeros, no need to store delta to initialContent
+            if ( allZeroFill )
+                thunkv2->initialContentDelta = 0;
+            if (verbose) fprintf(stderr, "initializeThunksFromDisk(%p): thunk=%p, key=%d, offset=0x%08X, delta=0x%08X, size=%d\n",
+                                 hdr, thunkv2, thunkv2->key, thunkv2->offset, thunkv2->initialContentDelta, thunkv2->initialContentSize);
+#else
+            // key and offset are both stored as uint16_t in the 32-bit runtime thunk
+            if ( key > 0xFFFF )
+                return Error("thread_key_t %lu, larger than uint16_t", key);
+            // reject offsets that would be silently truncated by the uint16_t store below
+            if ( offset > 0xFFFF )
+                return Error("unsupported thread-local, offset=0x%lX is larger than uint16_t", offset);
+            TLV_Thunkv2_32* thunkv2 = (TLV_Thunkv2_32*)&thunk;
+            thunkv2->func                = (void*)&_tlv_get_addr;
+            thunkv2->key                 = (uint16_t)key;
+            thunkv2->offset              = (uint16_t)offset;
+            // if initial content is all zeros, store size, otherwise store delta to mach_header so runtime can find __thread_ sections
+            if ( allZeroFill ) {
+                if ( initialContent.size() > 0x7FFFFFFF )
+                    return Error("unsupported thread-local, larger than 2GB of zero-fill");
+                thunkv2->machHeaderDelta = (uint32_t)initialContent.size();
+            }
+            else {
+                thunkv2->machHeaderDelta = (int32_t)((uint8_t*)hdr - (uint8_t*)&thunkv2->machHeaderDelta);
+            }
+            if (verbose) fprintf(stderr, "initializeThunksFromDisk(%p): thunk=%p, key=%d, offset=0x%04X, machHeaderDelta=0x%08X\n",
+                                hdr, thunkv2, thunkv2->key, thunkv2->offset, thunkv2->machHeaderDelta);
+#endif
+        }
+        return Error::none();
+    });
+    return err;
+}
+
+Error ThreadLocalVariables::initializeThunksInDyldCache(const DyldSharedCache* cache, const Header* hdr)
+{
+    // if the cache builder runs out of static keys, it leaves the thunks looking like they do on disk (key == 0)
+    __block bool foundUnoptimized = false;
+    Error visitErr = forEachThunkSpan(hdr, ^(std::span<Thunk> thunks) {
+        // read the key the cache builder stored in the first thunk of this span
+        // FIXME: simplify once new cache format is standard
+        int builderKey = 0;
+        if ( cache->header.newFormatTLVs ) {
+#if __LP64__
+            TLV_Thunkv2*    firstThunk = (TLV_Thunkv2*)&thunks[0];
+#else
+            TLV_Thunkv2_32* firstThunk = (TLV_Thunkv2_32*)&thunks[0];
+#endif
+            builderKey = firstThunk->key;
+        }
+        else {
+            builderKey = (int)thunks[0].key;
+        }
+
+        if ( builderKey == 0 ) {
+            // no static key was assigned: remember that and let the on-disk path handle the image
+            foundUnoptimized = true;
+            if (verbose) fprintf(stderr, "  initializeThunksInDyldCache(%p) thunks=%p not optimized in dyld cache\n", hdr, &thunks[0]);
+            return Error::none();
+        }
+
+        // the dyld cache builder assigned a static key for these TLVs, but we still need to register
+        // that free() should be called on the key's value if the thread goes away
+        dyld_thread_key_init_np(builderKey, &::free);
+
+        // thunks in the dyld shared cache are normally correct, but they may need to be corrected
+        // if a root of libdyld.dylib is in use; only store when the value actually differs
+        void* const expectedFunc = (void*)&_tlv_get_addr;
+        for ( Thunk& thunk : thunks ) {
+            if ( thunk.func != expectedFunc )
+                thunk.func = expectedFunc;
+        }
+        return Error::none();
+    });
+
+    // any span without a static key sends the whole image through the on-disk set up
+    if ( foundUnoptimized )
+        return initializeThunksFromDisk(hdr);
+
+    if ( visitErr.hasError() )
+        return visitErr;
+
+    if ( !cache->header.newFormatTLVs )
+        return Error("dyld cache thread-local format too old");
+
+    return Error::none();
+}
+
+void ThreadLocalVariables::addTermFunc(TermFunc func, void* objAddr)
+{
+    // Appends { func, objAddr } to the current thread's chain of terminator lists.
+    // NOTE: no locks are needed because only the current thread's data is touched
+    TerminatorList* tail = (TerminatorList*)::dyld_thread_getspecific(_terminatorsKey);
+    if ( tail == nullptr ) {
+        // first terminator on this thread: start a zeroed list and attach it to the thread
+        // Note: use system malloc because it is thread safe and does not require dyld's allocator to be made r/w
+        tail = (TerminatorList*)::malloc(sizeof(TerminatorList));
+        bzero(tail, sizeof(TerminatorList));
+        dyld_thread_setspecific(_terminatorsKey, tail);
+    }
+
+    // advance to the last link of the chain
+    for ( ; tail->next != nullptr; tail = tail->next ) {
+    }
+
+    // when the last link already holds 7 entries, chain a fresh zeroed link after it
+    if ( tail->count == 7 ) {
+        TerminatorList* overflow = (TerminatorList*)::malloc(sizeof(TerminatorList));
+        bzero(overflow, sizeof(TerminatorList));
+        tail->next = overflow;
+        tail       = overflow;
+    }
+
+    // store the new entry in the next free slot
+    Terminator& slot = tail->elements[tail->count];
+    slot = { func, objAddr };
+    tail->count += 1;
+}
+
+// <rdar://problem/13741816>
+// called by exit() before it calls cxa_finalize() so that thread_local
+// objects are destroyed before global objects.
+// Note this is only called on macOS, and by libc.
+// iOS only destroys tlv's when each thread is destroyed and libpthread calls
+// tlv_finalize as that is the pointer we provided when we created the key
+void ThreadLocalVariables::exit()
+{
+    // run and free the calling thread's terminators, if it has any
+    TerminatorList* pending = (TerminatorList*)::dyld_thread_getspecific(_terminatorsKey);
+    if ( pending == nullptr )
+        return;
+
+    // detach the list from the thread before finalizing it. If terminators register
+    // new thread locals while running, those land on a new list; the one detached
+    // here is owned by us and must be destroyed by us.
+    dyld_thread_setspecific(_terminatorsKey, nullptr);
+    this->finalizeList(pending);
+}
+
+void ThreadLocalVariables::TerminatorList::reverseWalkChain(void (^visit)(TerminatorList*))
+{
+    // recurse to the end of the chain first, so later links are visited before earlier ones
+    if ( TerminatorList* later = this->next )
+        later->reverseWalkChain(visit);
+    // this link is not touched after the visit, so the visitor is free to release it
+    visit(this);
+}
+
+// on entry, libc has set the TSD slot to nullptr and passed us the previous value
+// this is done to handle destructors that re-animate the key value
+void ThreadLocalVariables::finalizeList(void* l)
+{
+    // Runs every terminator in the chain starting at `l`, newest first, then frees the chain.
+    TerminatorList* const head = (TerminatorList*)l;
+
+    // call term functions in reverse order of construction: last link first, and
+    // within a link from the highest used slot down to slot 0
+    head->reverseWalkChain(^(TerminatorList* link) {
+        uintptr_t remaining = link->count;
+        while ( remaining > 0 ) {
+            --remaining;
+            const Terminator& term = link->elements[remaining];
+            if ( term.termFunc != nullptr )
+                (*term.termFunc)(term.objAddr);
+
+            // If the terminator just called registered a new tlv (via tlv_atexit), a new list
+            // is now attached to the thread; it has to be destroyed immediately.
+            TerminatorList* reanimated = (TerminatorList*)(::dyld_thread_getspecific(_terminatorsKey));
+            if ( reanimated == nullptr )
+                continue;
+            // Detach it first so that if yet another tlv is registered, it goes in a new list
+            dyld_thread_setspecific(_terminatorsKey, nullptr);
+            this->finalizeList(reanimated);
+        }
+    });
+
+    // free entire chain, last link first
+    head->reverseWalkChain(^(TerminatorList* link) {
+        ::free(link);
+    });
+}
+
+// called lazily when TLV is first accessed
+void* ThreadLocalVariables::instantiateVariable(const Thunk& thunk)
+{
+    // Allocates this thread's buffer for the image that owns `thunk`, fills it with the image's
+    // initial thread-local content (or zeros), stores it under the thunk's key and returns it.
+    // The 32-bit path returns nullptr if the image's template cannot be located.
+#if TARGET_OS_EXCLAVEKIT
+    // On ExclaveKit, the assembly code for _tlv_get_addr cannot access thread specific data,
+    // so the lookup of an already-instantiated buffer happens here in C instead.
+    TLV_Thunkv2* ekThunk = (TLV_Thunkv2*)&thunk;
+    if ( void* result = ::tss_get(ekThunk->key) )
+        return result;
+#endif // TARGET_OS_EXCLAVEKIT
+
+    void*               buffer = nullptr;
+    dyld_thread_key_t   key    = 0;
+#if __LP64__
+    TLV_Thunkv2* thunkv2 = (TLV_Thunkv2*)&thunk;
+    key = thunkv2->key;
+    if ( thunkv2->initialContentDelta == 0 ) {
+        // a zero delta means the initial content of the thread-locals is all zeros
+        buffer = ::calloc(thunkv2->initialContentSize, 1);
+        if (verbose) fprintf(stderr, "instantiateVariable(%p) buffer=%p, zero-fill, size=%d\n", &thunk, buffer, thunkv2->initialContentSize);
+    }
+    else {
+        // initial content is non-zero: copy the template, which is located relative to the delta field itself
+        buffer = ::malloc(thunkv2->initialContentSize);
+        const uint8_t* initialContent = (uint8_t*)(&thunkv2->initialContentDelta) + thunkv2->initialContentDelta;
+        memcpy(buffer, initialContent, thunkv2->initialContentSize);
+        if (verbose) fprintf(stderr, "instantiateVariable(%p) buffer=%p, init-content=%p size=%d\n", &thunk, buffer, initialContent, thunkv2->initialContentSize);
+    }
+#else
+    TLV_Thunkv2_32* thunkv2 = (TLV_Thunkv2_32*)&thunk;
+    key = thunkv2->key;
+    if ( thunkv2->machHeaderDelta >= 0 ) {
+        // in zerofill case, machHeaderDelta is size to allocate
+        buffer = ::calloc(thunkv2->machHeaderDelta, 1);
+        if (verbose) fprintf(stderr, "instantiateVariable(%p) buffer=%p, zero-fill, size=%d\n", thunkv2, buffer, thunkv2->machHeaderDelta);
+    }
+    else {
+        // in non-zerofill case, machHeaderDelta is the (negative) delta from the field to the mach_header
+        std::span<const uint8_t> bytes((uint8_t*)&thunkv2->machHeaderDelta + thunkv2->machHeaderDelta, -thunkv2->machHeaderDelta);
+        const Header* hdr = Header::isMachO(bytes);
+        if ( hdr == nullptr ) {
+            fprintf(stderr, "ThreadLocalVariables::instantiateVariable(%p) cannot find mach-o header\n", thunkv2);
+            return nullptr;  // abort? something has gone wrong
+        }
+        // re-discover the template sections from the header, then copy them
+        std::span<const uint8_t>  initialContent;
+        bool                      allZeroFill;
+        findInitialContent(hdr, initialContent, allZeroFill);
+        if ( initialContent.empty() ) {
+            fprintf(stderr, "ThreadLocalVariables::getInitialContent(%p) failed\n", hdr);
+            return nullptr;  // abort? something has gone wrong
+        }
+        buffer = ::malloc(initialContent.size());
+        memcpy(buffer, initialContent.data(), initialContent.size());
+        if (verbose) fprintf(stderr, "instantiateVariable(%p) buffer=%p, init-content=%p size=%lu\n", thunkv2, buffer, initialContent.data(), initialContent.size());
+    }
+#endif
+
+    // set this thread's value for key to be the new buffer.
+    dyld_thread_setspecific(key, buffer);
+
+    return buffer;
+}
+#endif // __has_feature(tls)
+
+
+#if BUILDING_LIBDYLD
+ThreadLocalVariables sThreadLocalVariables; // global instance that finalizeListTLV() forwards to; only defined when BUILDING_LIBDYLD
+#endif
+
+#if BUILDING_UNIT_TESTS
+void ThreadLocalVariables::setMock(int tlvKey, std::span<Thunk> thunks, std::span<const uint8_t> content)
+{
+    // Unit-test hook: installs the key, thunks and template content that the
+    // BUILDING_UNIT_TESTS paths of this file use in place of a real image.
+
+    // the mock template counts as zero-fill only if every byte is zero (an empty template does too)
+    bool sawNonZero = false;
+    for ( size_t i = 0; i < content.size() && !sawNonZero; ++i )
+        sawNonZero = (content[i] != 0);
+
+    _key                = tlvKey;
+    _thunks             = thunks;
+    _initialContent     = content;
+    _allZeroFillContent = !sawNonZero;
+}
+#endif
+
+
+} // namespace