Loading...
libdyld/ThreadLocalVariables.h /dev/null dyld-1335
--- /dev/null
+++ dyld/dyld-1335/libdyld/ThreadLocalVariables.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2024 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+
+#ifndef ThreadLocalVariables_h
+#define ThreadLocalVariables_h
+
+#include <stdint.h>
+
+#include "Defines.h"
+#include "Error.h"
+#include "Header.h"
+
+// forward declaration only: including LibSystemHelpers.h here would introduce an include cycle
+namespace dyld4 {
+    struct LibSystemHelpers;
+}
+
+class DyldSharedCache;   // forward declared; this header only refers to it through const pointers
+
+namespace dyld {
+
+using mach_o::Header;
+
+/*
+              * * * How thread-local variables work on Apple platforms * * *
+
+    A thread local variable (TLV) is a per-thread variable. It is not statically allocated in the
+    __DATA segment, nor is it stack allocated.  Instead, on first use of a TLV, malloc() is used
+    to allocate space for the variable and its address is stored in a thread-specific way.  This
+    allocation is lazy, so that a thread that does not access a TLV does not have space malloc()ed.
+
+    When C (__thread) or C++ (thread_local) source code defines a TLV, the compiler emits a
+    thunk in the __DATA,__thread_vars section. The first pointer in the thunk is to a function.
+    When code uses a TLV, the compiler emits code to materialize the address of the thunk, then
+    calls the first pointer in the thunk, passing the thunk's address as a parameter, and the
+    function returns the address of the TLV for the current thread.  The thunk func has special
+    calling conventions where all registers are preserved (other than the result register). That
+    means the compiler does not need to spill registers to the stack when "computing" the address
+    of a TLV.
+
+    In a .o file a thunk for "myvar" looks like:
+
+            .section __DATA,__thread_vars,thread_local_variables
+            .globl _myvar
+ _myvar:    .quad  __tlv_bootstrap
+            .quad  0
+            .quad  _myvar$tlv$init
+
+    A thunk is always three pointers in size.  The first points to a bootstrapping function.
+    The second is always zero.  The third is a pointer to the initial content for when the TLV
+    is instantiated at runtime.
+
+    The linker does a minor transformation of these thunks.  The linker finds all TLVs defined in the
+    linkage unit and co-locates their initial content blobs (e.g. _myvar$tlv$init), thus making
+    one contiguous run of initial content.  This is so that the runtime can do just one malloc() for all
+    TLVs in the image on first use, and then do one copy of the initial content into the malloc()ed space.
+
+    At runtime dyld needs to do some load time processing of images with TLVs. Dyld needs to allocate
+    a pthread_key and stuff the pthread_key into the second slot of each thunk.  Once they are set up,
+    when the code calls the thunk func, it uses the key and pthread_getspecific() to get the
+    address of the malloc()ed space, then adds the third field (offset) to return the address of
+    the specific TLV in the image.  If pthread_getspecific() returns NULL, that means this is the first
+    use of any TLV in this image on this thread. In that case, dyld needs to determine the overall
+    size to malloc() and the initial content bytes to set that to.
+
+    It turns out the slow path (first use of a TLV) required taking a lock and walking dyld data
+    structures to find the image containing the TLV and the initial content for it.
+
+    In Spring 2025 releases, an optimization was made to how TLVs work to optimize the slow path.
+    The code below implements this optimization which repacks the fields in the thunk after the
+    func pointer to contain all the info needed for the fast and slow paths.  That means dyld
+    does not need to maintain a side table and there is no need for a lock.  Each TLV is self
+    contained once set up.
+
+    There is also an optimization done in the dyld cache builder for dylibs in the dyld cache which
+    have TLVs.  Instead of having dyld set up the TLVs at runtime (which dirties pages), the dyld
+    cache builder does the setup.  It does this by using a range of reserved static pthread keys.
+
+
+ */
+
+// Manages thread-local variables (TLVs) in mach-o files at runtime: prepares the thunks of an image
+// when it is loaded, instantiates a thread's variables on first use, and records the terminators
+// registered through _tlv_atexit() so they can be run when a thread goes away or at exit().
+class VIS_HIDDEN ThreadLocalVariables
+{
+public:
+    // on-disk format of a thread-local variable thunk (always three pointers in size)
+    struct Thunk
+    {
+        void*       func;   // really void* (*ThunkFunc)(Thunk*);
+        size_t      key;    // second slot: always zero on disk
+        size_t      offset; // third slot: added to the thread's storage address to locate the variable
+    };
+
+    typedef void  (*TermFunc)(void* objAddr);   // terminator callback; presumably invoked with the objAddr given to addTermFunc()
+
+    // called once during the libSystem initializer, before any other method is used
+    void                    initialize();
+
+    // called by _tlv_atexit() to register 'func' (with argument 'objAddr') to be called when a thread terminates
+    void                    addTermFunc(TermFunc func, void* objAddr);
+
+    // called by libc's exit() to run all registered terminators
+    void                    exit();
+
+    // called by dyld when an image with thread-locals is first loaded; returns an Error describing any failure
+    mach_o::Error           setUpImage(const DyldSharedCache* cache, const Header* hdr);
+
+    // called by pthreads when a thread goes away; 'list' is presumably that thread's TerminatorList chain
+    void                    finalizeList(void* list);
+
+    // called on first use of a thread local in a thread to allocate and initialize thread locals for current thread
+#if BUILDING_UNIT_TESTS
+    void*                   instantiateVariable(const Thunk&);   // non-static in unit tests, presumably to reach the mock state
+#else
+    static void*            instantiateVariable(const Thunk&);   // static in production: needs no instance state
+#endif
+
+    // internal routines to prepare the thunks in an image (one for images on disk, one for images in the dyld cache)
+    mach_o::Error           initializeThunksFromDisk(const Header* hdr);
+    mach_o::Error           initializeThunksInDyldCache(const DyldSharedCache* cache, const Header* hdr);
+
+    // runtime structure of 64-bit arch thread-local thunk (fields after 'func' are repacked from Thunk)
+    struct TLV_Thunkv2
+    {
+        void*        func;
+        uint32_t     key;                   // thread key used to find this thread's storage for the image
+        uint32_t     offset;                // offset of the variable within that storage
+        int32_t      initialContentDelta;   // if zero, then content is all zeros
+        uint32_t     initialContentSize;    // NOTE(review): presumably the byte size of the initial content - confirm
+    };
+
+    // runtime structure of 32-bit arch thread-local thunk (fields after 'func' are repacked from Thunk)
+    struct TLV_Thunkv2_32
+    {
+        void*        func;
+        uint16_t     key;             // thread key used to find this thread's storage for the image
+        uint16_t     offset;          // offset of the variable within that storage
+        int32_t      machHeaderDelta; // if < 0, content is found by walking load commands. If > 0, then it is size and content is all zeros
+    };
+
+#if BUILDING_UNIT_TESTS
+    void                    setMock(int tlvKey, std::span<Thunk> thunks, std::span<const uint8_t> content);   // unit-test-only hook
+#endif
+
+private:
+#if BUILDING_UNIT_TESTS
+    void                            findInitialContent(const Header* hdr, std::span<const uint8_t>& initialContent, bool& allZeroFill);   // results via the two reference params
+#else
+    static void                     findInitialContent(const Header* hdr, std::span<const uint8_t>& initialContent, bool& allZeroFill);   // results via the two reference params
+#endif
+    mach_o::Error                   forEachThunkSpan(const Header* hdr, mach_o::Error (^visit)(std::span<Thunk>));   // hands spans of thunks in 'hdr' to 'visit'
+
+    // used to record _tlv_atexit() entries to clean up on thread exit
+    struct Terminator
+    {
+        TermFunc      termFunc;   // callback to run
+        void*         objAddr;    // argument recorded alongside the callback
+    };
+
+    struct TerminatorList {   // fixed-capacity chunk of terminators; chunks are linked through 'next'
+        TerminatorList* next  = nullptr;
+        uintptr_t       count = 0;          // NOTE(review): presumably the number of used slots in 'elements' - confirm
+        Terminator      elements[7];
+        void                reverseWalkChain(void (^work)(TerminatorList*));   // presumably visits the chain last-to-first
+    };
+
+    static const bool verbose = false;   // compile-time switch; NOTE(review): presumably gates debug logging
+
+    dyld_thread_key_t               _terminatorsKey      = 0;
+#if BUILDING_UNIT_TESTS
+    dyld_thread_key_t               _key                 = 0;       // mock state, presumably installed by setMock(); initialized so it is never indeterminate
+    std::span<Thunk>                _thunks;                        // default-constructed (empty) until a mock is set
+    std::span<const uint8_t>        _initialContent;                // default-constructed (empty) until a mock is set
+    bool                            _allZeroFillContent  = false;   // initialized so it is never indeterminate
+#endif
+};
+
+
+#if BUILDING_LIBDYLD
+extern ThreadLocalVariables sThreadLocalVariables;   // declared only when BUILDING_LIBDYLD is set; the definition is not in this header
+#endif
+
+
+} // namespace dyld
+
+#endif /* ThreadLocalVariables_h */