Loading...
mach_o/BindOpcodes.cpp dyld-1340 dyld-1162
--- dyld/dyld-1340/mach_o/BindOpcodes.cpp
+++ dyld/dyld-1162/mach_o/BindOpcodes.cpp
@@ -276,32 +276,30 @@
 
 void BindOpcodes::forEachBindLocation(void (^callback)(const LocAndTarget&, bool& stop)) const
 {
+    __block uint32_t bindOrdinal = 0;
     (void)forEachBind(^(const char* opcodeName, int type, bool segIndexSet, uint8_t segIndex, uint64_t segOffset, bool libraryOrdinalSet,
                   int libOrdinal, const char* symbolName, bool weakImport, int64_t addend, bool targetOrAddendChanged, bool& stop) {
         BindTarget target = {symbolName, libOrdinal, weakImport, false, addend};
         LocAndTarget info = { segIndex, segOffset, &target };
         callback(info, stop);
+        if ( targetOrAddendChanged )
+            ++bindOrdinal;
     }, ^(const char* symbolName){ });
 }
 
 void BindOpcodes::forEachBindTarget(void (^callback)(const Fixup::BindTarget& target, bool& stop),
                                     void (^strongHandler)(const char* symbolName)) const
 {
-    __block const char* lastSymbolName = nullptr;
     (void)forEachBind(^(const char* opcodeName, int type, bool segIndexSet, uint8_t segIndex, uint64_t segOffset, bool libraryOrdinalSet,
                   int libOrdinal, const char* symbolName, bool weakImport, int64_t addend, bool targetOrAddendChanged, bool& stop) {
-        // in Bind Opocdes the symbol name string remains the same, while the update location (segOffset) changes, so we can to pointer comparison
-        if ( symbolName != lastSymbolName ) {
-            Fixup::BindTarget target = {symbolName, libOrdinal, weakImport, addend};
-            callback(target, stop);
-            lastSymbolName = symbolName;
-        }
+        Fixup::BindTarget target = {symbolName, libOrdinal, weakImport, addend};
+        callback(target, stop);
     }, strongHandler);
 }
 
-uint32_t BindOpcodes::forEachBindLocation(std::span<const MappedSegment> segments, uint32_t startBindOrdinal, void (^callback)(const Fixup& fixup, bool& stop)) const
-{
-    __block uint32_t bindOrdinal = startBindOrdinal;
+void BindOpcodes::forEachBindLocation(std::span<const MappedSegment> segments, void (^callback)(const Fixup& fixup, bool& stop)) const
+{
+    __block uint32_t bindOrdinal = 0;
     (void)forEachBind(^(const char* opcodeName, int type, bool segIndexSet, uint8_t segIndex, uint64_t segOffset, bool libraryOrdinalSet,
                   int libOrdinal, const char* symbolName, bool weakImport, int64_t addend, bool targetOrAddendChanged, bool& stop) {
         uint8_t* loc = (uint8_t*)(segments[segIndex].content) + segOffset;
@@ -310,7 +308,6 @@
         if ( targetOrAddendChanged )
             ++bindOrdinal;
     }, ^(const char* symbolName){ });
-    return bindOrdinal;
 }
 
 void BindOpcodes::printOpcodes(FILE* output, int indentCount) const
@@ -421,6 +418,328 @@
     }
 }
 
+
+#if BUILDING_MACHO_WRITER
+
+void BindOpcodes::append_byte(uint8_t value)
+{
+    _opcodes.push_back(value);
+}
+
+void BindOpcodes::append_uleb128(uint64_t value)
+{
+    uint8_t byte;
+    do {
+        byte = value & 0x7F;
+        value &= ~0x7F;
+        if ( value != 0 )
+            byte |= 0x80;
+        _opcodes.push_back(byte);
+        value = value >> 7;
+    } while( byte >= 0x80 );
+}
+
+void BindOpcodes::append_sleb128(int64_t value)
+{
+    bool isNeg = ( value < 0 );
+    uint8_t byte;
+    bool more;
+    do {
+        byte = value & 0x7F;
+        value = value >> 7;
+        if ( isNeg )
+            more = ( (value != -1) || ((byte & 0x40) == 0) );
+        else
+            more = ( (value != 0) || ((byte & 0x40) != 0) );
+        if ( more )
+            byte |= 0x80;
+        _opcodes.push_back(byte);
+    }
+    while( more );
+}
+
+void BindOpcodes::append_string(const char* str)
+{
+    for (const char* s = str; *s != '\0'; ++s)
+        _opcodes.push_back(*s);
+    _opcodes.push_back('\0');
+}
+
+static void sortBindOpcodes(std::span<BindOpcodes::LocAndTarget> binds)
+{
+    std::sort(binds.begin(), binds.end(), [](const BindOpcodes::LocAndTarget& a, const BindOpcodes::LocAndTarget& b) {
+        // sort by library, symbol, type, flags, then address
+        if ( a.target != b.target ) {
+            if ( a.target->libOrdinal != b.target->libOrdinal )
+                return (a.target->libOrdinal < b.target->libOrdinal );
+            if ( a.target->symbolName != b.target->symbolName )
+                return ( strcmp(a.target->symbolName, b.target->symbolName) < 0 );
+            // TODO: type, if we ever need it, which we probably don't
+            //if ( this->_type != rhs._type )
+            //    return  (this->_type < rhs._type );
+            // weak import set the flags field
+            if ( a.target->weakImport != b.target->weakImport )
+                return a.target->weakImport;
+            if ( a.target->addend != b.target->addend )
+                return (a.target->addend < b.target->addend );
+        }
+        // Sort by seg index and seg offset, which gives us address
+        if ( a.segIndex != b.segIndex )
+            return (a.segIndex < b.segIndex );
+        return (a.segOffset < b.segOffset );
+    });
+}
+
+
+// to work with dyld2's algorithm, all weak-bind opcodes need to be sorted by symbol name
+static void sortWeakBindOpcodes(std::span<BindOpcodes::LocAndTarget> binds)
+{
+    std::sort(binds.begin(), binds.end(), [](const BindOpcodes::LocAndTarget& a, const BindOpcodes::LocAndTarget& b) {
+        // sort by symbol name then address
+        if ( a.target != b.target ) {
+            if ( a.target->symbolName != b.target->symbolName )
+                return ( strcmp(a.target->symbolName, b.target->symbolName) < 0 );
+        }
+        // Sort by seg index and seg offset, which gives us address
+        if ( a.segIndex != b.segIndex )
+            return (a.segIndex < b.segIndex );
+        return (a.segOffset < b.segOffset );
+    });
+}
+
+BindOpcodes::BindOpcodes(std::span<LocAndTarget> binds, bool is64, BuilderKind kind, LazyStartRecorder lazyStartsRecorder)
+  : _opcodesStart(nullptr), _opcodesEnd(nullptr), _pointerSize(is64 ? 8 : 4)
+{
+    if ( binds.empty() )
+        return;
+
+	struct BindTmp
+	{
+		BindTmp(uint8_t op, uint64_t p1, uint64_t p2=0, const char* s=NULL)
+			: opcode(op), operand1(p1), operand2(p2), name(s) {}
+		uint8_t		opcode;
+		uint64_t	operand1;
+		uint64_t	operand2;
+		const char*	name;
+	};
+
+	// convert to temp encoding that can be more easily optimized
+	std::vector<BindTmp> mid;
+	uint32_t    curSegIndex  = 0;
+    uint64_t    curSegOffset = -1;
+	int         ordinal      = (kind == BuilderKind::weak) ? -3 : 0x80000000;
+	const char* symbolName   = nullptr;
+	int64_t     addend       = 0;
+    uint8_t     type         = 0;
+
+    if ( kind == BuilderKind::lazy ) {
+        for (const LocAndTarget& bind : binds) {
+            mid.emplace_back(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB, bind.segIndex, bind.segOffset);
+            if ( bind.target->libOrdinal <= 0 ) {
+                // special lookups are encoded as negative numbers in BindingInfo
+                mid.emplace_back(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, bind.target->libOrdinal);
+            }
+            else if ( bind.target->libOrdinal<= 15 ) {
+                // small libOrdinals can fit in opcode
+                mid.emplace_back(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM, bind.target->libOrdinal);
+            }
+            else {
+                mid.emplace_back(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB, bind.target->libOrdinal);
+            }
+            uint32_t flags = (bind.target->weakImport ? BIND_SYMBOL_FLAGS_WEAK_IMPORT : 0);
+            mid.emplace_back(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM, flags, 0, bind.target->symbolName);
+            mid.emplace_back(BIND_OPCODE_DO_BIND, 0);
+            mid.emplace_back(BIND_OPCODE_DONE, 0);
+        }
+    }
+    else {
+        // sort by library, symbol, type, then address
+        if ( kind == BuilderKind::weak )
+            sortWeakBindOpcodes(binds);
+        else
+            sortBindOpcodes(binds);
+        for (const LocAndTarget& bind : binds) {
+            // weak binds don't have ordinals
+            if ( kind != BuilderKind::weak ) {
+                if ( ordinal != bind.target->libOrdinal ) {
+                    if ( bind.target->libOrdinal <= 0 ) {
+                        // special lookups are encoded as negative numbers in BindingInfo
+                        mid.emplace_back(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, bind.target->libOrdinal);
+                    }
+                    else {
+                        mid.emplace_back(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB, bind.target->libOrdinal);
+                    }
+                    ordinal = bind.target->libOrdinal;
+                }
+            }
+            if ( (symbolName == nullptr) || (strcmp(symbolName, bind.target->symbolName) != 0) ) {
+                uint32_t flags = 0;
+                if ( bind.target->weakImport )
+                    flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT;
+                if ( bind.target->strongOverrideOfWeakDef )
+                    flags |= BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION;
+                mid.emplace_back(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM, flags, 0, bind.target->symbolName);
+                symbolName = bind.target->symbolName;
+
+                // We don't emit any other opcodes for strong overrides, so skip to the next bind
+                if ( bind.target->strongOverrideOfWeakDef )
+                    continue;
+            }
+            if ( type != BIND_TYPE_POINTER ) {
+                // Note: this code base does not support creating old i386 text fixups
+                mid.emplace_back(BIND_OPCODE_SET_TYPE_IMM, BIND_TYPE_POINTER);
+                type = BIND_TYPE_POINTER;
+            }
+            if ( curSegIndex != bind.segIndex ) {
+                mid.emplace_back(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB, bind.segIndex, bind.segOffset);
+                curSegIndex  = bind.segIndex;
+                curSegOffset = bind.segOffset;
+            }
+            else if ( curSegOffset != bind.segOffset ) {
+                mid.emplace_back(BIND_OPCODE_ADD_ADDR_ULEB, bind.segOffset - curSegOffset);
+                curSegOffset = bind.segOffset;
+            }
+            if ( addend != bind.target->addend ) {
+                mid.emplace_back(BIND_OPCODE_SET_ADDEND_SLEB, bind.target->addend);
+                addend = bind.target->addend;
+            }
+            mid.emplace_back(BIND_OPCODE_DO_BIND, 0);
+            curSegOffset += _pointerSize;
+        }
+        mid.emplace_back(BIND_OPCODE_DONE, 0);
+
+        // optimize phase 1, combine bind/add pairs
+        BindTmp* dst = &mid[0];
+        for (const BindTmp* src = &mid[0]; src->opcode != BIND_OPCODE_DONE; ++src) {
+            if ( (src->opcode == BIND_OPCODE_DO_BIND)
+                && (src[1].opcode == BIND_OPCODE_ADD_ADDR_ULEB) ) {
+                dst->opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB;
+                dst->operand1 = src[1].operand1;
+                ++src;
+                ++dst;
+            }
+            else {
+                *dst++ = *src;
+            }
+        }
+        dst->opcode = BIND_OPCODE_DONE;
+
+        // optimize phase 2, compress packed runs of BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB with
+        // same addr delta into one BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB
+        dst = &mid[0];
+        for (const BindTmp* src = &mid[0]; src->opcode != BIND_OPCODE_DONE; ++src) {
+            uint64_t delta = src->operand1;
+            if ( (src->opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB)
+                    && (src[1].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB)
+                    && (src[1].operand1 == delta) ) {
+                // found at least two in a row, this is worth compressing
+                dst->opcode = BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB;
+                dst->operand1 = 1;
+                dst->operand2 = delta;
+                ++src;
+                while ( (src->opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB)
+                        && (src->operand1 == delta) ) {
+                    dst->operand1++;
+                    ++src;
+                }
+                --src;
+                ++dst;
+            }
+            else {
+                *dst++ = *src;
+            }
+        }
+        dst->opcode = BIND_OPCODE_DONE;
+
+        // optimize phase 3, use immediate encodings
+        for (BindTmp* p = &mid[0]; p->opcode != REBASE_OPCODE_DONE; ++p) {
+            if ( (p->opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB)
+                && (p->operand1 < (15*_pointerSize))
+                && ((p->operand1 % _pointerSize) == 0) ) {
+                p->opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED;
+                p->operand1 = p->operand1/_pointerSize;
+            }
+            else if ( (p->opcode == BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB) && (p->operand1 <= 15) ) {
+                p->opcode = BIND_OPCODE_SET_DYLIB_ORDINAL_IMM;
+            }
+        }
+        dst->opcode = BIND_OPCODE_DONE;
+    }
+
+	// convert to compressed encoding
+	_opcodes.reserve(256);
+	bool done = false;
+    size_t entryStartOffset = 0;
+	for (const BindTmp& tmp : mid) {
+		switch ( tmp.opcode ) {
+			case BIND_OPCODE_DONE:
+                append_byte(BIND_OPCODE_DONE);
+                if ( kind != BuilderKind::lazy )
+                    done = true;
+				break;
+			case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
+				append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | tmp.operand1);
+				break;
+			case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
+				append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
+				append_uleb128(tmp.operand1);
+				break;
+			case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
+				append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | (tmp.operand1 & BIND_IMMEDIATE_MASK));
+				break;
+			case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
+                if ( lazyStartsRecorder )
+                    lazyStartsRecorder(entryStartOffset, tmp.name);
+				append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | tmp.operand1);
+				append_string(tmp.name);
+				break;
+			case BIND_OPCODE_SET_TYPE_IMM:
+				append_byte(BIND_OPCODE_SET_TYPE_IMM | tmp.operand1);
+				break;
+			case BIND_OPCODE_SET_ADDEND_SLEB:
+				append_byte(BIND_OPCODE_SET_ADDEND_SLEB);
+				append_sleb128(tmp.operand1);
+				break;
+			case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
+                if ( lazyStartsRecorder )
+                    entryStartOffset = _opcodes.size();
+                append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | tmp.operand1);
+				append_uleb128(tmp.operand2);
+				break;
+			case BIND_OPCODE_ADD_ADDR_ULEB:
+				append_byte(BIND_OPCODE_ADD_ADDR_ULEB);
+				append_uleb128(tmp.operand1);
+				break;
+			case BIND_OPCODE_DO_BIND:
+				append_byte(BIND_OPCODE_DO_BIND);
+				break;
+			case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
+				append_byte(BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB);
+				append_uleb128(tmp.operand1);
+				break;
+			case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
+				append_byte(BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED | tmp.operand1 );
+				break;
+			case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
+				append_byte(BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB);
+				append_uleb128(tmp.operand1);
+				append_uleb128(tmp.operand2);
+				break;
+		}
+        if ( done )
+            break;
+    }
+
+	// align to pointer size
+    while ( (_opcodes.size() % _pointerSize) != 0 )
+        append_byte(0);
+
+    _opcodesStart = &_opcodes[0];
+    _opcodesEnd   = &_opcodes[_opcodes.size()];
+}
+#endif // BUILDING_MACHO_WRITER
+
+
 //
 // MARK: --- LazyBindOpcodes inspection methods ---
 //