Loading...
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
/*
 * Copyright (c) 2021 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#include <sys/types.h>
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>

#include "Array.h"

#include "BindOpcodesWriter.h"
#include "Misc.h"
#include "Image.h"

using dyld3::Array;
typedef mach_o::BindOpcodes::LocAndTarget LocAndTarget;

namespace mach_o {

//
// MARK: --- Common methods ---
//

static void sortBindOpcodes(std::span<BindOpcodes::LocAndTarget> binds)
{
    std::sort(binds.begin(), binds.end(), [](const BindOpcodes::LocAndTarget& a, const BindOpcodes::LocAndTarget& b) {
        // sort by library, symbol, type, flags, then address
        if ( a.target != b.target ) {
            if ( a.target->libOrdinal != b.target->libOrdinal )
                return (a.target->libOrdinal < b.target->libOrdinal );
            if ( a.target->symbolName != b.target->symbolName ) {
                int cmpRes = strcmp(a.target->symbolName, b.target->symbolName);
                if ( cmpRes != 0 )
                    return (cmpRes < 0);
            }
            // TODO: type, if we ever need it, which we probably don't
            //if ( this->_type != rhs._type )
            //    return  (this->_type < rhs._type );
            // weak import set the flags field
            if ( a.target->weakImport != b.target->weakImport )
                return a.target->weakImport;
            if ( a.target->addend != b.target->addend )
                return (a.target->addend < b.target->addend );
        }
        // Sort by seg index and seg offset, which gives us address
        if ( a.segIndex != b.segIndex )
            return (a.segIndex < b.segIndex );
        return (a.segOffset < b.segOffset );
    });
}


// to work with dyld2's algorithm, all weak-bind opcodes need to be sorted by symbol name
static void sortWeakBindOpcodes(std::span<BindOpcodes::LocAndTarget> binds)
{
    std::sort(binds.begin(), binds.end(), [](const BindOpcodes::LocAndTarget& a, const BindOpcodes::LocAndTarget& b) {
        // sort by symbol name then address
        if ( a.target != b.target ) {
            if ( a.target->symbolName != b.target->symbolName )
                return ( strcmp(a.target->symbolName, b.target->symbolName) < 0 );
        }
        // Sort by seg index and seg offset, which gives us address
        if ( a.segIndex != b.segIndex )
            return (a.segIndex < b.segIndex );
        return (a.segOffset < b.segOffset );
    });
}

enum class BuilderKind { regular, lazy, weak };

static void append_byte(std::vector<uint8_t>& buffer, uint8_t value)
{
    buffer.push_back(value);
}

static void append_uleb128(std::vector<uint8_t>& buffer, uint64_t value)
{
    uint8_t byte;
    do {
        byte = value & 0x7F;
        value &= ~0x7F;
        if ( value != 0 )
            byte |= 0x80;
        buffer.push_back(byte);
        value = value >> 7;
    } while( byte >= 0x80 );
}

static void append_sleb128(std::vector<uint8_t>& buffer, int64_t value)
{
    bool isNeg = ( value < 0 );
    uint8_t byte;
    bool more;
    do {
        byte = value & 0x7F;
        value = value >> 7;
        if ( isNeg )
            more = ( (value != -1) || ((byte & 0x40) == 0) );
        else
            more = ( (value != 0) || ((byte & 0x40) != 0) );
        if ( more )
            byte |= 0x80;
        buffer.push_back(byte);
    }
    while( more );
}

static void append_string(std::vector<uint8_t>& buffer, const char* str)
{
    for (const char* s = str; *s != '\0'; ++s)
        buffer.push_back(*s);
    buffer.push_back('\0');
}

static std::vector<uint8_t> buildBindOpcodes(std::span<LocAndTarget> binds, bool is64, BuilderKind kind, LazyBindOpcodesWriter::LazyStartRecorder lazyStartsRecorder)
{
    if ( binds.empty() )
        return { };

    uint32_t pointerSize = is64 ? 8 : 4;
    std::vector<uint8_t> opcodes;

    struct BindTmp
    {
        BindTmp(uint8_t op, uint64_t p1, uint64_t p2=0, const char* s=NULL)
            : opcode(op), operand1(p1), operand2(p2), name(s) {}
        uint8_t        opcode;
        uint64_t    operand1;
        uint64_t    operand2;
        const char*    name;
    };

    // convert to temp encoding that can be more easily optimized
    std::vector<BindTmp> mid;
    uint32_t    curSegIndex  = 0;
    uint64_t    curSegOffset = -1;
    int         ordinal      = (kind == BuilderKind::weak) ? -3 : 0x80000000;
    const char* symbolName   = nullptr;
    int64_t     addend       = 0;
    uint8_t     type         = 0;

    if ( kind == BuilderKind::lazy ) {
        for (const LocAndTarget& bind : binds) {
            mid.emplace_back(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB, bind.segIndex, bind.segOffset);
            if ( bind.target->libOrdinal <= 0 ) {
                // special lookups are encoded as negative numbers in BindingInfo
                mid.emplace_back(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, bind.target->libOrdinal);
            }
            else if ( bind.target->libOrdinal<= 15 ) {
                // small libOrdinals can fit in opcode
                mid.emplace_back(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM, bind.target->libOrdinal);
            }
            else {
                mid.emplace_back(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB, bind.target->libOrdinal);
            }
            uint32_t flags = (bind.target->weakImport ? BIND_SYMBOL_FLAGS_WEAK_IMPORT : 0);
            mid.emplace_back(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM, flags, 0, bind.target->symbolName);
            mid.emplace_back(BIND_OPCODE_DO_BIND, 0);
            mid.emplace_back(BIND_OPCODE_DONE, 0);
        }
    }
    else {
        // sort by library, symbol, type, then address
        if ( kind == BuilderKind::weak )
            sortWeakBindOpcodes(binds);
        else
            sortBindOpcodes(binds);
        for (const LocAndTarget& bind : binds) {
            // weak binds don't have ordinals
            if ( kind != BuilderKind::weak ) {
                if ( ordinal != bind.target->libOrdinal ) {
                    if ( bind.target->libOrdinal <= 0 ) {
                        // special lookups are encoded as negative numbers in BindingInfo
                        mid.emplace_back(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, bind.target->libOrdinal);
                    }
                    else {
                        mid.emplace_back(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB, bind.target->libOrdinal);
                    }
                    ordinal = bind.target->libOrdinal;
                }
            }
            if ( (symbolName == nullptr) || (strcmp(symbolName, bind.target->symbolName) != 0) ) {
                uint32_t flags = 0;
                if ( bind.target->weakImport )
                    flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT;
                if ( bind.target->strongOverrideOfWeakDef )
                    flags |= BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION;
                mid.emplace_back(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM, flags, 0, bind.target->symbolName);
                symbolName = bind.target->symbolName;

                // We don't emit any other opcodes for strong overrides, so skip to the next bind
                if ( bind.target->strongOverrideOfWeakDef )
                    continue;
            }
            if ( type != BIND_TYPE_POINTER ) {
                // Note: this code base does not support creating old i386 text fixups
                mid.emplace_back(BIND_OPCODE_SET_TYPE_IMM, BIND_TYPE_POINTER);
                type = BIND_TYPE_POINTER;
            }
            if ( curSegIndex != bind.segIndex ) {
                mid.emplace_back(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB, bind.segIndex, bind.segOffset);
                curSegIndex  = bind.segIndex;
                curSegOffset = bind.segOffset;
            }
            else if ( curSegOffset != bind.segOffset ) {
                mid.emplace_back(BIND_OPCODE_ADD_ADDR_ULEB, bind.segOffset - curSegOffset);
                curSegOffset = bind.segOffset;
            }
            if ( addend != bind.target->addend ) {
                mid.emplace_back(BIND_OPCODE_SET_ADDEND_SLEB, bind.target->addend);
                addend = bind.target->addend;
            }
            mid.emplace_back(BIND_OPCODE_DO_BIND, 0);
            curSegOffset += pointerSize;
        }
        mid.emplace_back(BIND_OPCODE_DONE, 0);

        // optimize phase 1, combine bind/add pairs
        BindTmp* dst = &mid[0];
        for (const BindTmp* src = &mid[0]; src->opcode != BIND_OPCODE_DONE; ++src) {
            if ( (src->opcode == BIND_OPCODE_DO_BIND)
                && (src[1].opcode == BIND_OPCODE_ADD_ADDR_ULEB) ) {
                dst->opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB;
                dst->operand1 = src[1].operand1;
                ++src;
                ++dst;
            }
            else {
                *dst++ = *src;
            }
        }
        dst->opcode = BIND_OPCODE_DONE;

        // optimize phase 2, compress packed runs of BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB with
        // same addr delta into one BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB
        dst = &mid[0];
        for (const BindTmp* src = &mid[0]; src->opcode != BIND_OPCODE_DONE; ++src) {
            uint64_t delta = src->operand1;
            if ( (src->opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB)
                    && (src[1].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB)
                    && (src[1].operand1 == delta) ) {
                // found at least two in a row, this is worth compressing
                dst->opcode = BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB;
                dst->operand1 = 1;
                dst->operand2 = delta;
                ++src;
                while ( (src->opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB)
                        && (src->operand1 == delta) ) {
                    dst->operand1++;
                    ++src;
                }
                --src;
                ++dst;
            }
            else {
                *dst++ = *src;
            }
        }
        dst->opcode = BIND_OPCODE_DONE;

        // optimize phase 3, use immediate encodings
        for (BindTmp* p = &mid[0]; p->opcode != REBASE_OPCODE_DONE; ++p) {
            if ( (p->opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB)
                && (p->operand1 < (15*pointerSize))
                && ((p->operand1 % pointerSize) == 0) ) {
                p->opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED;
                p->operand1 = p->operand1/pointerSize;
            }
            else if ( (p->opcode == BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB) && (p->operand1 <= 15) ) {
                p->opcode = BIND_OPCODE_SET_DYLIB_ORDINAL_IMM;
            }
        }
        dst->opcode = BIND_OPCODE_DONE;
    }

    // convert to compressed encoding
    opcodes.reserve(256);
    bool done = false;
    size_t entryStartOffset = 0;
    for (const BindTmp& tmp : mid) {
        switch ( tmp.opcode ) {
            case BIND_OPCODE_DONE:
                append_byte(opcodes, BIND_OPCODE_DONE);
                if ( kind != BuilderKind::lazy )
                    done = true;
                break;
            case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
                append_byte(opcodes, BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | tmp.operand1);
                break;
            case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
                append_byte(opcodes, BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
                append_uleb128(opcodes, tmp.operand1);
                break;
            case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
                append_byte(opcodes, BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | (tmp.operand1 & BIND_IMMEDIATE_MASK));
                break;
            case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
                if ( lazyStartsRecorder )
                    lazyStartsRecorder(entryStartOffset, tmp.name);
                append_byte(opcodes, BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | tmp.operand1);
                append_string(opcodes, tmp.name);
                break;
            case BIND_OPCODE_SET_TYPE_IMM:
                append_byte(opcodes, BIND_OPCODE_SET_TYPE_IMM | tmp.operand1);
                break;
            case BIND_OPCODE_SET_ADDEND_SLEB:
                append_byte(opcodes, BIND_OPCODE_SET_ADDEND_SLEB);
                append_sleb128(opcodes, tmp.operand1);
                break;
            case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
                if ( lazyStartsRecorder )
                    entryStartOffset = opcodes.size();
                append_byte(opcodes, BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | tmp.operand1);
                append_uleb128(opcodes, tmp.operand2);
                break;
            case BIND_OPCODE_ADD_ADDR_ULEB:
                append_byte(opcodes, BIND_OPCODE_ADD_ADDR_ULEB);
                append_uleb128(opcodes, tmp.operand1);
                break;
            case BIND_OPCODE_DO_BIND:
                append_byte(opcodes, BIND_OPCODE_DO_BIND);
                break;
            case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
                append_byte(opcodes, BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB);
                append_uleb128(opcodes, tmp.operand1);
                break;
            case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
                append_byte(opcodes, BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED | tmp.operand1 );
                break;
            case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
                append_byte(opcodes, BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB);
                append_uleb128(opcodes, tmp.operand1);
                append_uleb128(opcodes, tmp.operand2);
                break;
        }
        if ( done )
            break;
    }

    // align to pointer size
    while ( (opcodes.size() % pointerSize) != 0 )
        append_byte(opcodes, 0);

    return opcodes;
}

//
// MARK: --- BindOpcodesWriter methods ---
//

BindOpcodesWriter::BindOpcodesWriter(std::span<LocAndTarget> binds, bool is64)
    : BindOpcodes(nullptr, 0, is64)
{
    _opcodes = buildBindOpcodes(binds, is64, BuilderKind::regular, nullptr);

    _opcodesStart = _opcodes.data();
    _opcodesEnd   = _opcodes.data()+_opcodes.size();
}

//
// MARK: --- LazyBindOpcodesWriter methods ---
//
LazyBindOpcodesWriter::LazyBindOpcodesWriter(std::span<LocAndTarget> binds, bool is64, LazyStartRecorder lazyStartsRecorder)
    : LazyBindOpcodes(nullptr, 0, is64)
{
    _opcodes = buildBindOpcodes(binds, is64, BuilderKind::lazy, lazyStartsRecorder);

    _opcodesStart = _opcodes.data();
    _opcodesEnd   = _opcodes.data()+_opcodes.size();
}

//
// MARK: --- WeakBindOpcodesWriter methods ---
//
WeakBindOpcodesWriter::WeakBindOpcodesWriter(std::span<LocAndTarget> binds, bool is64)
    : WeakBindOpcodes(nullptr, 0, is64)
{
    _opcodes = buildBindOpcodes(binds, is64, BuilderKind::weak, nullptr);

    _opcodesStart = _opcodes.data();
    _opcodesEnd   = _opcodes.data()+_opcodes.size();
}


} // namespace mach_o