PropertyList.cpp diff - common/PropertyList.cpp - Dyld source code dyld-360.21

common/PropertyList.cpp /dev/null ⇄ dyld-1330
--- /dev/null
+++ dyld/dyld-1330/common/PropertyList.cpp
@@ -0,0 +1,534 @@
+/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*- vim: ft=cpp et ts=4 sw=4:
+ *
+ * Copyright (c) 2023 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include "PropertyList.h"
+
+#include <bit>
+#include <array>
+#include <algorithm>
+
+#include "Vector.h"
+
+using lsl::Allocator;
+using lsl::Vector;
+
+namespace {
+// Figure out how large of integer is needed to store the value
+static uint8_t bytesNeededForIntegerValue(uint64_t value) {    // Check to see if value fits by generating an inverse mask of 2^result-1 and see if any bits leak
+    for(uint8_t i = 1; i < 8; i<<=1) {
+        if ((value & ~((1ULL<<(i*8))-1)) == 0) {
+            // No bytes leaked, so we have enough bytes to hold the value
+            return i;
+        }
+    }
+    return 8;
+}
+
+// Figure out how large of integer is needed to store the value
+static uint8_t bytesNeededForIntegerValue(int64_t value) {
+    bool negativeValue = value & 0xf000'0000'0000'0000;
+    if (!negativeValue) {
+        // If the value is positive and the highest bit in the encoding is set it will incorrectly decode as negative
+        // to avoid that we bit shift all positive values by 1, that way if the top most bit is set it will over flow
+        // into the next byte and increase the encoding size
+        value <<= 1;
+    }
+    return bytesNeededForIntegerValue((uint64_t)value);
+}
+
+// A non-allocating quick sort. Hard wired for indirect comparisons
+template<class T>
+void quickSort(T begin, T end) {
+    if (begin == end) {
+        return;
+    }
+
+    auto pivot = *std::next(begin, (end - begin) / 2);
+    auto parition1 = std::partition(begin, end, [pivot](const auto& em) {
+        // We are sorting pointers to elements by the value of the element,
+        // so we need an extra deref here.
+        return *em < *pivot;
+    });
+    auto parition2 = std::partition(parition1, end, [pivot](const auto& em) {
+        // We are sorting pointers to elements by the value of the element,
+        // so we need an extra deref here.
+        return !(*pivot < *em);
+    });
+
+    quickSort(begin, parition1);
+    quickSort(parition2, end);
+}
+
+template<typename T>
+void sortUniqueAndRedirect(Allocator& allocator, Vector<T>& objects, Vector<uint64_t>& offsets, ByteStream& bytes) {
+    if (objects.empty()) { return; }
+
+    // Sort the elements
+    quickSort(objects.begin(), objects.end());
+
+    uint64_t lastObjectIndex = offsets.size();
+    T lastObject = nullptr;
+
+    // We walk through the sorted vector one element at a time. If it matches the lastUniqueObject we redirect it to that, otherwise
+    // we set lastUniqueObject to the new object, increment the object index, and emit it to the output stream
+    for (auto i = 0; i < objects.size(); ++i) {
+        if (lastObject && (*objects[i] == *lastObject)) {
+            objects[i]->convertToRedirect(lastObject->index());
+        } else {
+            objects[i]->setIndex(lastObjectIndex++);
+            lastObject = objects[i];
+            offsets.push_back(bytes.size());
+            lastObject->emit(0, bytes);
+        }
+    }
+}
+
+// bplist00 uses the encoded integer internally in arrays and dictionaries
+static void emitPlistEncodedInteger(int64_t _value, ByteStream &bytes) {
+    uint8_t size = bytesNeededForIntegerValue(_value);
+    bytes.push_back(std::byte{0x10} | std::byte(std::countr_zero(size)));
+    bytes.push_back(size, _value);
+}
+
+// bplist00 uses the encoded integer internally in arrays and dictionaries
+static void emitUnsignedPlistEncodedInteger(uint64_t _value, ByteStream &bytes) {
+    uint8_t size = bytesNeededForIntegerValue(_value);
+    bytes.push_back(std::byte{0x10} | std::byte(std::countr_zero(size)));
+    bytes.push_back(size, _value);
+}
+
+};
+
+#pragma mark -
+#pragma mark Plist object types
+
+#pragma mark Base Object
+
+void PropertyList::Object::convertToRedirect(uint64_t index) {
+    deallocate();
+    _isRedirect = true;
+    _index = index;
+}
+void PropertyList::Object::setIndex(uint64_t index) {
+    _index = index;
+};
+uint64_t PropertyList::Object::index() const {
+    return _index;
+};
+
+PropertyList::Object::Type PropertyList::Object::type() const {
+    return Type(_type);
+}
+
+bool PropertyList::Object::processed() const {
+    return _processed;
+}
+
+void PropertyList::Object::setProcessed() {
+    _processed = true;
+}
+
+#pragma mark Integer
+
+PropertyList::Integer::Integer(int64_t value) : Object(Type::Integer), _value(value) {}
+PropertyList::Integer::Integer(Allocator& allocator, int64_t value) : Object(Type::Integer), _value(value) {}
+
+bool PropertyList::Integer::operator==(const PropertyList::Integer& other) const {
+    return (*this <=> other) == std::strong_ordering::equal;
+}
+
+void PropertyList::Integer::emit(uint8_t objectIndexSize, ByteStream& bytes) {
+    // int    0001 0nnn    ...        // # of bytes is 2^nnn, big-endian bytes
+    emitPlistEncodedInteger(_value, bytes);
+
+}
+void PropertyList::Integer::deallocate() {}
+
+std::strong_ordering PropertyList::Integer::operator<=>(const Integer& other) const {
+    return (_value <=> other._value);
+}
+
+#pragma mark Data
+
+void PropertyList::Data::emit(uint8_t objectIndexSize, ByteStream& bytes) {
+    // data    0100 nnnn    [int]    ...    // nnnn is number of bytes unless 1111 then int count follows, followed by bytes
+    uint64_t size = _value.size();
+    if (size < 15) {
+        bytes.push_back(std::byte{0x40} | (std::byte)size);
+    } else {
+        bytes.push_back(std::byte{0x4f});
+        emitUnsignedPlistEncodedInteger(size, bytes);
+    }
+    std::copy(_value.begin(), _value.end(), std::back_inserter(bytes));
+}
+
+void PropertyList::Data::deallocate() {
+    _value.resize(0);
+}
+
+std::strong_ordering PropertyList::Data::operator<=>(const PropertyList::Data& other) const {
+    std::strong_ordering order = _value.size() <=> other._value.size();
+    if (order == std::strong_ordering::equal) {
+        order = (memcmp((void*)_value.data(), (void*)other._value.data(),  (size_t)_value.size()) <=> 0);
+    }
+    return order;
+}
+
+bool PropertyList::Data::operator==(const Data& other) const {
+    return (*this <=> other) == std::strong_ordering::equal;
+}
+
+PropertyList::Data::Data(Allocator& allocator,  uint64_t size) : Object(Type::Data), _value(allocator) {
+    _value.insert(_value.begin(), size, std::byte{0});
+}
+
+PropertyList::Data::Data(Allocator& allocator, std::span<std::byte> value) : Object(Type::Data), _value(allocator) {
+    _value.insert(_value.begin(), value.begin(), value.end());
+}
+
+std::span<std::byte> PropertyList::Data::bytes() {
+    return _value;
+}
+
+
+#pragma mark String
+
+bool PropertyList::String::emitUnicode(uint8_t objectIndexSize, uint64_t stringSize, ByteStream& bytes) const {
+    // string    0110 nnnn    [int]    ...    // Unicode string, nnnn is # of chars, else 1111 then int count, then big-endian 2-byte uint16_t
+    bool foundUnicode = false;
+    for(auto i = 0; i < stringSize; ++i) {
+        if (((uint8_t*)_value)[i] & 0x80) {
+            foundUnicode = true;
+            break;
+        }
+    }
+    if (!foundUnicode) {
+        return false;
+    }
+
+    // We have a UTF-8 string, from which we cannot tell how many UTF-8 chaarcters will result. So we decode the whole thing into a
+    // vector then emit it. If the UTF-8 is malformed we bail out here and pass it through as a malformed ASCII string. The decoder
+    // has to deal with malformed content anyway, and that way we have the data intact data in the atlas so we can inspect
+    // manually if necessary.
+    #define CHECK_UTF8_INTERMEDIATY_BYTE(x) \
+        if (((x) & 0xc0) != 0x80) { return false;}
+    Vector<uint16_t> utf16chars(bytes.allocator());
+    for(auto i = 0; i < stringSize; ++i) {
+        uint16_t value = ((uint8_t*)_value)[i];
+        if ((value & 0x80) == 0x00) {
+            // 1 byte
+            utf16chars.push_back(value);
+        } else if ((value & 0xe0) == 0xc0) {
+            // 2 bytes
+            if (i+1 >= stringSize) { return false; }
+            uint16_t value2 = ((uint8_t*)_value)[i+1];
+            CHECK_UTF8_INTERMEDIATY_BYTE(value2);
+            utf16chars.push_back(((value & 0x1f) << 6)
+                                 | (value2 & 0x3f));
+            i += 1;
+        } else if ((value & 0xf0) == 0xe0) {
+            // 3 bytes
+            if (i+2 >= stringSize) { return false; }
+            uint16_t value2 = ((uint8_t*)_value)[i+1];
+            uint16_t value3 = ((uint8_t*)_value)[i+2];
+            CHECK_UTF8_INTERMEDIATY_BYTE(value2);
+            CHECK_UTF8_INTERMEDIATY_BYTE(value3);
+            utf16chars.push_back(((value & 0x1f) << 12)
+                                 | ((value2 & 0x3f) << 6)
+                                 | ((value3 & 0x3f)));
+                                 i += 2;
+        } else if ((value & 0xf8) == 0xf0) {
+            // 4 bytes
+            if (i+3 >= stringSize) { return false; }
+            uint32_t value1 = value;
+            uint32_t value2 = ((uint8_t*)_value)[i+1];
+            uint32_t value3 = ((uint8_t*)_value)[i+2];
+            uint32_t value4 = ((uint8_t*)_value)[i+3];
+            CHECK_UTF8_INTERMEDIATY_BYTE(value2);
+            CHECK_UTF8_INTERMEDIATY_BYTE(value3);
+            CHECK_UTF8_INTERMEDIATY_BYTE(value4);
+
+            value1 = (value1 & 0x07) << 18;
+            value1 |= (value2 & 0x3f) << 12;
+            value1 |= (value3 & 0x3f) << 6;
+            value1 |= (value4 & 0x3f);
+            value1 -= 0x10000;
+
+            uint16_t highSurrogate = ((uint16_t)((value1>>10) & 0x03ff) + 0xd800);
+            uint16_t lowSurrogate = (uint16_t)((value1 & 0x03ff) + 0xdc00);
+            utf16chars.push_back(highSurrogate);
+            utf16chars.push_back(lowSurrogate);
+            i += 3;
+        }
+    }
+
+    if (utf16chars.size() < 15) {
+        bytes.push_back(std::byte{0x60} | (std::byte)utf16chars.size());
+    } else {
+        bytes.push_back(std::byte{0x6f});
+        PropertyList::Integer encodedSize(utf16chars.size());
+        encodedSize.emit(objectIndexSize, bytes);
+    }
+    for (auto& utf16char : utf16chars) {
+        bytes.push_back((uint16_t)utf16char);
+    }
+    return true;
+}
+
+void PropertyList::String::emit(uint8_t objectIndexSize, ByteStream& bytes) {
+    uint64_t size = strlen(_value);
+
+    if (emitUnicode(objectIndexSize, size, bytes)) {
+        // The string was unicode and has been emitted, return
+        return;
+    }
+    // string    0101 nnnn    [int]    ...    // ASCII string, nnnn is # of chars, else 1111 then int count, then bytes
+    if (size < 15) {
+        bytes.push_back(std::byte{0x50} | (std::byte)size);
+    } else {
+        bytes.push_back(std::byte{0x5f});
+        PropertyList::Integer encodedSize(size);
+        encodedSize.emit(objectIndexSize, bytes);
+    }
+    std::copy((std::byte*)_value, (std::byte*)_value+size, std::back_inserter(bytes));
+}
+
+void PropertyList::String::deallocate() {
+    if (_isRedirect) { return; }
+    Allocator::freeObject((void*)_value);
+    _value = nullptr;
+}
+
+std::strong_ordering PropertyList::String::operator<=>(const PropertyList::String& other) const {
+    return (strcmp(_value, other._value) <=> 0);
+}
+bool PropertyList::String::operator==(const String& other) const {
+    return (*this <=> other) == std::strong_ordering::equal;
+}
+
+PropertyList::String::String(Allocator& allocator, std::string_view value) : Object(Type::String) {
+    char* p = (char*)allocator.malloc(value.size()+1);
+    memcpy(p, value.data(), value.size());
+    p[value.size()] = '\0';
+    _value = p;
+}
+
+PropertyList::String::~String() {
+    if (_isRedirect) { return; }
+    if (_value) {
+        Allocator::freeObject((void*)_value);
+    }
+}
+
+#pragma mark  Array
+
+PropertyList::Array::Array(Allocator& allocator) : Object(Type::Array), _values(allocator) {}
+
+std::span<PropertyList::Object*> PropertyList::Array::values() {
+    return _values;
+}
+
+void  PropertyList::Array::emit(uint8_t objectIndexSize, ByteStream& bytes) {
+    // 1010 nnnn    [int]    objref*    // nnnn is count, unless '1111', then int count follows
+    uint64_t size = _values.size();
+    if (size < 15) {
+        bytes.push_back(std::byte{0xa0} | (std::byte)size);
+    } else {
+        bytes.push_back(std::byte{0xaf});
+        emitUnsignedPlistEncodedInteger(size, bytes);
+    }
+    for (auto i = 0; i < size; ++i) {
+        bytes.push_back(objectIndexSize, _values[i]->index());
+    }
+}
+
+void PropertyList::Array::deallocate() {
+    if (_isRedirect) { return; }
+    for (auto element : _values) {
+        element->deallocate();
+    }
+}
+
+#pragma mark Dictionary
+
+PropertyList::Dictionary::Dictionary(Allocator& allocator) : Object(Type::Dictionary), _keys(allocator), _values(allocator) {}
+
+std::span<PropertyList::Object*> PropertyList::Dictionary::keys() {
+    return _keys;
+}
+std::span<PropertyList::Object*> PropertyList::Dictionary::values() {
+    return _values;
+}
+
+void PropertyList::Dictionary::emit(uint8_t objectIndexSize, ByteStream& bytes) {
+    //dict    1101 nnnn    [int]    keyref* objref*    // nnnn is count, unless '1111', then int count follows
+    uint64_t size = _keys.size();
+    if (size < 15) {
+        bytes.push_back(std::byte{0xd0} | (std::byte)size);
+    } else {
+        bytes.push_back(std::byte{0xdf});
+        PropertyList::Integer encodedSize(size);
+        encodedSize.emit(objectIndexSize, bytes);
+    }
+    for (auto i = 0; i < size; ++i) {
+        bytes.push_back(objectIndexSize, _keys[i]->index());
+    }
+    for (auto i = 0; i < size; ++i) {
+        bytes.push_back(objectIndexSize, _values[i]->index());
+    }
+}
+
+void PropertyList::Dictionary::deallocate() {
+    if (_isRedirect) { return; }
+    for (auto element : _keys) {
+        element->deallocate();
+    }
+    for (auto element : _values) {
+        element->deallocate();
+    }
+}
+
+#pragma mark UUID
+
+PropertyList::UUID::UUID(Allocator& allocator, uuid_t uuid) : Data(allocator, std::span((std::byte*)&uuid[0], 16)) {}
+
+#pragma mark Botmap
+
+// Need to round up to the next multiple 8 since we used 8 bit bytes
+PropertyList::Bitmap::Bitmap(Allocator& allocator, uint64_t size) : PropertyList::Data(allocator, ((size + 7) & (-8))/8) {}
+void PropertyList::Bitmap::setBit(uint64_t bit) {
+    assert(bit < bytes().size()*8);
+    std::byte* bitmap = bytes().data();
+    std::byte* byte = &bitmap[bit/8];
+    (*byte) |= (std::byte)(1<<(bit%8));
+}
+
+#pragma mark -
+
+// This a WRITEONLY plist implementation. There is no way to query objects in it. It also has a number of other
+// limitations, such as only usng strings for keys, etc. Its goal is to work in the dyld runtime environment
+// with enough functionality to emit the process info and nothing more.
+
+PropertyList::PropertyList(Allocator& allocator) :  _allocator(allocator), _rootDictionary(allocator) {}
+
+void PropertyList::encode(ByteStream& bytes) {
+    Vector<uint64_t>        offsets(bytes.allocator());
+    uint64_t                offsetTableOffset   = 0;
+    uint64_t                numObjects          = 0;
+    uint64_t                topObject           = 0;
+    uint8_t                 offsetSize          = 0;
+    uint8_t                 objectIndexSize     = 0;
+
+    Vector<String*>     strings(_allocator);
+    Vector<Integer*>    integers(_allocator);
+    Vector<Data*>       datas(_allocator);
+    Vector<Object*>     collections(_allocator);
+    Vector<Object*>     objectsToProcess(_allocator);
+
+    // First we sort out all the integers, strings, data for uniquing, while pull out the collections to flatten
+    objectsToProcess.push_back(&_rootDictionary);
+    while(!objectsToProcess.empty()) {
+        Vector<Object*> newObjects(_allocator);
+        for (auto i : objectsToProcess) {
+            if (i->processed()) { continue; }
+            i->setProcessed();
+            switch(i->type()) {
+                case Object::Type::String:
+                    strings.push_back(reinterpret_cast<String*>(i));
+                    break;
+                case Object::Type::Integer:
+                    integers.push_back(reinterpret_cast<Integer*>(i));
+                    break;
+                case Object::Type::Data:
+                    datas.push_back(reinterpret_cast<Data*>(i));
+                    break;
+                case Object::Type::Array: {
+                    Array* array = reinterpret_cast<Array*>(i);
+                    collections.push_back(i);
+                    std::copy(array->values().begin(), array->values().end(), std::back_inserter(newObjects));
+                } break;
+                case Object::Type::Dictionary: {
+                    Dictionary* dict = reinterpret_cast<Dictionary*>(i);
+                    collections.push_back(i);
+                    // Since this is a collection its children need to be processed, add them to newObjects so they will be handled,
+                    // next time we loop around.
+                    std::copy(dict->keys().begin(), dict->keys().end(), std::back_inserter(newObjects));
+                    std::copy(dict->values().begin(), dict->values().end(), std::back_inserter(newObjects));
+                } break;
+            }
+        }
+        objectsToProcess = newObjects;
+    }
+
+    //Write header
+    bytes.setEndian(ByteStream::Endian::Big);
+    bytes.push_back("bplist00");
+
+    // Sort, unique, and write out each type
+    sortUniqueAndRedirect(_allocator, strings, offsets, bytes);
+    sortUniqueAndRedirect(_allocator, integers, offsets, bytes);
+    sortUniqueAndRedirect(_allocator, datas, offsets, bytes);
+
+    topObject = offsets.size();
+    numObjects = offsets.size() + collections.size();
+    objectIndexSize = bytesNeededForIntegerValue(numObjects);
+
+    //emit collections
+    uint64_t currentIndex = offsets.size();
+    for (auto i : collections) {
+        i->setIndex(currentIndex++);
+    }
+
+    //emit collections
+    for (auto i : collections) {
+        offsets.push_back(bytes.size());
+        i->emit(objectIndexSize, bytes);
+    }
+
+    offsetTableOffset = bytes.size();
+    offsetSize = bytesNeededForIntegerValue(offsetTableOffset);
+
+    for (auto i = 0; i < offsets.size(); ++i) {
+        bytes.push_back(offsetSize, offsets[i]);
+    }
+
+    // Write trailer
+    bytes.push_back((uint8_t)0x0);
+    bytes.push_back((uint8_t)0x0);
+    bytes.push_back((uint8_t)0x0);
+    bytes.push_back((uint8_t)0x0);
+    bytes.push_back((uint8_t)0x0);
+    bytes.push_back((uint8_t)0x0);
+    bytes.push_back((uint8_t)offsetSize);
+    bytes.push_back((uint8_t)objectIndexSize);
+
+    bytes.push_back((uint64_t)numObjects);
+    bytes.push_back((uint64_t)topObject); // Root dictionary is always the first collection
+    bytes.push_back((uint64_t)offsetTableOffset);
+}
+
+PropertyList::Dictionary& PropertyList::rootDictionary() {
+    return _rootDictionary;
+}
+