Loading...
--- /dev/null
+++ dyld/dyld-1340/framework/ProcessScavenger.cpp
@@ -0,0 +1,359 @@
+/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*- vim: ft=cpp et ts=4 sw=4:
+ *
+ * Copyright (c) 2023 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <fcntl.h>
+#include <libgen.h>
+#include <algorithm>
+#include <libproc.h>
+#include <sys/mman.h>
+#include <mach/task.h>
+#include <sys/malloc.h>
+#include <mach/mach_vm.h>
+#include <mach/mach_traps.h>
+
+#include <TargetConditionals.h>
+
+#include "AAREncoder.h"
+#include "Allocator.h"
+#include "PropertyList.h"
+#include "ProcessScavenger.h"
+#include "SnapshotShared.h"
+#include "Header.h"
+#include "DyldSharedCache.h"
+#include "Vector.h"
+#include "SafeVMPrimitives.h"
+
+#include <sys/fsgetpath.h>
+#include <mach-o/dyld_priv.h>
+#include <mach-o/dyld_process_info.h>
+
+#include "dyld_cache_format.h"
+#include "dyld_process_info_internal.h"
+
+using lsl::Allocator;
+using lsl::UniquePtr;
+using lsl::Vector;
+using mach_o::Header;
+using UUID = PropertyList::UUID;
+using Array = PropertyList::Array;
+using Data = PropertyList::Data;
+using Dictionary = PropertyList::Dictionary;
+using String = PropertyList::String;
+using Bitmap = PropertyList::Bitmap;
+using Integer = PropertyList::Integer;
+
+namespace {
+
+// Read-only, private memory mapping of an entire file.
+//
+// The type is move-only. Whichever instance holds the mapping releases it with munmap() on destruction.
+// If the file cannot be opened, stat'ed, or mapped, the buffer stays empty: size() is 0 and span() is empty.
+struct MmappedBuffer {
+    MmappedBuffer() = default;
+    MmappedBuffer(const MmappedBuffer&) = delete;
+    MmappedBuffer(MmappedBuffer&& other) {
+        swap(other);
+    }
+    MmappedBuffer& operator=(const MmappedBuffer&) = delete;
+    MmappedBuffer& operator=(MmappedBuffer&& other) {
+        // After the swap `other` holds our previous mapping (if any) and unmaps it when it is destroyed.
+        swap(other);
+        return *this;
+    }
+    // Maps the whole file at `path` with PROT_READ / MAP_PRIVATE.
+    MmappedBuffer(const char* path) {
+        int fd = open(path, O_RDONLY);
+        if (fd < 0) {
+            return;
+        }
+        struct stat statBuf;
+        if (fstat(fd, &statBuf) == 0) {
+            void* mapping = mmap(nullptr, (size_t)statBuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+            // Only publish the mapping on success, so _data is never left holding MAP_FAILED (which is
+            // non-null and would otherwise be handed to munmap() by the destructor).
+            if (mapping != MAP_FAILED) {
+                _data = mapping;
+                _size = (uint64_t)statBuf.st_size;
+            }
+        }
+        // The descriptor is closed on every path; an established mapping does not need it to stay open.
+        close(fd);
+    }
+    ~MmappedBuffer() {
+        if (!_data) { return; }
+        munmap(_data, (size_t)_size);
+    }
+    // Number of mapped bytes (0 when nothing is mapped).
+    uint64_t size() const {
+        return _size;
+    }
+    // View over the mapped bytes; valid only while this object owns the mapping.
+    const std::span<const uint8_t> span() const {
+        return std::span<const uint8_t>((const uint8_t*)_data, (size_t)_size);
+    }
+private:
+    void swap(MmappedBuffer& other) {
+        if (this == &other) { return; }
+        using std::swap;
+        swap(_data, other._data);
+        swap(_size, other._size);
+    }
+    void*    _data = nullptr;
+    uint64_t _size = 0;
+};
+
+// Private heap snapshot of `size` bytes at `remote_address` in `task`.
+//
+// The remote range is temporarily remapped into this task, copied into a malloc()ed buffer, and the
+// temporary mapping is then removed. The object is move-only and owns the malloc()ed copy. If the
+// remap or the allocation fails, the object is empty and converts to false.
+struct RemoteMap {
+    RemoteMap(task_t task, mach_vm_address_t remote_address, vm_size_t size) {
+        vm_prot_t cur_protection = VM_PROT_NONE;
+        vm_prot_t max_protection = VM_PROT_READ;
+        mach_vm_address_t localAddress = 0;
+        auto kr = mach_vm_remap_new(mach_task_self(),
+                                    &localAddress,
+                                    size,
+                                    0, // mask
+                                    VM_FLAGS_ANYWHERE | VM_FLAGS_RESILIENT_CODESIGN | VM_FLAGS_RESILIENT_MEDIA,
+                                    task,
+                                    remote_address,
+                                    true, // copy
+                                    &cur_protection,
+                                    &max_protection,
+                                    VM_INHERIT_NONE);
+        // If the remap did not succeed, leave the object empty (_data == nullptr, _size == 0).
+        if (kr != KERN_SUCCESS) {
+            return;
+        }
+        // Copy into a local buffer so our results are coherent in the event the page goes away due to storage
+        // removal, etc. We have to do this because even after we read the page the contents might go away if the
+        // object is paged out and then the backing region is disconnected (for example, if we are copying some
+        // memory in the middle of a mach-o that is on a USB drive that is disconnected after we perform the
+        // mapping). Once we copy them into a local buffer the memory will be handled by the default pager instead
+        // of potentially being backed by the mmap pager, and thus will be guaranteed not to mutate out from under us.
+        void* localCopy = malloc(size);
+        if (localCopy != nullptr) {
+            remote_memory_audit_start();
+            memcpy(localCopy, (void *)localAddress, size);
+            remote_memory_audit_end();
+            _data = localCopy;
+            _size = size;
+        }
+        // Always tear down the temporary mapping using the size that was actually mapped, whether or not the
+        // copy was made; _size must not be used here because it is still 0 when the allocation failed.
+        (void)vm_deallocate(mach_task_self(), (vm_address_t)localAddress, size);
+    }
+    RemoteMap(const RemoteMap&) = delete;
+    RemoteMap(RemoteMap&& other) {
+        swap(other);
+    }
+    RemoteMap& operator=(const RemoteMap&) = delete;
+    RemoteMap& operator=(RemoteMap&& other) {
+        // After the swap `other` holds our previous buffer (if any) and frees it when it is destroyed.
+        swap(other);
+        return *this;
+    }
+    ~RemoteMap() {
+        if (_data) {
+            free(_data);
+        }
+    }
+    // True only when a non-empty copy of the remote memory is held.
+    operator bool() const {
+        return ((_data != nullptr) && (_size != 0));
+    }
+    // View over the copied bytes; valid only while this object owns the buffer.
+    const std::span<const uint8_t> span() const {
+        return std::span<const uint8_t>((const uint8_t*)_data, (size_t)_size);
+    }
+    // Number of copied bytes (0 when empty).
+    uint64_t size() const {
+        return _size;
+    }
+private:
+    void swap(RemoteMap& other) {
+        if (this == &other) { return; }
+        using std::swap;
+        swap(_data, other._data);
+        swap(_size, other._size);
+    }
+    void*       _data = nullptr;
+    vm_size_t   _size = 0;
+};
+
+// Scope guard that pauses a task for the lifetime of the object.
+//
+// For a task other than the caller's own, the whole task is suspended with task_suspend() and resumed in the
+// destructor. For the caller's own task, every thread except the calling one is suspended individually
+// (presumably because suspending the whole task would also stop the caller) and resumed in the destructor.
+// The guard is expected to be created and destroyed on the same thread. It is non-copyable, since a copy
+// would resume the target and release the thread ports a second time.
+struct TaskSuspender {
+    TaskSuspender(task_read_t task) : _task(task) {
+        if (_task != mach_task_self()) {
+            // Remember whether the suspend took effect so the destructor only balances what was done here.
+            _taskSuspended = (task_suspend(_task) == KERN_SUCCESS);
+            return;
+        }
+        kern_return_t kr = task_threads(_task, &_threads, &_threadCount);
+        if (kr != KERN_SUCCESS) {
+            _threads     = nullptr;
+            _threadCount = 0;
+            return;
+        }
+        // mach_thread_self() hands back a new send right on every call, so fetch it once and release it.
+        const mach_port_t self = mach_thread_self();
+        for (mach_msg_type_number_t i = 0; i < _threadCount; ++i) {
+            if (_threads[i] != self) {
+                thread_suspend(_threads[i]);
+            }
+        }
+        mach_port_deallocate(mach_task_self(), self);
+    }
+    TaskSuspender(const TaskSuspender&) = delete;
+    TaskSuspender& operator=(const TaskSuspender&) = delete;
+    ~TaskSuspender() {
+        if (_task != mach_task_self()) {
+            if (_taskSuspended) {
+                task_resume(_task);
+            }
+            return;
+        }
+        if (_threads == nullptr) {
+            return;
+        }
+        const mach_port_t self = mach_thread_self();
+        for (mach_msg_type_number_t i = 0; i < _threadCount; ++i) {
+            if (_threads[i] != self) {
+                thread_resume(_threads[i]);
+            }
+            // Each entry returned by task_threads() is a port right owned by this object.
+            mach_port_deallocate(mach_task_self(), _threads[i]);
+        }
+        mach_port_deallocate(mach_task_self(), self);
+        // The array itself was allocated in our address space by task_threads().
+        mach_vm_deallocate(mach_task_self(), (mach_vm_address_t) _threads, _threadCount * sizeof(*_threads));
+    }
+private:
+    task_read_t             _task           = 0;
+    thread_act_array_t      _threads        = nullptr;
+    mach_msg_type_number_t  _threadCount    = 0;
+    bool                    _taskSuspended  = false;
+};
+
+// Records every segment of `header` except __PAGEZERO as a dictionary (name, preferred load address, size,
+// file offset, file size, permissions) in a segment array stored on `image`.
+// The array is created on first use, so an image with no reportable segments gets no array key at all.
+static
+void addSegmentArray(PropertyList::Dictionary& image, const Header* header) {
+    __block Array* segmentList = nullptr;
+    header->forEachSegment(^(const mach_o::Header::SegmentInfo& seg, bool& stop) {
+        // __PAGEZERO is deliberately left out of the snapshot.
+        if (seg.segmentName == "__PAGEZERO") {
+            return;
+        }
+        if (segmentList == nullptr) {
+            segmentList = &image.addObjectForKey<Array>(kDyldAtlasImageSegmentArrayKey);
+        }
+        auto& entry = segmentList->addObject<Dictionary>();
+        // Note: we use the std::string_view part of CString
+        entry.addObjectForKey<String>(kDyldAtlasSegmentNameKey, seg.segmentName);
+        entry.addObjectForKey<Integer>(kDyldAtlasSegmentPreferredLoadAddressKey, seg.vmaddr);
+        entry.addObjectForKey<Integer>(kDyldAtlasSegmentSizeKey, seg.vmsize);
+        entry.addObjectForKey<Integer>(kDyldAtlasSegmentFileOffsetKey, seg.fileOffset);
+        entry.addObjectForKey<Integer>(kDyldAtlasSegmentFileSizeKey, seg.fileSize);
+        entry.addObjectForKey<Integer>(kDyldAtlasSegmentPermissionsKey, seg.initProt);
+    });
+}
+
+// Builds a minimal process snapshot by walking the VM regions of `task` and looking for mach-o headers.
+//
+// The task is paused (see TaskSuspender) for the duration of the scan. Only regions whose current protection
+// is exactly read+execute are inspected, and only dyld and the main executable are recorded; the scan stops
+// as soon as both have been seen. The snapshot is encoded as a property list, wrapped as "process.plist" in
+// an AAR archive, and written to `outputStream`.
+//
+// Returns false if the pid of `task` cannot be determined, true otherwise.
+bool scavengeProcessFromRegions(Allocator& allocator, task_read_t task, ByteStream& outputStream) {
+    TaskSuspender suspender(task);
+    pid_t pid = 0;
+    auto propertyListEncoder = PropertyList(allocator);
+    auto& rootDictionary = propertyListEncoder.rootDictionary();
+    auto& images = rootDictionary.addObjectForKey<Array>(kDyldAtlasSnapshotImagesArrayKey);
+
+    kern_return_t kr = pid_for_task(task, &pid);
+    if ( kr != KERN_SUCCESS) {
+        return false;
+    }
+    rootDictionary.addObjectForKey<Integer>(kDyldAtlasSnapshotPidKey, pid);
+
+    // Bind by reference so flags set below land on the object stored in the dictionary rather than on a copy.
+    auto& snapshotFlags = rootDictionary.addObjectForKey<PropertyList::Flags<SnapshotFlags>>(kDyldAtlasSnapshotFlagsKey);
+    (void)snapshotFlags; // only modified on some targets
+    // Set the timestamp to 1, which is earlier than any real timestamp, but not 0, since tools use 0 as a sign
+    // the process is not running yet and the API call has failed.
+    rootDictionary.addObjectForKey<Integer>(kDyldAtlasSnapshotTimestampKey, 1);
+    rootDictionary.addObjectForKey<Integer>(kDyldAtlasSnapshotPlatformTypeKey, 0);
+
+#if TARGET_OS_WATCH && !TARGET_OS_SIMULATOR
+    snapshotFlags.setFlag(SnapshotFlagsPointerSize4Bytes, true);
+#endif
+    mach_vm_size_t size = 0;
+    bool dyldFound = false;
+    bool mainExecutableFound = false;
+    // mach_vm_region() rewrites `address`/`size` to describe the region it found, so advancing by `size`
+    // moves the query just past that region.
+    for (mach_vm_address_t address = 0; ; address += size) {
+        vm_region_basic_info_data_64_t info;
+        mach_port_t objectName;
+        unsigned int infoCount = VM_REGION_BASIC_INFO_COUNT_64;
+        if ( mach_vm_region(task, &address, &size, VM_REGION_BASIC_INFO,
+                            (vm_region_info_t)&info, &infoCount, &objectName) != KERN_SUCCESS ) {
+            break;
+        }
+        if ( info.protection != (VM_PROT_READ|VM_PROT_EXECUTE) ) {
+            continue;
+        }
+        // Start with at most one page; that is enough to recognise a mach-o header and learn its full size.
+        RemoteMap map(task, address, std::min((size_t)size, (size_t)PAGE_SIZE));
+        if (!map) {
+            continue;
+        }
+        auto mf = Header::isMachO(map.span());
+        if (!mf) {
+            continue;
+        }
+        uint32_t headerSize = mf->headerAndLoadCommandsSize();
+        if (headerSize > PAGE_SIZE) {
+            // The load commands spill past the first page: map again with enough pages to cover them and
+            // re-validate, since `mf` pointed into the buffer that is being replaced.
+            size_t newSize = (size_t)lsl::roundToNextAligned(PAGE_SIZE, headerSize);
+            auto newMap = RemoteMap(task, address, newSize);
+            map = std::move(newMap);
+            if (!map) {
+                continue;
+            }
+            mf = Header::isMachO(map.span());
+            if (!mf) {
+                continue;
+            }
+        }
+        if (mf->isDylinker()) {
+            dyldFound = true;
+        }
+        if (mf->isMainExecutable()) {
+            mainExecutableFound = true;
+        }
+        // If this is not dyld or a main executable we don't need to scan the region
+        if (!mf->isDylinker() && !mf->isMainExecutable()) { continue; }
+        auto& image = images.addObject<Dictionary>();
+        uint64_t preferredLoadAddress = mf->preferredLoadAddress();
+        if (preferredLoadAddress) {
+            image.addObjectForKey<Integer>(kDyldAtlasImagePreferredLoadAddressKey, preferredLoadAddress);
+        }
+        image.addObjectForKey<Integer>(kDyldAtlasImageLoadAddressKey, address);
+        const char* installname = mf->installName();
+        if (installname) {
+            image.addObjectForKey<String>(kDyldAtlasImageInstallnameKey, installname);
+        }
+        uuid_t uuid;
+        if (mf->getUuid(uuid)) {
+            image.addObjectForKey<UUID>(kDyldAtlasImageUUIDKey, uuid);
+        }
+        char executablePath[PATH_MAX+1];
+        int len = proc_regionfilename(pid, address, executablePath, PATH_MAX);
+        // Require a positive length so a failure value can never be used as an index into the buffer.
+        if ( len > 0 ) {
+            executablePath[len] = '\0';
+            image.addObjectForKey<String>(kDyldAtlasImageFilePathKey, executablePath);
+        }
+        addSegmentArray(image, mf);
+        // If we have found dyld and the main executable we are done, exit early
+        if (dyldFound && mainExecutableFound) { break; }
+    }
+    rootDictionary.addObjectForKey<Integer>(kDyldAtlasSnapshotInitialImageCount, 1);
+    rootDictionary.addObjectForKey<Integer>(kDyldAtlasSnapshotState, dyld_process_state_not_started);
+
+    ByteStream fileStream(allocator);
+    propertyListEncoder.encode(fileStream);
+    AAREncoder aarEncoder(allocator);
+    aarEncoder.addFile("process.plist", fileStream);
+    aarEncoder.encode(outputStream);
+    return true;
+}
+};
+
+// Scavenges a snapshot of `task` and returns it in a newly malloc()ed buffer.
+//
+// On success, `*buffer` points at the encoded snapshot, `*bufferSize` holds its length in bytes, and the
+// function returns true; the caller owns the buffer and releases it with free().
+// On failure (null out-parameters, the scavenge itself failing, or the allocation failing) the function
+// returns false and leaves `*buffer` / `*bufferSize` untouched.
+bool scavengeProcess(task_read_t task, void** buffer, uint64_t* bufferSize) {
+    if (buffer == nullptr || bufferSize == nullptr) {
+        return false;
+    }
+    STACK_ALLOCATOR(allocator, 0);
+    ByteStream outputStream(allocator);
+    if (!scavengeProcessFromRegions(allocator, task, outputStream)) {
+        return false;
+    }
+    const uint64_t encodedSize = outputStream.size();
+    void* encoded = malloc((size_t)encodedSize);
+    // Never copy through a null destination; a null result is only acceptable for a zero-length request.
+    if (encoded == nullptr && encodedSize != 0) {
+        return false;
+    }
+    std::copy(outputStream.begin(), outputStream.end(), (std::byte*)encoded);
+    // Publish the results only once everything has succeeded.
+    *buffer = encoded;
+    *bufferSize = encodedSize;
+    return true;
+}