/*
 * Copyright (c) 2006-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
* * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ #include <sys/param.h> #include <sys/kernel.h> #include <sys/proc_internal.h> #include <sys/systm.h> #include <sys/systm.h> #include <sys/mount_internal.h> #include <sys/fsctl.h> #include <sys/filedesc.h> #include <sys/vnode_internal.h> #include <sys/imageboot.h> #include <kern/assert.h> #include <vm/vm_far.h> #include <sys/namei.h> #include <sys/fcntl.h> #include <sys/vnode.h> #include <sys/xattr.h> #include <sys/sysproto.h> #include <sys/csr.h> #include <miscfs/devfs/devfsdefs.h> #include <libkern/crypto/sha2.h> #include <libkern/crypto/rsa.h> #include <libkern/OSKextLibPrivate.h> #include <sys/ubc_internal.h> #if CONFIG_IMAGEBOOT_IMG4 #include <libkern/img4/interface.h> #include <img4/firmware.h> #endif #include <kern/kalloc.h> #include <os/overflow.h> #include <vm/vm_kern_xnu.h> #include <pexpert/pexpert.h> #include <kern/chunklist.h> extern int (*mountroot)(void); extern char rootdevice[DEVMAXNAMESIZE]; #define DEBUG_IMAGEBOOT 0 #if DEBUG_IMAGEBOOT #define DBG_TRACE(...) printf("imageboot: " __VA_ARGS__) #else #define DBG_TRACE(...) do {} while(0) #endif #define AUTHDBG(fmt, args...) do { printf("%s: " fmt "\n", __func__, ##args); } while (0) #define AUTHPRNT(fmt, args...) 
do { printf("%s: " fmt "\n", __func__, ##args); } while (0) extern int di_root_image_ext(const char *path, char *devname, size_t devsz, dev_t *dev_p, bool removable); extern int di_root_image(const char *path, char *devname, size_t devsz, dev_t *dev_p); extern int di_root_ramfile_buf(void *buf, size_t bufsz, char *devname, size_t devsz, dev_t *dev_p); static boolean_t imageboot_setup_new(imageboot_type_t type); void *ubc_getobject_from_filename(const char *filename, struct vnode **vpp, off_t *file_size); extern lck_rw_t rootvnode_rw_lock; #define kIBFilePrefix "file://" __private_extern__ int imageboot_format_is_valid(const char *root_path) { return strncmp(root_path, kIBFilePrefix, strlen(kIBFilePrefix)) == 0; } static void vnode_get_and_drop_always(vnode_t vp) { vnode_getalways(vp); vnode_rele(vp); vnode_put(vp); } __private_extern__ bool imageboot_desired(void) { bool do_imageboot = false; char *root_path = NULL; root_path = zalloc(ZV_NAMEI); /* * Check for first layer DMG rooting. * * Note that here we are principally concerned with whether or not we * SHOULD try to imageboot, not whether or not we are going to be able to. * * If NONE of the boot-args are present, then assume that image-rooting * is not requested. * * [!! Note parens guard the entire logically OR'd set of statements, below. It validates * that NONE of the below-mentioned boot-args is present...!!] 
*/ if (!(PE_parse_boot_argn("rp0", root_path, MAXPATHLEN) || #if CONFIG_IMAGEBOOT_IMG4 PE_parse_boot_argn("arp0", root_path, MAXPATHLEN) || #endif PE_parse_boot_argn("rp", root_path, MAXPATHLEN) || PE_parse_boot_argn(IMAGEBOOT_ROOT_ARG, root_path, MAXPATHLEN) || PE_parse_boot_argn(IMAGEBOOT_AUTHROOT_ARG, root_path, MAXPATHLEN))) { /* explicitly set to false */ do_imageboot = false; } else { /* now sanity check the file-path format */ if (imageboot_format_is_valid(root_path)) { DBG_TRACE("%s: Found %s\n", __FUNCTION__, root_path); /* root_path looks good and we have one of the aforementioned bootargs */ do_imageboot = true; } else { /* explicitly set to false */ do_imageboot = false; } } zfree(ZV_NAMEI, root_path); return do_imageboot; } __private_extern__ imageboot_type_t imageboot_needed(void) { imageboot_type_t result = IMAGEBOOT_NONE; char *root_path = NULL; DBG_TRACE("%s: checking for presence of root path\n", __FUNCTION__); if (!imageboot_desired()) { goto out; } root_path = zalloc(ZV_NAMEI); result = IMAGEBOOT_DMG; /* Check for second layer */ if (!(PE_parse_boot_argn("rp1", root_path, MAXPATHLEN) || PE_parse_boot_argn(IMAGEBOOT_CONTAINER_ARG, root_path, MAXPATHLEN))) { goto out; } /* Sanity-check second layer */ if (imageboot_format_is_valid(root_path)) { DBG_TRACE("%s: Found %s\n", __FUNCTION__, root_path); } else { panic("%s: Invalid URL scheme for %s", __FUNCTION__, root_path); } out: if (root_path != NULL) { zfree(ZV_NAMEI, root_path); } return result; } extern bool IOBaseSystemARVRootHashAvailable(void); /* * Mounts new filesystem based on image path, and pivots it to the root. * The image to be mounted is located at image_path. * It will be mounted at mount_path. * The vfs_switch_root operation will be performed. * After the pivot, the outgoing root filesystem (the filesystem at root when * this function begins) will be at outgoing_root_path. 
If `skip_signature_check` is true, * then ignore the chunklisted or authAPFS checks on this image */ __private_extern__ int imageboot_pivot_image(const char *image_path, imageboot_type_t type, const char *mount_path, const char *outgoing_root_path, const bool rooted_dmg, const bool skip_signature_check) { int error = 0; boolean_t authenticated_dmg_chunklist = false; vnode_t mount_vp = NULLVP; errno_t rootauth; if (type != IMAGEBOOT_DMG) { panic("not supported"); } /* * Check that the image file actually exists. * We also need to find the mount it's on, to mark it as backing the * root. */ vnode_t imagevp = NULLVP; error = vnode_lookup(image_path, 0, &imagevp, vfs_context_kernel()); if (error) { printf("%s: image file not found or couldn't be read: %d\n", __FUNCTION__, error); /* * bail out here to short-circuit out of panic logic below. * Failure to find the pivot-image should not be a fatal condition (ENOENT) * since it may result in natural consequences (ergo, cannot unlock filevault prompt). */ return error; } /* * load the disk image and obtain its device. * di_root_image's name and the names of its arguments suggest it has * to be mounted at the root, but that's not actually needed. * We just need to obtain the device info. 
*/ dev_t dev; char devname[DEVMAXNAMESIZE]; const char *error_func = NULL; unsigned ramdisk_arg = 0; (void) PE_parse_boot_argn("-bsdmgroot-ramdisk", &ramdisk_arg, sizeof(ramdisk_arg)); if (ramdisk_arg) { size_t bufsz = 0; void *buf = NULL; error_func = "imageboot_read_file"; // no_softlimit: di_root_ramfile_buf is OK to handle a no_softlimit buffer error = imageboot_read_file_pageable(image_path, &buf, &bufsz, /* no_softlimit */ true); if (error == 0) { error_func = "di_root_ramfile_buf"; error = di_root_ramfile_buf(buf, bufsz, devname, sizeof(devname), &dev); } if (error && (buf != NULL)) { kmem_free(kernel_map, (vm_offset_t)buf, (vm_size_t)bufsz); } } else { error_func = "di_root_image"; error = di_root_image_ext(image_path, devname, DEVMAXNAMESIZE, &dev, true); } if (error) { panic("%s: %s failed: %d", __FUNCTION__, error_func, error); } printf("%s: attached disk image %s as %s\n", __FUNCTION__, image_path, devname); #if CONFIG_IMAGEBOOT_CHUNKLIST if ((rooted_dmg == false) && !IOBaseSystemARVRootHashAvailable()) { error = authenticate_root_with_chunklist(image_path, NULL); if (error == 0) { printf("authenticated root-dmg via chunklist...\n"); authenticated_dmg_chunklist = true; } else { /* root hash was not available, and image is NOT chunklisted? */ printf("failed to chunklist-authenticate root-dmg @ %s\n", image_path); } } #endif char fulldevname[DEVMAXNAMESIZE + 5]; // "/dev/" strlcpy(fulldevname, "/dev/", sizeof(fulldevname)); strlcat(fulldevname, devname, sizeof(fulldevname)); /* * mount expects another layer of indirection (because it expects to * be getting a user_addr_t of a char *. * Make a pointer-to-pointer on our stack. It won't use this * address after it returns so this should be safe. 
*/ char *fulldevnamep = &(fulldevname[0]); char **fulldevnamepp = &fulldevnamep; #define PIVOTMNT "/System/Volumes/BaseSystem" /* Attempt to mount as HFS; if it fails, then try as APFS */ printf("%s: attempting to mount as hfs...\n", __FUNCTION__); error = kernel_mount("hfs", NULLVP, NULLVP, PIVOTMNT, fulldevnamepp, 0, (MNT_RDONLY | MNT_DONTBROWSE), (KERNEL_MOUNT_NOAUTH | KERNEL_MOUNT_BASESYSTEMROOT), vfs_context_kernel()); if (error) { printf("mount failed: %d\n", error); printf("%s: attempting to mount as apfs...\n", __FUNCTION__); error = kernel_mount("apfs", NULLVP, NULLVP, PIVOTMNT, fulldevnamepp, 0, (MNT_RDONLY | MNT_DONTBROWSE), (KERNEL_MOUNT_NOAUTH | KERNEL_MOUNT_BASESYSTEMROOT), vfs_context_kernel()); } /* If we didn't mount as either HFS or APFS, then bail out */ if (error) { /* * Note that for this particular failure case (failure to mount), the disk image * being attached may have failed to quiesce within the alloted time out (20-30 sec). * For example, it may be still probing, or APFS container enumeration may have not * completed. If so, then we may have fallen into this particular error case. However, * failure to complete matching should be an exceptional case as 30 sec. is quite a * long time to wait for matching to complete (which would have occurred in * di_root_image_ext). 
*/ #if defined(__arm64__) && XNU_TARGET_OS_OSX panic("%s: failed to mount pivot image(%d)!", __FUNCTION__, error); #endif printf("%s: failed to mount pivot image(%d) !", __FUNCTION__, error); goto done; } /* otherwise, if the mount succeeded, then assert that the DMG is authenticated (either chunklist or authapfs) */ error = vnode_lookup(PIVOTMNT, 0, &mount_vp, vfs_context_kernel()); if (error) { #if defined(__arm64__) && XNU_TARGET_OS_OSX panic("%s: failed to lookup pivot root (%d) !", __FUNCTION__, error); #endif printf("%s: failed to lookup pivot root (%d)!", __FUNCTION__, error); goto done; } /* the 0x1 implies base system */ rootauth = VNOP_IOCTL(mount_vp, FSIOC_KERNEL_ROOTAUTH, (caddr_t)0x1, 0, vfs_context_kernel()); if (rootauth) { printf("BS-DMG failed to authenticate intra-FS \n"); /* * If we are using a custom rooted DMG, or if we have already authenticated * the DMG via chunklist, then it is permissible to use. * Or, if CSR_ALLOW_ANY_RECOVERY_OS is set on Development or Debug build variant. */ if (rooted_dmg || authenticated_dmg_chunklist || skip_signature_check) { rootauth = 0; } error = rootauth; } vnode_put(mount_vp); mount_vp = NULLVP; if (error) { /* * Failure here exclusively means that the mount failed to authenticate. * This means that the disk image either was not sealed (authapfs), or it was * not hosted on a chunklisted DMG. Both scenarios may be fatal depending * on the platform. */ #if defined(__arm64__) && XNU_TARGET_OS_OSX panic("%s: could not authenticate the pivot image: %d. giving up.", __FUNCTION__, error); #endif printf("%s: could not authenticate the pivot image: %d. 
giving up.\n", __FUNCTION__, error); goto done; } if (rootvnode) { mount_t root_mp = vnode_mount(rootvnode); if (root_mp && (root_mp->mnt_kern_flag & MNTK_SSD)) { rootvp_is_ssd = true; } } /* * pivot the incoming and outgoing filesystems */ error = vfs_switch_root(mount_path, outgoing_root_path, 0); if (error) { panic("%s: vfs_switch_root failed: %d", __FUNCTION__, error); } /* * Mark the filesystem containing the image as backing root, so it * won't be unmountable. * * vfs_switch_root() clears this flag, so we have to set it after * the pivot call. * If the system later pivots out of the image, vfs_switch_root * will clear it again, so the backing filesystem can be unmounted. */ if (!ramdisk_arg) { mount_t imagemp = imagevp->v_mount; lck_rw_lock_exclusive(&imagemp->mnt_rwlock); imagemp->mnt_kern_flag |= MNTK_BACKS_ROOT; lck_rw_done(&imagemp->mnt_rwlock); } error = 0; /* * Note that we do NOT change kern.bootuuid here - * imageboot_mount_image() does, but imageboot_pivot_image() doesn't. * imageboot_mount_image() is used when the root volume uuid was * "always supposed to be" the one inside the dmg. imageboot_pivot_ * image() is used when the true root volume just needs to be * obscured for a moment by the dmg. */ done: if (imagevp != NULLVP) { vnode_put(imagevp); } return error; } /* kern_sysctl.c */ extern uuid_string_t fake_bootuuid; static void set_fake_bootuuid(mount_t mp) { struct vfs_attr va; VFSATTR_INIT(&va); VFSATTR_WANTED(&va, f_uuid); if (vfs_getattr(mp, &va, vfs_context_current()) != 0) { return; } if (!VFSATTR_IS_SUPPORTED(&va, f_uuid)) { return; } uuid_unparse(va.f_uuid, fake_bootuuid); } /* * Swaps in new root filesystem based on image path. * Current root filesystem is removed from mount list and * tagged MNTK_BACKS_ROOT, MNT_ROOTFS is cleared on it, and * "rootvnode" is reset. Root vnode of currentroot filesystem * is returned with usecount (no iocount). * kern.bootuuid is arranged to return the UUID of the mounted image. 
(If * we did nothing here, it would be the UUID of the image source volume.) */ __private_extern__ int imageboot_mount_image(const char *root_path, int height, imageboot_type_t type) { dev_t dev; int error; /* * Need to stash this here since we may do a kernel_mount() on /, which will * automatically update the rootvnode global. Note that vfs_mountroot() does * not update that global, which is a bit weird. */ vnode_t old_rootvnode = rootvnode; vnode_t newdp; mount_t new_rootfs; boolean_t update_rootvnode = FALSE; if (type == IMAGEBOOT_DMG) { error = di_root_image(root_path, rootdevice, DEVMAXNAMESIZE, &dev); if (error) { panic("%s: di_root_image failed: %d", __FUNCTION__, error); } rootdev = dev; mountroot = NULL; printf("%s: root device 0x%x\n", __FUNCTION__, rootdev); error = vfs_mountroot(); if (error != 0) { panic("vfs_mountroot() failed."); } update_rootvnode = TRUE; } else { panic("invalid imageboot type: %d", type); } /* * Get the vnode for '/'. * Set fdp->fd_fd.fd_cdir to reference it. 
*/ if (VFS_ROOT(TAILQ_LAST(&mountlist, mntlist), &newdp, vfs_context_kernel())) { panic("%s: cannot find root vnode", __FUNCTION__); } DBG_TRACE("%s: old root fsname: %s\n", __FUNCTION__, old_rootvnode->v_mount->mnt_vtable->vfc_name); if (old_rootvnode != NULL) { /* remember the old rootvnode, but remove it from mountlist */ mount_t old_rootfs = old_rootvnode->v_mount; mount_list_remove(old_rootfs); mount_lock(old_rootfs); old_rootfs->mnt_kern_flag |= MNTK_BACKS_ROOT; old_rootfs->mnt_flag &= ~MNT_ROOTFS; mount_unlock(old_rootfs); } vnode_ref(newdp); vnode_put(newdp); lck_rw_lock_exclusive(&rootvnode_rw_lock); /* switch to the new rootvnode */ if (update_rootvnode) { rootvnode = newdp; set_fake_bootuuid(rootvnode->v_mount); } new_rootfs = rootvnode->v_mount; mount_lock(new_rootfs); new_rootfs->mnt_flag |= MNT_ROOTFS; mount_unlock(new_rootfs); kernproc->p_fd.fd_cdir = newdp; lck_rw_unlock_exclusive(&rootvnode_rw_lock); DBG_TRACE("%s: root switched\n", __FUNCTION__); if (old_rootvnode != NULL) { #ifdef CONFIG_IMGSRC_ACCESS if (height >= 0) { imgsrc_rootvnodes[height] = old_rootvnode; } else { vnode_get_and_drop_always(old_rootvnode); } #else #pragma unused(height) vnode_get_and_drop_always(old_rootvnode); #endif /* CONFIG_IMGSRC_ACCESS */ } return 0; } /* * Return a memory object for given file path. * Also returns a vnode reference for the given file path. 
*/ void * ubc_getobject_from_filename(const char *filename, struct vnode **vpp, off_t *file_size) { int err = 0; struct nameidata ndp = {}; struct vnode *vp = NULL; off_t fsize = 0; vfs_context_t ctx = vfs_context_kernel(); void *control = NULL; NDINIT(&ndp, LOOKUP, OP_OPEN, LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(filename), ctx); if ((err = namei(&ndp)) != 0) { goto errorout; } nameidone(&ndp); vp = ndp.ni_vp; if ((err = vnode_size(vp, &fsize, ctx)) != 0) { goto errorout; } if (fsize < 0) { goto errorout; } control = ubc_getobject(vp, UBC_FLAGS_NONE); if (control == NULL) { goto errorout; } *file_size = fsize; *vpp = vp; vp = NULL; errorout: if (vp) { vnode_put(vp); } return control; } static int imageboot_read_file_internal(const char *path, const off_t offset, const bool pageable, void **bufp, size_t *bufszp, off_t *fsizep, bool no_softlimit) { int err = 0; struct nameidata ndp = {}; struct vnode *vp = NULL; struct vnode *rsrc_vp = NULL; char *readbuf = NULL; off_t readsize = 0; off_t readoff = 0; off_t fsize = 0; size_t maxsize = 0; char *buf = NULL; bool doclose = false; vfs_context_t ctx = vfs_context_kernel(); proc_t p = vfs_context_proc(ctx); kauth_cred_t kerncred = vfs_context_ucred(ctx); NDINIT(&ndp, LOOKUP, OP_OPEN, LOCKLEAF | FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); if ((err = namei(&ndp)) != 0) { AUTHPRNT("namei failed (%s) - %d", path, err); goto out; } nameidone(&ndp); vp = ndp.ni_vp; if ((err = vnode_size(vp, &fsize, ctx)) != 0) { AUTHPRNT("failed to get vnode size of %s - %d", path, err); goto out; } if (fsize < 0) { panic("negative file size"); } if (offset < 0) { AUTHPRNT("negative file offset"); err = EINVAL; goto out; } if (fsizep) { *fsizep = fsize; } if ((err = VNOP_OPEN(vp, FREAD, ctx)) != 0) { AUTHPRNT("failed to open %s - %d", path, err); goto out; } doclose = true; /* cap fsize to the amount that remains after offset */ if (os_sub_overflow(fsize, offset, &fsize)) { fsize = 0; } else if (fsize < 0) { fsize = 0; } /* if bufsz is 
non-zero, cap the read at bufsz bytes */ maxsize = *bufszp; if (maxsize && (maxsize < (size_t)fsize)) { fsize = maxsize; } /* if fsize is larger than the specified limit (presently 2.5GB) or a NVRAM-configured limit, fail */ maxsize = IMAGEBOOT_MAX_FILESIZE; PE_parse_boot_argn("rootdmg-maxsize", &maxsize, sizeof(maxsize)); if (maxsize && (maxsize < (size_t)fsize)) { AUTHPRNT("file is too large (%lld > %lld)", (long long) fsize, (long long) maxsize); err = EFBIG; goto out; } if (pageable) { vm_offset_t addr = 0; kma_flags_t kma_flags = 0; kma_flags = KMA_PAGEABLE | KMA_DATA_SHARED; if (no_softlimit) { kma_flags |= KMA_NOSOFTLIMIT; } if (kmem_alloc(kernel_map, &addr, (vm_size_t)fsize, kma_flags, VM_KERN_MEMORY_FILE) == KERN_SUCCESS) { buf = (char *)addr; } else { buf = NULL; } } else { zalloc_flags_t zflags = 0; //limit kalloc data calls to only 2GB. if (fsize > IMAGEBOOT_MAX_KALLOCSIZE) { AUTHPRNT("file is too large for non-pageable (%lld)", (long long) fsize); err = ENOMEM; goto out; } zflags = Z_WAITOK; if (no_softlimit) { zflags |= Z_NOSOFTLIMIT; } buf = (char *)kalloc_data((vm_size_t)fsize, zflags); } if (buf == NULL) { err = ENOMEM; goto out; } #if NAMEDSTREAMS /* find resource fork so we can evict cached decmpfs data */ if (VNOP_GETNAMEDSTREAM(vp, &rsrc_vp, XATTR_RESOURCEFORK_NAME, NS_OPEN, /*flags*/ 0, ctx) == 0) { vnode_ref(rsrc_vp); vnode_put(rsrc_vp); AUTHDBG("Found resource fork for %s", path); } #endif /* read data in chunks to handle (fsize > INT_MAX) */ readbuf = buf; readsize = fsize; readoff = offset; while (readsize > 0) { const off_t chunksize_max = 16 * 1024 * 1024; /* 16 MiB */ const off_t chunksize = MIN(readsize, chunksize_max); /* read next chunk, pass IO_NOCACHE to clarify our intent (even if ignored) */ if ((err = vn_rdwr(UIO_READ, vp, (caddr_t)readbuf, (int)chunksize, readoff, UIO_SYSSPACE, IO_NODELOCKED | IO_NOCACHE | IO_RAOFF, kerncred, /*resid*/ NULL, p)) != 0) { AUTHPRNT("Cannot read %lld bytes at offset %lld from %s - %d", (long 
long)chunksize, (long long)readoff, path, err); goto out; } /* evict cached pages so they don't accumulate during early boot */ ubc_msync(vp, readoff, readoff + chunksize, NULL, UBC_INVALIDATE | UBC_PUSHALL); /* evict potentially-cached decmpfs data if we have a resource fork */ if (rsrc_vp != NULL) { if (vnode_getwithref(rsrc_vp) == 0) { ubc_msync(rsrc_vp, 0, ubc_getsize(rsrc_vp), NULL, UBC_INVALIDATE | UBC_PUSHALL); vnode_put(rsrc_vp); } } readbuf = VM_FAR_ADD_PTR_UNBOUNDED(readbuf, chunksize); readsize -= chunksize; readoff += chunksize; } out: if (doclose) { VNOP_CLOSE(vp, FREAD, ctx); } if (rsrc_vp) { vnode_rele(rsrc_vp); rsrc_vp = NULL; } if (vp) { vnode_put(vp); vp = NULL; } if (err) { if (buf == NULL) { /* nothing to free */ } else if (pageable) { kmem_free(kernel_map, (vm_offset_t)buf, (vm_size_t)fsize); } else { kfree_data(buf, (vm_size_t)fsize); } } else { *bufp = buf; *bufszp = (size_t)fsize; } return err; } int imageboot_read_file_pageable(const char *path, void **bufp, size_t *bufszp, bool no_softlimit) { return imageboot_read_file_internal(path, 0, true, bufp, bufszp, NULL, no_softlimit); } int imageboot_read_file_from_offset(const char *path, const off_t offset, void **bufp, size_t *bufszp) { return imageboot_read_file_internal(path, offset, false, bufp, bufszp, NULL, /* no_softlimit */ false); } int imageboot_read_file(const char *path, void **bufp, size_t *bufszp, off_t *fsizep) { return imageboot_read_file_internal(path, 0, false, bufp, bufszp, fsizep, /* no_softlimit */ false); } #if CONFIG_IMAGEBOOT_IMG4 || CONFIG_IMAGEBOOT_CHUNKLIST vnode_t imgboot_get_image_file(const char *path, off_t *fsize, int *errp) { struct nameidata ndp = {}; vnode_t vp = NULL; vfs_context_t ctx = vfs_context_kernel(); int err; NDINIT(&ndp, LOOKUP, OP_OPEN, LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); if ((err = namei(&ndp)) != 0) { AUTHPRNT("Cannot find %s - error %d", path, err); } else { nameidone(&ndp); vp = ndp.ni_vp; if (vp->v_type != VREG) { err = 
EINVAL; AUTHPRNT("%s it not a regular file", path); } else if (fsize) { if ((err = vnode_size(vp, fsize, ctx)) != 0) { AUTHPRNT("Cannot get file size of %s - error %d", path, err); } } } if (err) { if (vp) { vnode_put(vp); } *errp = err; vp = NULL; } return vp; } #endif /* CONFIG_IMAGEBOOT_CHUNKLIST || CONFIG_IMAGEBOOT_CHUNKLIST */ #if CONFIG_IMAGEBOOT_IMG4 #define APTICKET_NAME "apticket.der" static char * imgboot_get_apticket_path(const char *rootpath, size_t *sz) { size_t plen = strlen(rootpath) + sizeof(APTICKET_NAME) + 1; char *path = (char *)kalloc_data(plen, Z_WAITOK); if (path) { char *slash; strlcpy(path, rootpath, plen); slash = strrchr(path, '/'); if (slash == NULL) { slash = path; } else { slash++; } strlcpy(slash, APTICKET_NAME, sizeof(APTICKET_NAME) + 1); } *sz = plen; return path; } static int authenticate_root_with_img4(const char *rootpath) { errno_t rv; vnode_t vp = NULLVP; size_t ticket_pathsz = 0; char *ticket_path; img4_buff_t tck = IMG4_BUFF_INIT; img4_firmware_execution_context_t exec = { .i4fex_version = IMG4_FIRMWARE_EXECUTION_CONTEXT_STRUCT_VERSION, .i4fex_execute = NULL, .i4fex_context = NULL, }; img4_firmware_t fw = NULL; img4_firmware_flags_t fw_flags = IMG4_FIRMWARE_FLAG_BARE | IMG4_FIRMWARE_FLAG_SUBSEQUENT_STAGE; DBG_TRACE("Check %s\n", rootpath); ticket_path = imgboot_get_apticket_path(rootpath, &ticket_pathsz); if (ticket_path == NULL) { AUTHPRNT("Cannot construct ticket path - out of memory"); return ENOMEM; } rv = imageboot_read_file(ticket_path, (void **)&tck.i4b_bytes, &tck.i4b_len, NULL); if (rv) { AUTHPRNT("Cannot get a ticket from %s - %d\n", ticket_path, rv); goto out_with_ticket_path; } DBG_TRACE("Got %lu bytes of manifest from %s\n", tck.i4b_len, ticket_path); vp = imgboot_get_image_file(rootpath, NULL, &rv); if (vp == NULL) { /* Error message had been printed already */ rv = EIO; goto out_with_ticket_bytes; } fw = img4_firmware_new_from_vnode_4xnu(IMG4_RUNTIME_DEFAULT, &exec, 'rosi', vp, fw_flags); if (!fw) { 
AUTHPRNT("Could not allocate new firmware"); rv = ENOMEM; goto out_with_ticket_bytes; } img4_firmware_attach_manifest(fw, &tck); rv = img4_firmware_evaluate(fw, img4_chip_select_personalized_ap(), NULL); out_with_ticket_bytes: kfree_data(tck.i4b_bytes, tck.i4b_len); out_with_ticket_path: kfree_data(ticket_path, ticket_pathsz); img4_firmware_destroy(&fw); if (vp) { vnode_put(vp); } return rv; } #endif /* CONFIG_IMAGEBOOT_IMG4 */ /* * Attach the image at 'path' as a ramdisk and mount it as our new rootfs. * All existing mounts are first umounted. */ static int imageboot_mount_ramdisk(const char *path) { int err = 0; size_t bufsz = 0; void *buf = NULL; dev_t dev; vnode_t newdp; vnode_t tvp; mount_t new_rootfs; /* * Read our target image from disk * * We override the allocator soft-limit in order to allow booting large RAM * disks. As a consequence, we are responsible for manipulating the * buffer only through vm_far safe APIs. */ err = imageboot_read_file_pageable(path, &buf, &bufsz, /* no_softlimit */ true); if (err) { printf("%s: failed: imageboot_read_file_pageable() = %d\n", __func__, err); goto out; } DBG_TRACE("%s: read '%s' sz = %lu\n", __func__, path, bufsz); #if CONFIG_IMGSRC_ACCESS /* Re-add all root mounts to the mount list in the correct order... */ mount_list_remove(rootvnode->v_mount); for (int i = 0; i < MAX_IMAGEBOOT_NESTING; i++) { struct vnode *vn = imgsrc_rootvnodes[i]; if (vn) { vnode_getalways(vn); imgsrc_rootvnodes[i] = NULLVP; mount_t mnt = vn->v_mount; mount_lock(mnt); mnt->mnt_flag |= MNT_ROOTFS; mount_list_add(mnt); mount_unlock(mnt); vnode_rele(vn); vnode_put(vn); } } mount_list_add(rootvnode->v_mount); #endif /* ... and unmount everything */ vfs_unmountall(FALSE); lck_rw_lock_exclusive(&rootvnode_rw_lock); kernproc->p_fd.fd_cdir = NULL; tvp = rootvnode; rootvnode = NULL; rootvp = NULLVP; rootdev = NODEV; lck_rw_unlock_exclusive(&rootvnode_rw_lock); vnode_get_and_drop_always(tvp); /* Attach the ramfs image ... 
*/ err = di_root_ramfile_buf(buf, bufsz, rootdevice, DEVMAXNAMESIZE, &dev); if (err) { printf("%s: failed: di_root_ramfile_buf() = %d\n", __func__, err); goto out; } /* ... and mount it */ rootdev = dev; mountroot = NULL; err = vfs_mountroot(); if (err) { printf("%s: failed: vfs_mountroot() = %d\n", __func__, err); goto out; } /* Switch to new root vnode */ if (VFS_ROOT(TAILQ_LAST(&mountlist, mntlist), &newdp, vfs_context_kernel())) { panic("%s: cannot find root vnode", __func__); } vnode_ref(newdp); lck_rw_lock_exclusive(&rootvnode_rw_lock); rootvnode = newdp; rootvnode->v_flag |= VROOT; new_rootfs = rootvnode->v_mount; mount_lock(new_rootfs); new_rootfs->mnt_flag |= MNT_ROOTFS; mount_unlock(new_rootfs); set_fake_bootuuid(new_rootfs); kernproc->p_fd.fd_cdir = newdp; lck_rw_unlock_exclusive(&rootvnode_rw_lock); vnode_put(newdp); DBG_TRACE("%s: root switched\n", __func__); out: if (err && (buf != NULL)) { kmem_free(kernel_map, (vm_offset_t)buf, (vm_size_t)bufsz); } return err; } /* * If the path is in <file://> URL format then we allocate memory and decode it, * otherwise return the same pointer. * * Caller is expected to check if the pointers are different. 
*/ static char * url_to_path(char *url_path, size_t *sz) { char *path = url_path; size_t len = strlen(kIBFilePrefix); if (strncmp(kIBFilePrefix, url_path, len) == 0) { /* its a URL - remove the file:// prefix and percent-decode */ url_path += len; len = strlen(url_path); if (len) { /* Make a copy of the path to URL-decode */ path = (char *)kalloc_data(len + 1, Z_WAITOK); if (path == NULL) { panic("imageboot path allocation failed - cannot allocate %d bytes", (int)len); } strlcpy(path, url_path, len + 1); *sz = len + 1; url_decode(path); } else { panic("Bogus imageboot path URL - missing path"); } DBG_TRACE("%s: root image URL <%s> becomes %s\n", __func__, url_path, path); } return path; } static boolean_t imageboot_setup_new(imageboot_type_t type) { int error; char *root_path = NULL; int height = 0; boolean_t done = FALSE; boolean_t auth_root = TRUE; boolean_t ramdisk_root = FALSE; root_path = zalloc(ZV_NAMEI); assert(root_path != NULL); unsigned imgboot_arg; if (PE_parse_boot_argn("-rootdmg-ramdisk", &imgboot_arg, sizeof(imgboot_arg))) { ramdisk_root = TRUE; } if (PE_parse_boot_argn(IMAGEBOOT_CONTAINER_ARG, root_path, MAXPATHLEN) == TRUE) { printf("%s: container image url is %s\n", __FUNCTION__, root_path); error = imageboot_mount_image(root_path, height, type); if (error != 0) { panic("Failed to mount container image."); } height++; } if (PE_parse_boot_argn(IMAGEBOOT_AUTHROOT_ARG, root_path, MAXPATHLEN) == FALSE && PE_parse_boot_argn(IMAGEBOOT_ROOT_ARG, root_path, MAXPATHLEN) == FALSE) { if (height > 0) { panic("%s specified without %s or %s?", IMAGEBOOT_CONTAINER_ARG, IMAGEBOOT_AUTHROOT_ARG, IMAGEBOOT_ROOT_ARG); } goto out; } printf("%s: root image URL is '%s'\n", __func__, root_path); /* Make a copy of the path to URL-decode */ size_t pathsz; char *path = url_to_path(root_path, &pathsz); assert(path); #if CONFIG_IMAGEBOOT_CHUNKLIST if (auth_root) { /* * This updates auth_root to reflect whether chunklist was * actually enforced. 
In effect, this clears auth_root if * CSR_ALLOW_ANY_RECOVERY_OS allowed an invalid image. */ AUTHDBG("authenticating root image at %s", path); error = authenticate_root_with_chunklist(path, &auth_root); if (error) { panic("root image authentication failed (err = %d)", error); } AUTHDBG("successfully authenticated %s", path); } #endif if (ramdisk_root) { error = imageboot_mount_ramdisk(path); } else { error = imageboot_mount_image(root_path, height, type); } if (path != root_path) { kfree_data(path, pathsz); } if (error) { if (error == EFBIG) { panic("root imagefile is too large (err=%d, auth=%d, ramdisk=%d)", error, auth_root, ramdisk_root); } else { panic("Failed to mount root image (err=%d, auth=%d, ramdisk=%d)", error, auth_root, ramdisk_root); } } #if CONFIG_IMAGEBOOT_CHUNKLIST if (auth_root) { /* check that the image version matches the running kernel */ AUTHDBG("checking root image version"); error = authenticate_root_version_check(); if (error) { panic("root image version check failed"); } else { AUTHDBG("root image version matches kernel"); } } #endif done = TRUE; out: zfree(ZV_NAMEI, root_path); return done; } __private_extern__ void imageboot_setup(imageboot_type_t type) { int error = 0; char *root_path = NULL; DBG_TRACE("%s: entry\n", __FUNCTION__); if (rootvnode == NULL) { panic("imageboot_setup: rootvnode is NULL."); } /* * New boot-arg scheme: * root-dmg : the dmg that will be the root filesystem, authenticated by default. * auth-root-dmg : same as root-dmg. * container-dmg : an optional dmg that contains the root-dmg. * locker : the locker that will be the root filesystem -- mutually * exclusive with any other boot-arg. */ if (imageboot_setup_new(type)) { return; } root_path = zalloc(ZV_NAMEI); assert(root_path != NULL); /* * Look for outermost disk image to root from. 
If we're doing a nested boot, * there's some sense in which the outer image never needs to be the root filesystem, * but it does need very similar treatment: it must not be unmounted, needs a fake * device vnode created for it, and should not show up in getfsstat() until exposed * with MNT_IMGSRC. We just make it the temporary root. */ #if CONFIG_IMAGEBOOT_IMG4 if (PE_parse_boot_argn("arp0", root_path, MAXPATHLEN)) { size_t pathsz; char *path = url_to_path(root_path, &pathsz); assert(path); if (authenticate_root_with_img4(path)) { panic("Root image %s does not match the manifest", root_path); } if (path != root_path) { kfree_data(path, pathsz); } } else #endif /* CONFIG_IMAGEBOOT_IMG4 */ if ((PE_parse_boot_argn("rp", root_path, MAXPATHLEN) == FALSE) && (PE_parse_boot_argn("rp0", root_path, MAXPATHLEN) == FALSE)) { panic("%s: no valid path to image.", __FUNCTION__); } DBG_TRACE("%s: root image url is %s\n", __FUNCTION__, root_path); error = imageboot_mount_image(root_path, 0, type); if (error) { panic("Failed on first stage of imageboot."); } /* * See if we are rooting from a nested image */ if (PE_parse_boot_argn("rp1", root_path, MAXPATHLEN) == FALSE) { goto done; } printf("%s: second level root image url is %s\n", __FUNCTION__, root_path); /* * If we fail to set up second image, it's not a given that we * can safely root off the first. */ error = imageboot_mount_image(root_path, 1, type); if (error) { panic("Failed on second stage of imageboot."); } done: zfree(ZV_NAMEI, root_path); DBG_TRACE("%s: exit\n", __FUNCTION__); return; } |