Loading...
   1
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
  13
  14
  15
  16
  17
  18
  19
  20
  21
  22
  23
  24
  25
  26
  27
  28
  29
  30
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

#ifndef _KERN_ZALLOC_INTERNAL_H_
#define _KERN_ZALLOC_INTERNAL_H_

#include <kern/zalloc.h>
#include <kern/locks.h>
#include <kern/simple_lock.h>

#include <os/atomic_private.h>
#include <os/base.h> /* OS_PTRAUTH_SIGNED_PTR */
#include <sys/queue.h>
#include <vm/vm_map_internal.h>

#if KASAN
#include <san/kasan.h>
#include <kern/spl.h>
#endif /* !KASAN */

/*
 * Disable zalloc zero validation under kasan as it is
 * double-duty with what kasan already does.
 */
#if KASAN
#define ZALLOC_ENABLE_ZERO_CHECK        0
#else
#define ZALLOC_ENABLE_ZERO_CHECK        1
#endif

#if KASAN
#define ZALLOC_ENABLE_LOGGING           0
#elif DEBUG || DEVELOPMENT
#define ZALLOC_ENABLE_LOGGING           1
#else
#define ZALLOC_ENABLE_LOGGING           0
#endif

/*!
 * @file <kern/zalloc_internal.h>
 *
 * @abstract
 * Exposes some guts of zalloc to interact with the VM, debugging, copyio and
 * kalloc subsystems.
 */

__BEGIN_DECLS

__exported_push_hidden

/*
 *	A zone is a collection of fixed size blocks for which there
 *	is fast allocation/deallocation access.  Kernel routines can
 *	use zones to manage data structures dynamically, creating a zone
 *	for each type of data structure to be managed.
 *
 */

/*!
 * @typedef zone_pva_t
 *
 * @brief
 * Type used to point to a page virtual address in the zone allocator.
 *
 * @description
 * - Valid pages have the top bit set.
 * - 0 represents the "NULL" page
 * - non 0 values with the top bit cleared represent queue heads,
 *   indexed from the beginning of the __DATA section of the kernel.
 *   (see zone_pageq_base).
 */
typedef struct zone_packed_virtual_address {
	uint32_t packed_address;
} zone_pva_t;

/*!
 * @struct zone_stats
 *
 * @abstract
 * Per-cpu structure used for basic zone stats.
 *
 * @discussion
 * The values aren't scaled for per-cpu zones.
 */
struct zone_stats {
	uint64_t            zs_mem_allocated;
	uint64_t            zs_mem_freed;
	uint64_t            zs_alloc_fail;
	uint32_t            zs_alloc_rr;     /* allocation rr bias */
	uint32_t _Atomic    zs_alloc_not_early;
};

typedef struct zone_magazine *zone_magazine_t;

/*!
 * @struct zone_depot
 *
 * @abstract
 * Holds a list of full and empty magazines.
 *
 * @discussion
 * The data structure is a "STAILQ" and an "SLIST" combined with counters
 * to know their lengths in O(1). Here is a graphical example:
 *
 *      zd_full = 3
 *      zd_empty = 1
 * ╭─── zd_head
 * │ ╭─ zd_tail
 * │ ╰────────────────────────────────────╮
 * │    ╭───────╮   ╭───────╮   ╭───────╮ v ╭───────╮
 * ╰───>│███████┼──>│███████┼──>│███████┼──>│       ┼─> X
 *      ╰───────╯   ╰───────╯   ╰───────╯   ╰───────╯
 */
struct zone_depot {
	uint32_t            zd_full;
	uint32_t            zd_empty;
	zone_magazine_t     zd_head;
	zone_magazine_t    *zd_tail;
};

/* see https://lemire.me/blog/2019/02/20/more-fun-with-fast-remainders-when-the-divisor-is-a-constant/ */
#define Z_MAGIC_QUO(s)      (((1ull << 32) - 1) / (uint64_t)(s) + 1)
#define Z_MAGIC_ALIGNED(s)  (~0u / (uint32_t)(s) + 1)

/*
 * Returns (offs / size) if offs is small enough
 * and magic = Z_MAGIC_QUO(size)
 */
static inline uint32_t
Z_FAST_QUO(uint64_t offs, uint64_t magic)
{
	return (offs * magic) >> 32;
}

/*
 * Returns (offs % size) if offs is small enough
 * and magic = Z_MAGIC_QUO(size)
 */
static inline uint32_t
Z_FAST_MOD(uint64_t offs, uint64_t magic, uint64_t size)
{
	uint32_t lowbits = (uint32_t)(offs * magic);

	return (lowbits * size) >> 32;
}

/*
 * Returns whether (offs % size) == 0 if offs is small enough
 * and magic = Z_MAGIC_ALIGNED(size)
 */
static inline bool
Z_FAST_ALIGNED(uint64_t offs, uint32_t magic)
{
	return (uint32_t)(offs * magic) < magic;
}

struct zone_size_params {
	uint32_t            z_align_magic;  /* magic to use with Z_FAST_ALIGNED()  */
	uint32_t            z_elem_size;    /* size of an element                  */
};

struct zone_expand {
	struct zone_expand *ze_next;
	thread_t            ze_thread;
	bool                ze_pg_wait;
	bool                ze_vm_priv;
	bool                ze_clear_priv;
};

#define Z_WMA_UNIT (1u << 8)
#define Z_WMA_MIX(base, e)  ((3 * (base) + (e) * Z_WMA_UNIT) / 4)

struct zone {
	/*
	 * Readonly / rarely written fields
	 */

	/*
	 * The first 4 fields match a zone_view.
	 *
	 * z_self points back to the zone when the zone is initialized,
	 * or is NULL else.
	 */
	struct zone        *z_self;
	zone_stats_t        z_stats;
	const char         *z_name;
	struct zone_view   *z_views;
	struct zone_expand *z_expander;

	uint64_t            z_quo_magic;
	uint32_t            z_align_magic;
	uint16_t            z_elem_size;
	uint16_t            z_elem_offs;
	uint16_t            z_chunk_pages;
	uint16_t            z_chunk_elems;

	uint32_t /* 32 bits */
	/*
	 * Lifecycle state (Mutable after creation)
	 */
	    z_destroyed        :1,  /* zone is (being) destroyed */
	    z_async_refilling  :1,  /* asynchronous allocation pending? */
	    z_depot_cleanup    :1,  /* per cpu depots need cleaning */
	    z_expanding_wait   :1,  /* is thread waiting for expansion? */
	    z_exhausted_wait   :1,  /* are threads waiting for exhaustion end */
	    z_exhausts         :1,  /* whether the zone exhausts by design */

	/*
	 * Behavior configuration bits
	 */
	    z_percpu           :1,  /* the zone is percpu */
	    z_smr              :1,  /* the zone uses SMR */
	    z_permanent        :1,  /* the zone allocations are permanent */
	    z_nocaching        :1,  /* disallow zone caching for this zone */
	    collectable        :1,  /* garbage collect empty pages */
	    no_callout         :1,
	    z_destructible     :1,  /* zone can be zdestroy()ed  */

	    _reserved          :8,

	/*
	 * Debugging features
	 */
	    z_kasan_fakestacks :1,
	    z_kasan_quarantine :1,  /* whether to use the kasan quarantine */
	    z_tags_sizeclass   :6,  /* idx into zone_tags_sizeclasses to associate
	                             * sizeclass for a particualr kalloc tag */
	    z_uses_tags        :1,
	    z_log_on           :1,  /* zone logging was enabled by boot-arg */
	    _reserved2         :1;

	uint8_t             z_cacheline1[0] __attribute__((aligned(64)));

	/*
	 * Zone caching / recirculation cacheline
	 *
	 * z_recirc* fields are protected by the recirculation lock.
	 *
	 * z_recirc_cont_wma:
	 *   weighted moving average of the number of contentions per second,
	 *   in Z_WMA_UNIT units (fixed point decimal).
	 *
	 * z_recirc_cont_cur:
	 *   count of recorded contentions that will be fused
	 *   in z_recirc_cont_wma at the next period.
	 *
	 *   Note: if caching is disabled,
	 *   this field is used under the zone lock.
	 *
	 * z_elems_free_{min,wma} (overloaded on z_recirc_empty*):
	 *   tracks the history of the minimum values of z_elems_free over time
	 *   with "min" being the minimum it hit for the current period,
	 *   and "wma" the weighted moving average of those value
	 *   (in Z_WMA_UNIT units).
	 *
	 *   This field is used if z_pcpu_cache is NULL,
	 *   otherwise it aliases with z_recirc_empty_{min,wma}
	 *
	 * z_recirc_{full,empty}_{min,wma}:
	 *   tracks the history of the the minimum number of full/empty
	 *   magazines in the depot over time, with "min" being the minimum
	 *   it hit for the current period, and "wma" the weighted moving
	 *   average of those value (in Z_WMA_UNIT units).
	 */
	struct zone_cache  *__zpercpu OS_PTRAUTH_SIGNED_PTR("zone.z_pcpu_cache") z_pcpu_cache;
	struct zone_depot   z_recirc;

	hw_lck_ticket_t     z_recirc_lock;
	uint32_t            z_recirc_full_min;
	uint32_t            z_recirc_full_wma;
	union {
		uint32_t    z_recirc_empty_min;
		uint32_t    z_elems_free_min;
	};
	union {
		uint32_t    z_recirc_empty_wma;
		uint32_t    z_elems_free_wma;
	};
	uint32_t            z_recirc_cont_cur;
	uint32_t            z_recirc_cont_wma;

	uint16_t            z_depot_size;
	uint16_t            z_depot_limit;

	uint8_t             z_cacheline2[0] __attribute__((aligned(64)));

	/*
	 * often mutated fields
	 */

	hw_lck_ticket_t     z_lock;

	/*
	 * Page accounting (wired / VA)
	 *
	 * Those numbers are unscaled for z_percpu zones
	 * (zone_scale_for_percpu() needs to be used to find the true value).
	 */
	uint32_t            z_wired_max;    /* how large can this zone grow        */
	uint32_t            z_wired_hwm;    /* z_wired_cur high watermark          */
	uint32_t            z_wired_cur;    /* number of pages used by this zone   */
	uint32_t            z_wired_empty;  /* pages collectable by GC             */
	uint32_t            z_va_cur;       /* amount of VA used by this zone      */

	/*
	 * list of metadata structs, which maintain per-page free element lists
	 */
	zone_pva_t          z_pageq_empty;  /* populated, completely empty pages   */
	zone_pva_t          z_pageq_partial;/* populated, partially filled pages   */
	zone_pva_t          z_pageq_full;   /* populated, completely full pages    */
	zone_pva_t          z_pageq_va;     /* non-populated VA pages              */

	/*
	 * Zone statistics
	 *
	 * z_elems_avail:
	 *   number of elements in the zone (at all).
	 */
	uint32_t            z_elems_free;   /* Number of free elements             */
	uint32_t            z_elems_avail;  /* Number of elements available        */
	uint32_t            z_elems_rsv;
	uint32_t            z_array_size_class;

	struct zone        *z_kt_next;

	uint8_t             z_cacheline3[0] __attribute__((aligned(64)));

#if KASAN_CLASSIC
	uint16_t            z_kasan_redzone;
	spl_t               z_kasan_spl;
#endif

#if ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS || KASAN_TBI
	/*
	 * the allocation logs are used when:
	 *
	 * - zlog<n>= boot-args are used (and then z_log_on is set)
	 *
	 * - the leak detection was triggered for the zone.
	 *   In that case, the log can't ever be freed,
	 *   but it can be enabled/disabled dynamically.
	 */
	struct btlog       *z_btlog;
	struct btlog       *z_btlog_disabled;
#endif
} __attribute__((aligned((64))));

/*!
 * @typedef zone_security_flags_t
 *
 * @brief
 * Type used to store the immutable security properties of a zone.
 *
 * @description
 * These properties influence the security nature of a zone and can't be
 * modified after lockdown.
 */
typedef struct zone_security_flags {
	uint16_t
	/*
	 * Security sensitive configuration bits
	 */
	    z_submap_idx       :8,  /* a Z_SUBMAP_IDX_* value */
	    z_kheap_id         :3,  /* zone_kheap_id_t when part of a kalloc heap */
	    z_kalloc_type      :1,  /* zones that does types based seggregation */
	    z_lifo             :1,  /* depot and recirculation layer are LIFO */
	    z_submap_from_end  :1,  /* allocate from the left or the right ? */
	    z_noencrypt        :1,  /* do not encrypt pages when hibernating */
	    z_unused           :1;
	/*
	 * Signature equivalance zone
	 */
	zone_id_t           z_sig_eq;
} zone_security_flags_t;


/*
 * Zsecurity config to enable strict free of iokit objects to zone
 * or heap they were allocated from.
 *
 * Turn ZSECURITY_OPTIONS_STRICT_IOKIT_FREE off on x86 so as not
 * not break third party kexts that haven't yet been recompiled
 * to use the new iokit macros.
 */
#if XNU_PLATFORM_MacOSX && __x86_64__
#   define ZSECURITY_CONFIG_STRICT_IOKIT_FREE           OFF
#else
#   define ZSECURITY_CONFIG_STRICT_IOKIT_FREE           ON
#endif

/*
 * Zsecurity config to enable the read-only allocator
 */
#if KASAN_CLASSIC
#   define ZSECURITY_CONFIG_READ_ONLY                   OFF
#else
#   define ZSECURITY_CONFIG_READ_ONLY                   ON
#endif

/*
 * Zsecurity config to enable making heap feng-shui
 * less reliable.
 */
#if KASAN_CLASSIC
#   define ZSECURITY_CONFIG_SAD_FENG_SHUI               OFF
#   define ZSECURITY_CONFIG_GENERAL_SUBMAPS             1
#else
#   define ZSECURITY_CONFIG_SAD_FENG_SHUI               ON
#   define ZSECURITY_CONFIG_GENERAL_SUBMAPS             4
#endif

/*
 * Zsecurity config to enable adjusting of elements
 * with PGZ-OOB to right-align them in their space.
 */
#if KASAN || defined(__x86_64__) || CONFIG_KERNEL_TAGGING
#   define ZSECURITY_CONFIG_PGZ_OOB_ADJUST              OFF
#else
#   define ZSECURITY_CONFIG_PGZ_OOB_ADJUST              ON
#endif

/*
 * Zsecurity config to enable kalloc type segregation
 */
#if XNU_TARGET_OS_WATCH || KASAN_CLASSIC
#   define ZSECURITY_CONFIG_KT_BUDGET                   120
#   define ZSECURITY_CONFIG_KT_VAR_BUDGET               6
#else
#   define ZSECURITY_CONFIG_KT_BUDGET                   260
#   define ZSECURITY_CONFIG_KT_VAR_BUDGET               6
#endif

/*
 * Zsecurity config to enable (KASAN) tagging of memory allocations
 */
#if CONFIG_KERNEL_TAGGING
#   define ZSECURITY_CONFIG_ZONE_TAGGING                ON
#else
#   define ZSECURITY_CONFIG_ZONE_TAGGING                OFF
#endif


__options_decl(kalloc_type_options_t, uint64_t, {
	/*
	 * kalloc type option to switch default accounting to private.
	 */
	KT_OPTIONS_ACCT                         = 0x00000001,
	/*
	 * kalloc type option to print additional stats regarding zone
	 * budget distribution and signatures.
	 */
	KT_OPTIONS_DEBUG                        = 0x00000002,
	/*
	 * kalloc type option to allow loose freeing between heaps
	 */
	KT_OPTIONS_LOOSE_FREE                   = 0x00000004,
});

__enum_decl(kt_var_heap_id_t, uint32_t, {
	/*
	 * Fake "data" heap used to link views of data-only allocation that
	 * have been redirected to KHEAP_DATA_PRIVATE
	 */
	KT_VAR_DATA_HEAP,
	/*
	 * Fake "data" heap used to link views of data-only allocation that
	 * have been redirected to KHEAP_DATA_SHARED
	 */
	KT_VAR_DATA_SHARED_HEAP,
	/*
	 * Heaps for pointer arrays
	 */
	KT_VAR_PTR_HEAP0,
	KT_VAR_PTR_HEAP1,
	/*
	 * Indicating first additional heap added
	 */
	KT_VAR__FIRST_FLEXIBLE_HEAP,
});

/*
 * Zone submap indices
 *
 * Z_SUBMAP_IDX_VM
 * this map has the special property that its allocations
 * can be done without ever locking the submap, and doesn't use
 * VM entries in the map (which limits certain VM map operations on it).
 *
 * On ILP32 a single zone lives here (the vm_map_entry_reserved_zone).
 *
 * On LP64 it is also used to restrict VM allocations on LP64 lower
 * in the kernel VA space, for pointer packing purposes.
 *
 * Z_SUBMAP_IDX_GENERAL_{0,1,2,3}
 * used for unrestricted allocations
 *
 * Z_SUBMAP_IDX_DATA
 * used to sequester bags of bytes from all other allocations and allow VA reuse
 * within the map
 *
 * Z_SUBMAP_IDX_READ_ONLY
 * used for the read-only allocator
 */
__enum_decl(zone_submap_idx_t, uint32_t, {
	Z_SUBMAP_IDX_VM,
	Z_SUBMAP_IDX_READ_ONLY,
	Z_SUBMAP_IDX_GENERAL_0,
#if ZSECURITY_CONFIG(SAD_FENG_SHUI)
	Z_SUBMAP_IDX_GENERAL_1,
	Z_SUBMAP_IDX_GENERAL_2,
	Z_SUBMAP_IDX_GENERAL_3,
#endif /* ZSECURITY_CONFIG(SAD_FENG_SHUI) */
	Z_SUBMAP_IDX_DATA,

	Z_SUBMAP_IDX_COUNT,
});

#define KALLOC_MINALIGN     (1 << KALLOC_LOG2_MINALIGN)

/*
 * Variable kalloc_type heap config
 */
struct kheap_info {
	zone_id_t               kh_zstart;
	kalloc_heap_t           kh_views;
	kalloc_type_var_view_t  kt_views;
};
typedef union kalloc_type_views {
	struct kalloc_type_view     *ktv_fixed;
	struct kalloc_type_var_view *ktv_var;
} kalloc_type_views_t;

#define KT_VAR_MAX_HEAPS 8
#define MAX_ZONES       690
extern struct kheap_info        kalloc_type_heap_array[KT_VAR_MAX_HEAPS];
extern zone_id_t _Atomic        num_zones;
extern uint32_t                 zone_view_count;
extern struct zone              zone_array[MAX_ZONES];
extern struct zone_size_params  zone_ro_size_params[ZONE_ID__LAST_RO + 1];
extern zone_security_flags_t    zone_security_array[];
extern const char * const       kalloc_heap_names[KHEAP_ID_COUNT];
extern mach_memory_info_t      *panic_kext_memory_info;
extern vm_size_t                panic_kext_memory_size;
extern vm_offset_t              panic_fault_address;
extern uint16_t                 _zc_mag_size;

#define zone_index_foreach(i) \
	for (zone_id_t i = 1, num_zones_##i = os_atomic_load(&num_zones, acquire); \
	    i < num_zones_##i; i++)

#define zone_foreach(z) \
	for (zone_t z = &zone_array[1], \
	    last_zone_##z = &zone_array[os_atomic_load(&num_zones, acquire)]; \
	    z < last_zone_##z; z++)

__abortlike
extern void zone_invalid_panic(zone_t zone);

__pure2
static inline zone_id_t
zone_index(zone_t z)
{
	unsigned long delta;
	uint64_t quo;

	delta = (unsigned long)z - (unsigned long)zone_array;
	if (delta >= MAX_ZONES * sizeof(*z)) {
		zone_invalid_panic(z);
	}
	quo = Z_FAST_QUO(delta, Z_MAGIC_QUO(sizeof(*z)));
	__builtin_assume(quo < MAX_ZONES);
	return (zone_id_t)quo;
}

__pure2
static inline bool
zone_is_ro(zone_t zone)
{
	return zone >= &zone_array[ZONE_ID__FIRST_RO] &&
	       zone <= &zone_array[ZONE_ID__LAST_RO];
}

static inline bool
zone_addr_size_crosses_page(mach_vm_address_t addr, mach_vm_size_t size)
{
	return atop(addr ^ (addr + size - 1)) != 0;
}

__pure2
static inline uint16_t
zone_elem_redzone(zone_t zone)
{
#if KASAN_CLASSIC
	return zone->z_kasan_redzone;
#else
	(void)zone;
	return 0;
#endif
}

__pure2
static inline uint16_t
zone_elem_inner_offs(zone_t zone)
{
	return zone->z_elem_offs;
}

__pure2
static inline uint16_t
zone_elem_outer_offs(zone_t zone)
{
	return zone_elem_inner_offs(zone) - zone_elem_redzone(zone);
}

__pure2
static inline vm_offset_t
zone_elem_inner_size(zone_t zone)
{
	return zone->z_elem_size;
}

__pure2
static inline vm_offset_t
zone_elem_outer_size(zone_t zone)
{
	return zone_elem_inner_size(zone) + zone_elem_redzone(zone);
}

__pure2
static inline zone_security_flags_t
zone_security_config(zone_t z)
{
	zone_id_t zid = zone_index(z);
	return zone_security_array[zid];
}

static inline uint32_t
zone_count_free(zone_t zone)
{
	return zone->z_elems_free + zone->z_recirc.zd_full * _zc_mag_size;
}

static inline uint32_t
zone_count_allocated(zone_t zone)
{
	return zone->z_elems_avail - zone_count_free(zone);
}

static inline vm_size_t
zone_scale_for_percpu(zone_t zone, vm_size_t size)
{
	if (zone->z_percpu) {
		size *= zpercpu_count();
	}
	return size;
}

static inline vm_size_t
zone_size_wired(zone_t zone)
{
	/*
	 * this either require the zone lock,
	 * or to be used for statistics purposes only.
	 */
	vm_size_t size = ptoa(os_atomic_load(&zone->z_wired_cur, relaxed));
	return zone_scale_for_percpu(zone, size);
}

static inline vm_size_t
zone_size_free(zone_t zone)
{
	return zone_scale_for_percpu(zone,
	           zone_elem_inner_size(zone) * zone_count_free(zone));
}

/* Under KASAN builds, this also accounts for quarantined elements. */
static inline vm_size_t
zone_size_allocated(zone_t zone)
{
	return zone_scale_for_percpu(zone,
	           zone_elem_inner_size(zone) * zone_count_allocated(zone));
}

static inline vm_size_t
zone_size_wasted(zone_t zone)
{
	return zone_size_wired(zone) - zone_scale_for_percpu(zone,
	           zone_elem_outer_size(zone) * zone->z_elems_avail);
}

__pure2
static inline bool
zone_exhaustible(zone_t zone)
{
	return zone->z_wired_max != ~0u;
}

__pure2
static inline bool
zone_exhausted(zone_t zone)
{
	return zone->z_wired_cur >= zone->z_wired_max;
}

/*
 * Set and get the signature equivalance for the given zone
 */
extern void zone_set_sig_eq(zone_t zone, zone_id_t sig_eq);
extern zone_id_t zone_get_sig_eq(zone_t zone);
/*
 * Return the accumulated allocated memory on the given zone stats
 */
static inline vm_size_t
zone_stats_get_mem_allocated(zone_stats_t stats)
{
	return stats->zs_mem_allocated;
}

/*
 * For sysctl kern.zones_collectable_bytes used by memory_maintenance to check if a
 * userspace reboot is needed. The only other way to query for this information
 * is via mach_memory_info() which is unavailable on release kernels.
 */
extern uint64_t get_zones_collectable_bytes(void);

/*!
 * @enum zone_gc_level_t
 *
 * @const ZONE_GC_TRIM
 * Request a trimming GC: it will trim allocations in excess
 * of the working set size estimate only.
 *
 * @const ZONE_GC_DRAIN
 * Request a draining GC: this is an aggressive mode that will
 * cause all caches to be drained and all free pages returned to the system.
 *
 * @const ZONE_GC_JETSAM
 * Request to consider a jetsam, and then fallback to @c ZONE_GC_TRIM or
 * @c ZONE_GC_DRAIN depending on the state of the zone map.
 * To avoid deadlocks, only @c vm_pageout_garbage_collect() should ever
 * request a @c ZONE_GC_JETSAM level.
 */
__enum_closed_decl(zone_gc_level_t, uint32_t, {
	ZONE_GC_TRIM,
	ZONE_GC_DRAIN,
	ZONE_GC_JETSAM,
});

/*!
 * @function zone_gc
 *
 * @brief
 * Reduces memory used by zones by trimming caches and freelists.
 *
 * @discussion
 * @c zone_gc() is called:
 * - by the pageout daemon when the system needs more free pages.
 * - by the VM when contiguous page allocation requests get stuck
 *   (see vm_page_find_contiguous()).
 *
 * @param level         The zone GC level requested.
 */
extern void     zone_gc(zone_gc_level_t level);

#define ZONE_WSS_UPDATE_PERIOD  15
/*!
 * @function compute_zone_working_set_size
 *
 * @brief
 * Recomputes the working set size for every zone
 *
 * @discussion
 * This runs about every @c ZONE_WSS_UPDATE_PERIOD seconds (10),
 * computing an exponential moving average with a weight of 75%,
 * so that the history of the last minute is the dominating factor.
 */
extern void     compute_zone_working_set_size(void *);

/* Debug logging for zone-map-exhaustion jetsams. */
extern void     get_zone_map_size(uint64_t *current_size, uint64_t *capacity);
extern void     get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size);

/* Bootstrap zone module (create zone zone) */
extern void     zone_bootstrap(void);

/* Force-enable caching on a zone, generally unsafe to call directly */
extern void     zone_enable_caching(zone_t zone);

/*!
 * @function zone_early_mem_init
 *
 * @brief
 * Steal memory from pmap (prior to initialization of zalloc)
 * for the special vm zones that allow bootstrap memory and store
 * the range so as to facilitate range checking in zfree.
 *
 * @param size              the size to steal (must be a page multiple)
 */
__startup_func
extern vm_offset_t zone_early_mem_init(
	vm_size_t       size);

/*!
 * @function zone_get_early_alloc_size
 *
 * @brief
 * Compute the correct size (greater than @c ptoa(min_pages)) that is a multiple
 * of the allocation granule for the zone with the given creation flags and
 * element size.
 */
__startup_func
extern vm_size_t zone_get_early_alloc_size(
	const char          *name __unused,
	vm_size_t            elem_size,
	zone_create_flags_t  flags,
	vm_size_t            min_elems);

/*!
 * @function zone_cram_early
 *
 * @brief
 * Cram memory allocated with @c zone_early_mem_init() into a zone.
 *
 * @param zone          The zone to cram memory into.
 * @param newmem        The base address for the memory to cram.
 * @param size          The size of the memory to cram into the zone.
 */
__startup_func
extern void     zone_cram_early(
	zone_t          zone,
	vm_offset_t     newmem,
	vm_size_t       size);

extern bool     zone_maps_owned(
	vm_address_t    addr,
	vm_size_t       size);

#if KASAN_LIGHT
extern bool     kasan_zone_maps_owned(
	vm_address_t    addr,
	vm_size_t       size);
#endif /* KASAN_LIGHT */

extern void     zone_map_sizes(
	vm_map_size_t  *psize,
	vm_map_size_t  *pfree,
	vm_map_size_t  *plargest_free);

extern bool
zone_map_nearing_exhaustion(void);

static inline vm_tag_t
zalloc_flags_get_tag(zalloc_flags_t flags)
{
	return (vm_tag_t)((flags & Z_VM_TAG_MASK) >> Z_VM_TAG_SHIFT);
}

extern struct kalloc_result zalloc_ext(
	zone_t          zone,
	zone_stats_t    zstats,
	zalloc_flags_t  flags);

#if KASAN
#define ZFREE_PACK_SIZE(esize, usize)   (((uint64_t)(usize) << 32) | (esize))
#define ZFREE_ELEM_SIZE(combined)       ((uint32_t)(combined))
#define ZFREE_USER_SIZE(combined)       ((combined) >> 32)
#else
#define ZFREE_PACK_SIZE(esize, usize)   (esize)
#define ZFREE_ELEM_SIZE(combined)       (combined)
#endif

extern void     zfree_ext(
	zone_t          zone,
	zone_stats_t    zstats,
	void           *addr,
	uint64_t        combined_size);

extern zone_id_t zone_id_for_element(
	void           *addr,
	vm_size_t       esize);

#if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
extern void *zone_element_pgz_oob_adjust(
	void           *addr,
	vm_size_t       req_size,
	vm_size_t       elem_size);
#endif /* !ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */

extern void zone_element_bounds_check(
	vm_address_t    addr,
	vm_size_t       len);

extern vm_size_t zone_element_size(
	void           *addr,
	zone_t         *z,
	bool            clear_oob,
	vm_offset_t    *oob_offs);

/*!
 * @function zone_spans_ro_va
 *
 * @abstract
 * This function is used to check whether the specified address range
 * spans through the read-only zone range.
 *
 * @discussion
 * This only checks for the range specified within ZONE_ADDR_READONLY.
 * The parameters addr_start and addr_end are stripped off of PAC bits
 * before the check is made.
 */
extern bool zone_spans_ro_va(
	vm_offset_t     addr_start,
	vm_offset_t     addr_end);

/*!
 * @function __zalloc_ro_mut_atomic
 *
 * @abstract
 * This function is called from the pmap to perform the specified atomic
 * operation on memory from the read-only allocator.
 *
 * @discussion
 * This function is for internal use only and should not be called directly.
 */
static inline uint64_t
__zalloc_ro_mut_atomic(vm_offset_t dst, zro_atomic_op_t op, uint64_t value)
{
#define __ZALLOC_RO_MUT_OP(op, op2) \
	case ZRO_ATOMIC_##op##_8: \
	        return os_atomic_##op2((uint8_t *)dst, (uint8_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_16: \
	        return os_atomic_##op2((uint16_t *)dst, (uint16_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_32: \
	        return os_atomic_##op2((uint32_t *)dst, (uint32_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_64: \
	        return os_atomic_##op2((uint64_t *)dst, (uint64_t)value, seq_cst)

	switch (op) {
		__ZALLOC_RO_MUT_OP(OR, or_orig);
		__ZALLOC_RO_MUT_OP(XOR, xor_orig);
		__ZALLOC_RO_MUT_OP(AND, and_orig);
		__ZALLOC_RO_MUT_OP(ADD, add_orig);
		__ZALLOC_RO_MUT_OP(XCHG, xchg);
	default:
		panic("%s: Invalid atomic operation: %d", __func__, op);
	}

#undef __ZALLOC_RO_MUT_OP
}

/*!
 * @function zone_owns
 *
 * @abstract
 * This function is a soft version of zone_require that checks if a given
 * pointer belongs to the specified zone and should not be used outside
 * allocator code.
 *
 * @discussion
 * Note that zone_owns() can only work with:
 * - zones not allowing foreign memory
 * - zones in the general submap.
 *
 * @param zone          the zone the address needs to belong to.
 * @param addr          the element address to check.
 */
extern bool     zone_owns(
	zone_t          zone,
	void           *addr);

#ifndef VM_TAG_SIZECLASSES
#error MAX_TAG_ZONES
#endif
#if VM_TAG_SIZECLASSES

extern uint16_t zone_index_from_tag_index(
	uint32_t        tag_zone_index);

#endif /* VM_TAG_SIZECLASSES */

extern lck_grp_t zone_locks_grp;

static inline void
zone_lock(zone_t zone)
{
#if KASAN_FAKESTACK
	spl_t s = 0;
	if (zone->z_kasan_fakestacks) {
		s = splsched();
	}
#endif /* KASAN_FAKESTACK */
	hw_lck_ticket_lock(&zone->z_lock, &zone_locks_grp);
#if KASAN_FAKESTACK
	zone->z_kasan_spl = s;
#endif /* KASAN_FAKESTACK */
}

static inline void
zone_unlock(zone_t zone)
{
#if KASAN_FAKESTACK
	spl_t s = zone->z_kasan_spl;
	zone->z_kasan_spl = 0;
#endif /* KASAN_FAKESTACK */
	hw_lck_ticket_unlock(&zone->z_lock);
#if KASAN_FAKESTACK
	if (zone->z_kasan_fakestacks) {
		splx(s);
	}
#endif /* KASAN_FAKESTACK */
}

int track_this_zone(const char *zonename, const char *logname);
extern bool panic_include_kalloc_types;
extern zone_t kalloc_type_src_zone;
extern zone_t kalloc_type_dst_zone;

#if DEBUG || DEVELOPMENT
extern vm_size_t zone_element_info(void *addr, vm_tag_t * ptag);
#endif /* DEBUG || DEVELOPMENT */

#define VM_SUBMAP_NULL_SAFETY_DELTA (PAGE_SIZE * 3)

__exported_pop

__END_DECLS

#endif  /* _KERN_ZALLOC_INTERNAL_H_ */