Loading...
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
// Copyright (c) 2021-2023 Apple Inc.  All rights reserved.
//
// @APPLE_OSREFERENCE_LICENSE_HEADER_START@
//
// This file contains Original Code and/or Modifications of Original Code
// as defined in and that are subject to the Apple Public Source License
// Version 2.0 (the 'License'). You may not use this file except in
// compliance with the License. The rights granted to you under the License
// may not be used to create, or enable the creation or redistribution of,
// unlawful or unlicensed copies of an Apple operating system, or to
// circumvent, violate, or enable the circumvention or violation of, any
// terms of an Apple operating system software license agreement.
//
// Please obtain a copy of the License at
// http://www.opensource.apple.com/apsl/ and read it before using this file.
//
// The Original Code and all software distributed under the License are
// distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
// EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
// INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
// Please see the License for the specific language governing rights and
// limitations under the License.
//
// @APPLE_OSREFERENCE_LICENSE_HEADER_END@

#ifndef KERN_RECOUNT_H
#define KERN_RECOUNT_H

#include <os/base.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/cdefs.h>
#include <sys/_types/_size_t.h>
#include <kern/cpc.h>

#if CONFIG_SPTM
// Track counters in secure execution contexts when the SPTM is available.
#define RECOUNT_SECURE_METRICS 1
#else // CONFIG_SPTM
#define RECOUNT_SECURE_METRICS 0
#endif // !CONFIG_SPTM

#if __arm64__
// Only ARM64 keeps precise track of user/system based on thread state.
#define RECOUNT_THREAD_BASED_LEVEL 1
#else // __arm64__
#define RECOUNT_THREAD_BASED_LEVEL 0
#endif // !__arm64__

__BEGIN_DECLS;

// Recount maintains counters for resources used by software, like CPU time and cycles.
// These counters are tracked at different levels of granularity depending on what execution bucket they're tracked in.
// For instance, while threads only differentiate on the broad CPU kinds due to memory constraints,
// the fewer number of tasks are free to use more memory and accumulate counters per-CPU.
//
// At context-switch, the scheduler calls `recount_switch_thread` to update the counters.
// The difference between the current counter values and per-CPU snapshots are added to each thread.
// On modern systems with fast timebase reads, the counters are also updated on entering and exiting the kernel.

#pragma mark - config

// A domain of the system's CPU topology, used as granularity when tracking counter values.
__enum_decl(recount_topo_t, unsigned int, {
	// Attribute counts to the entire system, i.e. only a single counter.
	// Note that mutual exclusion must be provided to update this kind of counter.
	RCT_TOPO_SYSTEM,
	// Attribute counts to the CPU they accumulated on.
	// Mutual exclusion is not required to update this counter, but preemption must be disabled.
	RCT_TOPO_CPU,
	// Attribute counts to the CPU kind (e.g. P or E).
	// Note that mutual exclusion must be provided to update this kind of counter.
	RCT_TOPO_CPU_KIND,
	// The number of different topographies.
	RCT_TOPO_COUNT,
});

// Get the number of elements in an array for per-topography data.
size_t recount_topo_count(recount_topo_t topo);

// Recount's definitions of CPU kinds, in lieu of one from the platform layers.
__enum_decl(recount_cpu_kind_t, unsigned int, {
	RCT_CPU_EFFICIENCY,
	RCT_CPU_PERFORMANCE,
	RCT_CPU_KIND_COUNT,
});

// A `recount_plan` structure controls the granularity of counting for a set of tracks and must be consulted when updating their counters.
typedef const struct recount_plan {
	const char *rpl_name;
	recount_topo_t rpl_topo;
} *recount_plan_t;

#define RECOUNT_PLAN_DECLARE(_name) \
    extern const struct recount_plan _name;

#define RECOUNT_PLAN_DEFINE(_name, _topo) \
	const struct recount_plan _name = { \
	        .rpl_name = #_name, \
	        .rpl_topo = _topo, \
	}

// Represents exception levels that Recount can track metrics during.
__enum_closed_decl(recount_level_t, unsigned int, {
	// Exception level is transitioning from the kernel.
	// Must be first, as this is the initial state.
	RCT_LVL_KERNEL,
	// Exception level is transitioning from user space.
	RCT_LVL_USER,
#if RECOUNT_SECURE_METRICS
	// Exception level is transitioning from secure execution.
	RCT_LVL_SECURE,
#endif // RECOUNT_SECURE_METRICS
	RCT_LVL_COUNT,
});

// The current objects with resource accounting policies.
RECOUNT_PLAN_DECLARE(recount_thread_plan);
RECOUNT_PLAN_DECLARE(recount_task_plan);
RECOUNT_PLAN_DECLARE(recount_task_terminated_plan);
RECOUNT_PLAN_DECLARE(recount_coalition_plan);
RECOUNT_PLAN_DECLARE(recount_processor_plan);

#pragma mark - generic accounting

// A track is where counter values can be updated atomically for readers by a
// single writer.
struct recount_track {
	// Used to synchronize updates so multiple values appear to be updated atomically.
	uint32_t rt_pad;
	uint32_t rt_sync;

	// The CPU usage metrics currently supported by Recount.
	struct recount_usage {
		struct recount_metrics {
			// Time tracking, in Mach timebase units.
			uint64_t rm_time_mach;
#if CONFIG_PERVASIVE_CPI
			// CPU performance counter metrics, when available.
			uint64_t rm_instructions;
			uint64_t rm_cycles;
#endif // CONFIG_PERVASIVE_CPI
		} ru_metrics[RCT_LVL_COUNT];

#if CONFIG_PERVASIVE_ENERGY
		// CPU energy in nanojoules, when available.
		// This is not a "metric" because it is sampled out-of-band by ApplePMGR through CLPC.
		uint64_t ru_energy_nj;
#endif // CONFIG_PERVASIVE_ENERGY
	} rt_usage;
};

// Memory management routines for tracks and usage structures.
struct recount_track *recount_tracks_create(recount_plan_t plan);
void recount_tracks_destroy(recount_plan_t plan, struct recount_track *tracks);
struct recount_usage *recount_usage_alloc(recount_topo_t topo);
void recount_usage_free(recount_topo_t topo, struct recount_usage *usage);

// Attribute tracks to usage structures, to read their values for typical high-level interfaces.

// Sum any tracks to a single sum.
void recount_sum(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum);

// Summarize tracks into a total sum and another for a particular CPU kind.
void recount_sum_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_track *tracks, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind);
// The same as above, but for usage-only objects, like coalitions.
void recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_usage *usage_list, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind);

// Sum the counters for each perf-level, in the order returned by the sysctls.
void recount_sum_perf_levels(recount_plan_t plan,
    struct recount_track *tracks, struct recount_usage *sums);

#pragma mark - xnu internals

#if XNU_KERNEL_PRIVATE

struct thread;
struct work_interval;
struct task;
struct proc;

// A smaller usage structure if only times are needed by a client.
struct recount_times_mach {
	uint64_t rtm_user;
	uint64_t rtm_system;
};

struct recount_times_mach recount_usage_times_mach(struct recount_usage *usage);
uint64_t recount_usage_system_time_mach(struct recount_usage *usage);
uint64_t recount_usage_time_mach(struct recount_usage *usage);
uint64_t recount_usage_cycles(struct recount_usage *usage);
uint64_t recount_usage_instructions(struct recount_usage *usage);

// Access another thread's usage data.
void recount_thread_usage(struct thread *thread, struct recount_usage *usage);
void recount_thread_perf_level_usage(struct thread *thread,
    struct recount_usage *usage_levels);
uint64_t recount_thread_time_mach(struct thread *thread);
struct recount_times_mach recount_thread_times(struct thread *thread);

// Read the current thread's usage data, accumulating counts until now.
//
// Interrupts must be disabled.
void recount_current_thread_usage(struct recount_usage *usage);
struct recount_times_mach recount_current_thread_times(void);
void recount_current_thread_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only);
void recount_current_thread_perf_level_usage(struct recount_usage
    *usage_levels);
uint64_t recount_current_thread_time_mach(void);
uint64_t recount_current_thread_user_time_mach(void);
uint64_t recount_current_thread_interrupt_time_mach(void);
uint64_t recount_current_thread_energy_nj(void);
void recount_current_task_usage(struct recount_usage *usage);
void recount_current_task_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only);

// Access a work interval's usage data.
void recount_work_interval_usage(struct work_interval *work_interval, struct recount_usage *usage);
struct recount_times_mach recount_work_interval_times(struct work_interval *work_interval);
uint64_t recount_work_interval_energy_nj(struct work_interval *work_interval);

// Access another task's usage data.
void recount_task_usage(struct task *task, struct recount_usage *usage);
struct recount_times_mach recount_task_times(struct task *task);
void recount_task_usage_perf_only(struct task *task, struct recount_usage *sum,
    struct recount_usage *sum_perf_only);
void recount_task_times_perf_only(struct task *task,
    struct recount_times_mach *sum, struct recount_times_mach *sum_perf_only);
uint64_t recount_task_energy_nj(struct task *task);
bool recount_task_thread_perf_level_usage(struct task *task, uint64_t tid,
    struct recount_usage *usage_levels);

// Get the sum of all terminated threads in the task (not including active threads).
void recount_task_terminated_usage(struct task *task,
    struct recount_usage *sum);
struct recount_times_mach recount_task_terminated_times(struct task *task);
void recount_task_terminated_usage_perf_only(struct task *task,
    struct recount_usage *sum, struct recount_usage *perf_only);

int proc_pidthreadcounts(struct proc *p, uint64_t thuniqueid, user_addr_t uaddr,
    size_t usize, int *ret);

#endif // XNU_KERNEL_PRIVATE

#if MACH_KERNEL_PRIVATE

#include <kern/smp.h>
#include <mach/machine/thread_status.h>
#include <machine/machine_routines.h>

#if __arm64__
static_assert((RCT_CPU_EFFICIENCY > RCT_CPU_PERFORMANCE) ==
    (CLUSTER_TYPE_E > CLUSTER_TYPE_P));
#endif // __arm64__

#pragma mark threads

// The per-thread resource accounting structure.
struct recount_thread {
	// Resources consumed across the lifetime of the thread, according to
	// `recount_thread_plan`.
	struct recount_track *rth_lifetime;
	// Time spent by this thread running interrupt handlers.
	uint64_t rth_interrupt_duration_mach;
#if RECOUNT_THREAD_BASED_LEVEL
	// The current level this thread is executing in.
	recount_level_t rth_current_level;
#endif // RECOUNT_THREAD_BASED_LEVEL
};
void recount_thread_init(struct recount_thread *th);
void recount_thread_copy(struct recount_thread *dst,
    struct recount_thread *src);
void recount_thread_deinit(struct recount_thread *th);

#pragma mark work_intervals

// The per-work-interval resource accounting structure.
struct recount_work_interval {
	// Resources consumed during the currently active work interval instance by
	// threads participating in the work interval, according to `recount_work_interval_plan`.
	struct recount_track *rwi_current_instance;
};
void recount_work_interval_init(struct recount_work_interval *wi);
void recount_work_interval_deinit(struct recount_work_interval *wi);

#pragma mark tasks

// The per-task resource accounting structure.
struct recount_task {
	// Resources consumed across the lifetime of the task, including active
	// threads, according to `recount_task_plan`.
	//
	// The `recount_task_plan` must be per-CPU to provide mutual exclusion for
	// writers.
	struct recount_track *rtk_lifetime;
	// Usage from threads that have terminated or child tasks that have exited,
	// according to `recount_task_terminated_plan`.
	//
	// Protected by the task lock when threads terminate.
	struct recount_usage *rtk_terminated;
};
void recount_task_init(struct recount_task *tk);
// Called on tasks that are moving their accounting information to a
// synthetic or re-exec-ed task.
void recount_task_copy(struct recount_task *dst,
    const struct recount_task *src);
void recount_task_deinit(struct recount_task *tk);

#pragma mark coalitions

// The per-coalition resource accounting structure.
struct recount_coalition {
	// Resources consumed by exited tasks only, according to
	// `recount_coalition_plan`.
	//
	// Protected by the coalition lock when tasks exit and roll-up their
	// statistics.
	struct recount_usage *rco_exited;
};
void recount_coalition_init(struct recount_coalition *co);
void recount_coalition_deinit(struct recount_coalition *co);

// Get the sum of all currently-exited tasks in the coalition, and a separate P-only structure.
void recount_coalition_usage_perf_only(struct recount_coalition *coal,
    struct recount_usage *sum, struct recount_usage *sum_perf_only);

#pragma mark processors

struct processor;

// A snap records counter values at a specific point in time.
struct recount_snap {
	uint64_t rsn_time_mach;
#if CONFIG_PERVASIVE_CPI
	struct cpc_cycles_instrs rsn_cpu_counts;
#endif // CONFIG_PERVASIVE_CPI
};

// The per-processor resource accounting structure.
struct recount_processor {
	struct recount_snap rpr_snap;
	struct recount_track rpr_active;
#if MACH_ASSERT
	recount_level_t rpr_current_level;
#endif // MACH_ASSERT
	uint64_t rpr_interrupt_duration_mach;
	uint64_t rpr_last_interrupt_enter_time_mach;
	uint64_t rpr_last_interrupt_leave_time_mach;
	uint64_t rpr_idle_time_mach;
	_Atomic uint64_t rpr_state_last_abs_time;
	_Atomic uint64_t rpr_idle_count;
#if __AMP__
	// Cache the RCT_TOPO_CPU_KIND offset, which cannot change.
	uint8_t rpr_cpu_kind_index;
#endif // __AMP__
};
void recount_processor_init(struct processor *processor);

// Get a snapshot of the processor's usage, along with an up-to-date snapshot
// of its idle time (to now if the processor is currently idle).
void recount_processor_usage(struct recount_processor *pr,
    struct recount_usage *usage, uint64_t *idle_time_mach);

// Get the current amount of time spent handling interrupts by the current
// processor.
uint64_t recount_current_processor_interrupt_duration_mach(void);

#pragma mark updates

// The following interfaces are meant for specific adopters, like the
// scheduler or platform code responsible for entering and exiting the kernel.

// Fill in a snap with the current values from time- and count-keeping hardware.
void recount_snapshot(struct recount_snap *snap);

// During user/kernel transitions, other serializing events provide enough
// serialization around reading the counter values.
void recount_snapshot_speculative(struct recount_snap *snap);

// Called by the scheduler when a context switch occurs.
void recount_switch_thread(struct recount_snap *snap, struct thread *off_thread,
    struct task *off_task);
// Called by the machine-dependent code to accumulate energy.
void recount_add_energy(struct thread *off_thread, struct task *off_task,
    uint64_t energy_nj);
// Log a kdebug event when a thread switches off-CPU.
void recount_log_switch_thread(const struct recount_snap *snap);
// Log a kdebug event when a thread switches on-CPU.
void recount_log_switch_thread_on(const struct recount_snap *snap);

// This function requires that no writers race with it -- this is only safe in
// debugger context or while running in the context of the track being
// inspected.
void recount_sum_unsafe(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum);

// For handling precise user/kernel time updates.
void recount_leave_user(void);
void recount_enter_user(void);
// For handling interrupt time updates.
void recount_enter_interrupt(void);
void recount_leave_interrupt(void);
#if __x86_64__
// Handle interrupt time-keeping on Intel, which aren't unified with the trap
// handlers, so whether the user or system timers are updated depends on the
// save-state.
void recount_enter_intel_interrupt(x86_saved_state_t *state);
void recount_leave_intel_interrupt(void);
#endif // __x86_64__

#endif // MACH_KERNEL_PRIVATE

#if XNU_KERNEL_PRIVATE

#if RECOUNT_SECURE_METRICS
// Handle guarded mode updates.
void recount_enter_secure(void);
void recount_leave_secure(void);
#endif // RECOUNT_SECURE_METRICS

#endif // XNU_KERNEL_PRIVATE

#if MACH_KERNEL_PRIVATE

// Hooks for each processor idling, running, and onlining.
void recount_processor_idle(struct recount_processor *pr,
    struct recount_snap *snap);
void recount_processor_run(struct recount_processor *pr,
    struct recount_snap *snap);
void recount_processor_online(processor_t processor, struct recount_snap *snap);

#pragma mark rollups

// Called by the thread termination queue with the task lock held.
void recount_task_rollup_thread(struct recount_task *tk,
    const struct recount_thread *th);

// Called by the coalition roll-up statistics functions with coalition lock
// held.
void recount_coalition_rollup_task(struct recount_coalition *co,
    struct recount_task *tk);

#endif // MACH_KERNEL_PRIVATE

__END_DECLS

#endif // KERN_RECOUNT_H