1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 | /* * Copyright (c) 2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. The rights granted to you under the License * may not be used to create, or enable the creation or redistribution of, * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. 
* * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ #include <mach/host_priv.h> #include <mach/host_special_ports.h> #include <mach/memory_error_notification.h> #include <mach/mach_types.h> #include <mach/host_info.h> #include <kern/host.h> #include <kern/locks.h> #include <kern/ecc.h> #include <kern/spl.h> #include <kern/mpsc_queue.h> #include <kern/thread.h> #include <kern/thread_call.h> #include <kern/startup.h> #include <os/log.h> #include <pexpert/pexpert.h> #include <pexpert/device_tree.h> #include <libkern/OSAtomic.h> #include <arm/pmap_public.h> #include <vm/vm_protos.h> /* New CoreAnalytics ECC logging mechanism */ /** * Stubs for targets which do not support ECC. */ kern_return_t ecc_log_memory_error( __unused pmap_paddr_t physical_address, __unused uint32_t ecc_flags) { return KERN_NOT_SUPPORTED; } kern_return_t ecc_log_memory_error_internal( __unused pmap_paddr_t physical_address, __unused uint32_t ecc_flags) { return KERN_NOT_SUPPORTED; } kern_return_t ecc_log_memory_error_ce( __unused pmap_paddr_t physical_address, __unused uint32_t ecc_flags, __unused uint32_t ce_count) { return KERN_NOT_SUPPORTED; } kern_return_t kern_ecc_poll_register( __unused platform_error_handler_ecc_poll_t poll_func, __unused uint32_t max_errors) { return KERN_NOT_SUPPORTED; } /* * Used to report earlier errors that were found after ECC gets enabled. * We don't want the VM to panic for these. 
*/ kern_return_t ecc_log_memory_error_delayed( __unused pmap_paddr_t physical_address, __unused uint32_t ecc_flags) { return KERN_FAILURE; } /** * MCC Logging */ /** * TODO: rdar://97394997 (Clean up ECC / MCC logging) * We can probably clean some of this up and share some of the code with ECC. */ #if XNU_HANDLE_MCC static struct mpsc_daemon_queue mcc_memory_error_event_queue; struct _mcc_mem_err_event { struct mpsc_queue_chain link; mcc_ecc_event_t event; }; typedef struct _mcc_mem_err_event* mcc_mem_err_event_t; #define MCC_ECC_NUM_ERRORS (1024) #define MCC_ERROR_EVENT_QUEUE_PRIORITY MAXPRI_USER static struct _mcc_mem_err_event mcc_events[MCC_ECC_NUM_ERRORS]; static atomic_int mcc_events_producer_idx = 0; static atomic_int mcc_events_consumer_idx = 0; SCALABLE_COUNTER_DEFINE(mcc_dropped_events); LCK_GRP_DECLARE(mcc_lock_grp, "mcc"); LCK_SPIN_DECLARE(mcc_lock, &mcc_lock_grp); static inline int mcc_events_next(int idx) { assert(idx < MCC_ECC_NUM_ERRORS); return (idx + 1) % MCC_ECC_NUM_ERRORS; } /* MCC ECC CoreAnalytics Error Logging */ static void mcc_error_notify_user(mcc_ecc_event_t event) { mach_port_t user_port = MACH_PORT_NULL; kern_return_t kr; kr = host_get_memory_error_port(host_priv_self(), &user_port); assert(kr == KERN_SUCCESS); if (!IPC_PORT_VALID(user_port)) { os_log_error(OS_LOG_DEFAULT, "Failed to get memory error port - mcc"); return; } mcc_memory_error_notification(user_port, event); ipc_port_release_send(user_port); } static void mcc_memory_error_event_queue_invoke(mpsc_queue_chain_t e, mpsc_daemon_queue_t queue __unused) { mcc_mem_err_event_t event; /* The consumer should never be invoked if there is nothing to consume. 
*/ int mcc_events_consumer_curr_idx = atomic_load(&mcc_events_consumer_idx); assert(mcc_events_consumer_curr_idx != atomic_load(&mcc_events_producer_idx)); event = mpsc_queue_element(e, struct _mcc_mem_err_event, link); mcc_error_notify_user(event->event); int mcc_events_consumer_next_idx = mcc_events_next(mcc_events_consumer_curr_idx); atomic_store(&mcc_events_consumer_idx, mcc_events_consumer_next_idx); } static mcc_mem_err_event_t mcc_memory_error_create_event(mcc_ecc_event_t mcc_event) { mcc_mem_err_event_t ret = NULL; /** * @note We are unable to dynamically allocate events, because this function can be called from * the primary interrupt context. Instead, we allocate from a statically sized ring buffer. */ const boolean_t interrupts_enabled = ml_set_interrupts_enabled(FALSE); lck_spin_lock(&mcc_lock); int mcc_events_producer_curr_idx = atomic_load(&mcc_events_producer_idx); int mcc_events_producer_next_idx = mcc_events_next(mcc_events_producer_curr_idx); if (mcc_events_producer_next_idx == atomic_load(&mcc_events_consumer_idx)) { /** * The consumer is running behind the producer, and we're in the primary interrupt context. * Drop this event and return NULL to the caller. 
*/ counter_inc(&mcc_dropped_events); ret = NULL; goto done; } mcc_mem_err_event_t event = &mcc_events[mcc_events_producer_curr_idx]; event->event = mcc_event; atomic_store(&mcc_events_producer_idx, mcc_events_producer_next_idx); ret = event; done: lck_spin_unlock(&mcc_lock); ml_set_interrupts_enabled(interrupts_enabled); return ret; } __startup_func static void mcc_logging_init(void) { mpsc_daemon_queue_init_with_thread(&mcc_memory_error_event_queue, mcc_memory_error_event_queue_invoke, MCC_ERROR_EVENT_QUEUE_PRIORITY, "daemon.mcc_error-events", MPSC_DAEMON_INIT_INACTIVE); mpsc_daemon_queue_activate(&mcc_memory_error_event_queue); } STARTUP(THREAD_CALL, STARTUP_RANK_MIDDLE, mcc_logging_init); #endif /* XNU_HANDLE_MCC */ kern_return_t mcc_log_memory_error(mcc_ecc_event_t mcc_event __unused) { #if XNU_HANDLE_MCC mcc_mem_err_event_t event = mcc_memory_error_create_event(mcc_event); if (event == NULL) { return KERN_RESOURCE_SHORTAGE; } assert(mcc_memory_error_event_queue.mpd_thread != NULL); mpsc_daemon_enqueue(&mcc_memory_error_event_queue, &event->link, MPSC_QUEUE_DISABLE_PREEMPTION); return KERN_SUCCESS; #else return KERN_FAILURE; #endif } #if (DEBUG || DEVELOPMENT) static int mcc_memory_error_notify_test_run(int64_t in, int64_t *out) { printf("Running mcc_memory_error_notify_test for %llu iterations\n", in); for (uint64_t i = 0; i < in; i++) { mcc_ecc_event_t event = {.version = MCC_ECC_V1, .status = (uint32_t)i}; /** * To accurately test mcc_log_memory_error, we must disable preemption, because it is called * from the primary interrupt context. */ disable_preemption(); mcc_log_memory_error(event); enable_preemption(); } *out = 1; return 0; } SYSCTL_TEST_REGISTER(mcc_memory_error_notify_test, mcc_memory_error_notify_test_run); #endif /* (DEBUG || DEVELOPMENT) */ /* Legacy ECC logging mechanism */ /* * ECC data. Not really KPCs, but this still seems like the * best home for this code. * * Circular buffer of events. When we fill up, drop data. 
*/ #define ECC_EVENT_BUFFER_COUNT (256) struct ecc_event ecc_data[ECC_EVENT_BUFFER_COUNT]; static uint32_t ecc_data_next_read; static uint32_t ecc_data_next_write; static boolean_t ecc_data_empty = TRUE; // next read == next write : empty or full? static LCK_GRP_DECLARE(ecc_data_lock_group, "ecc-data"); static LCK_SPIN_DECLARE(ecc_data_lock, &ecc_data_lock_group); static uint32_t ecc_correction_count; uint32_t ecc_log_get_correction_count() { return ecc_correction_count; } kern_return_t ecc_log_record_event(const struct ecc_event *ev) { spl_t x; if (ev->count > ECC_EVENT_INFO_DATA_ENTRIES) { panic("Count of %u on ecc event is too large.", (unsigned)ev->count); } x = splhigh(); lck_spin_lock(&ecc_data_lock); ecc_correction_count++; if (ecc_data_next_read == ecc_data_next_write && !ecc_data_empty) { lck_spin_unlock(&ecc_data_lock); splx(x); return KERN_FAILURE; } bcopy(ev, &ecc_data[ecc_data_next_write], sizeof(*ev)); ecc_data_next_write++; ecc_data_next_write %= ECC_EVENT_BUFFER_COUNT; ecc_data_empty = FALSE; lck_spin_unlock(&ecc_data_lock); splx(x); return KERN_SUCCESS; } kern_return_t ecc_log_get_next_event(struct ecc_event *ev) { spl_t x; x = splhigh(); lck_spin_lock(&ecc_data_lock); if (ecc_data_empty) { assert(ecc_data_next_write == ecc_data_next_read); lck_spin_unlock(&ecc_data_lock); splx(x); return KERN_FAILURE; } bcopy(&ecc_data[ecc_data_next_read], ev, sizeof(*ev)); ecc_data_next_read++; ecc_data_next_read %= ECC_EVENT_BUFFER_COUNT; if (ecc_data_next_read == ecc_data_next_write) { ecc_data_empty = TRUE; } lck_spin_unlock(&ecc_data_lock); splx(x); return KERN_SUCCESS; } |