Loading...
gen/thread_stack_pcs.c Libc-1353.100.2 Libc-1725.40.4
--- Libc/Libc-1353.100.2/gen/thread_stack_pcs.c
+++ Libc/Libc-1725.40.4/gen/thread_stack_pcs.c
@@ -26,11 +26,15 @@
 #include <pthread.h>
 #include <mach/mach.h>
 #include <mach/vm_statistics.h>
+#include <mach-o/dyld_priv.h>
 #include <stdlib.h>
+#include <pthread/private.h>
 #include <pthread/stack_np.h>
 #include "stack_logging.h"
 
-#define	INSTACK(a)	((a) >= stackbot && (a) <= stacktop)
+// maps regular stack to id=1 and dyld stack to id=2
+#define	STACKID(a)	(((a) >= stackbot && (a) <= stacktop) ? 1 : 2)
+#define	INSTACK(a)	(((a) >= stackbot && (a) <= stacktop) || (((a) != NULL) && (a) >= dyldstackbot && (a) <= dyldstacktop))
 #if defined(__x86_64__)
 #define	ISALIGNED(a)	((((uintptr_t)(a)) & 0xf) == 0)
 #elif defined(__i386__)
@@ -39,16 +43,105 @@
 #define	ISALIGNED(a)	((((uintptr_t)(a)) & 0x1) == 0)
 #endif
 
+// If we're outside of the pthread stack, assume any frame larger than 1MB is a
+// bogus pointer and stop walking the stack
+#define MAX_ALT_FRAME_SIZE 0x100000
+
+// The Swift async ABI is not implemented on 32bit architectures.
+#if __LP64__ || __ARM64_ARCH_8_32__
+// Tests if a frame is part of an async extended stack.
+// If an extended frame record is needed, the prologue of the function will
+// store 3 pointers consecutively in memory:
+//    [ AsyncContext, FP | (1 << 60), LR]
+// and set the new FP to point to that second element. Bits 63:60 of that
+// in-memory FP should be considered an ABI tag of some kind, and stack
+// walkers can expect to see 3 different values in the wild:
+//    * 0b0000 if there is an old-style frame (and still most non-Swift)
+//             record with just [FP, LR].
+//    * 0b0001 if there is one of these [Ctx, FP, LR] records.
+//    * 0b1111 in current kernel code.
+static uint32_t
+__is_async_frame(uintptr_t frame)
+{
+	uint64_t stored_fp = *(uint64_t*)frame;
+	if ((stored_fp >> 60) != 1)
+		return 0;
+
+	// The Swift runtime stores the async Task pointer in the 3rd Swift
+	// private TSD.
+	uintptr_t task_address = (uintptr_t)_pthread_getspecific_direct(__PTK_FRAMEWORK_SWIFT_KEY3);
+	if (task_address == 0)
+		return 0;
+	// This offset is an ABI guarantee from the Swift runtime.
+	int task_id_offset = 4 * sizeof(void *) + 4;
+	uint32_t *task_id_address = (uint32_t *)(task_address + task_id_offset);
+	// The TaskID is guaranteed to be non-zero.
+	return *task_id_address;
+}
+
+// Given a frame pointer that points to an async frame on the stack,
+// walk the list of async activations (as opposed to the OS stack) to
+// gather the PCs of the successive async activations which led us to
+// this point.
 __attribute__((noinline))
 static void
+__thread_stack_async_pcs(vm_address_t *buffer, unsigned max, unsigned *nb, uintptr_t frame)
+{
+	// The async context pointer is stored right before the saved FP
+	uint64_t async_context = *(uint64_t *)(frame - 8);
+	uintptr_t resume_addr, next;
+
+	do {
+		// The async context starts with 2 pointers:
+		// - the parent async context (morally equivalent to the parent
+		//   async frame frame pointer)
+		// - the resumption PC (morally equivalent to the return address)
+		// We can just use pthread_stack_frame_decode_np() because it just
+		// strips a data and a code pointer.
+#if  __ARM64_ARCH_8_32__
+		// On arm64_32, the stack layout is the same (64bit pointers), but
+		// the regular pointers in the async context are still 32 bits.
+		// Given arm64_32 never has PAC, we can just read them.
+		next = *(uintptr_t*)(uintptr_t)async_context;
+		resume_addr = *(uintptr_t*)(uintptr_t)(async_context+4);
+#else
+		next = pthread_stack_frame_decode_np(async_context, &resume_addr);
+#endif
+		if (!resume_addr)
+			return;
+
+		// The resume address for Swift async coroutines is at the beginning
+		// of a function. Most of the clients of backtraces unconditionally
+		// apply -1 to the return addresses in order to symbolicate the call
+		// site rather than the return address, and thus symbolicate
+		// something unrelated in this case. Mitigate the issue by applying
+		// a one byte offset to the resume address before storing it.
+		buffer[*nb] = resume_addr + 1;
+		(*nb)++;
+
+		if(!next || !ISALIGNED(next))
+			return;
+
+		async_context = next;
+	} while (max--);
+}
+#endif
+
+// Gather a maximum of `max` PCs of the current call-stack into `buffer`. If
+// `allow_async` is true, then switch to gathering Swift async frames instead
+// of the OS call-stack when an extended frame is encountered.
+__attribute__((noinline))
+static unsigned int
 __thread_stack_pcs(vm_address_t *buffer, unsigned max, unsigned *nb,
-		unsigned skip, void *startfp)
+		unsigned skip, void *startfp, bool allow_async)
 {
 	void *frame, *next;
 	pthread_t self = pthread_self();
 	void *stacktop = pthread_get_stackaddr_np(self);
 	void *stackbot = stacktop - pthread_get_stacksize_np(self);
-
+	const void *dyldstacktop = NULL;
+	const void *dyldstackbot = NULL;
+	unsigned int has_extended_frame = 0;
 	*nb = 0;
 
 	// Rely on the fact that our caller has an empty stackframe (no local vars)
@@ -56,27 +149,93 @@
 	frame = __builtin_frame_address(0);
 	next = (void*)pthread_stack_frame_decode_np((uintptr_t)frame, NULL);
 
+	_dyld_stack_range(&dyldstackbot, &dyldstacktop);
+
+	if (!ISALIGNED(frame)) {
+		return 0;
+	} else if (!INSTACK(frame)) {
+		// Allow calls to thread_stack_pcs from one non-default stack to work
+		// around rdar://114874436
+		if (startfp || allow_async) {
+			return 0;
+		}
+		// Need to not decrement skip in the condition to avoid breaking to the
+		// rest of the function with an underflowed skip value
+		while (skip) {
+			skip--;
+			const ptrdiff_t diff = (ptrdiff_t)next - (ptrdiff_t)frame;
+			if (!ISALIGNED(next) || (!INSTACK(next) &&
+					(diff <= 0 || diff >= MAX_ALT_FRAME_SIZE))) {
+				return 0;
+			}
+			frame = next;
+			next = (void*)pthread_stack_frame_decode_np((uintptr_t)frame, NULL);
+			if (INSTACK(frame)) {
+				goto pthread_stacktrace;
+			}
+		}
+		while (max--) {
+			uintptr_t retaddr;
+#if __LP64__ || __ARM64_ARCH_8_32__
+			if (__is_async_frame((uintptr_t)frame)) {
+				has_extended_frame = 1;
+			}
+#endif
+			next = (void*)pthread_stack_frame_decode_np((uintptr_t)frame, &retaddr);
+			buffer[*nb] = retaddr;
+			(*nb)++;
+			const ptrdiff_t diff = (ptrdiff_t)next - (ptrdiff_t)frame;
+			if (!ISALIGNED(next) || (!INSTACK(next) &&
+					(diff <= 0 || diff >= MAX_ALT_FRAME_SIZE))) {
+				return has_extended_frame;
+			}
+			frame = next;
+			if (INSTACK(next)) {
+				break;
+			}
+		}
+		// We get to here either by jumping back to the pthread stack or by
+		// enumerating `max` frames in the non-default stack. In the latter
+		// case, we're now done
+		if (max + 1 == 0) {
+			return has_extended_frame;
+		}
+	}
+pthread_stacktrace:
+
 	/* make sure return address is never out of bounds */
 	stacktop -= (next - frame);
 
-	if(!INSTACK(frame) || !ISALIGNED(frame))
-		return;
 	while (startfp || skip--) {
 		if (startfp && startfp < next) break;
 		if(!INSTACK(next) || !ISALIGNED(next) || next <= frame)
-			return;
+			return 0;
 		frame = next;
 		next = (void*)pthread_stack_frame_decode_np((uintptr_t)frame, NULL);
 	}
 	while (max--) {
 		uintptr_t retaddr;
+
+#if __LP64__ || __ARM64_ARCH_8_32__
+		unsigned int async_task_id = __is_async_frame((uintptr_t)frame);
+		if (async_task_id) {
+			if (allow_async) {
+				__thread_stack_async_pcs(buffer, max, nb, (uintptr_t)frame);
+				return async_task_id;
+			} else {
+				has_extended_frame = 1;
+			}
+		}
+#endif
 		next = (void*)pthread_stack_frame_decode_np((uintptr_t)frame, &retaddr);
 		buffer[*nb] = retaddr;
 		(*nb)++;
-		if(!INSTACK(next) || !ISALIGNED(next) || next <= frame)
-			return;
+		if(!INSTACK(next) || !ISALIGNED(next) || ((next <= frame) && (STACKID(next) == STACKID(frame))))
+			return has_extended_frame;
 		frame = next;
 	}
+
+	return has_extended_frame;
 }
 
 // Note that callee relies on this function having a minimal stackframe
@@ -87,7 +246,16 @@
 		unsigned skip, void *startfp)
 {
 	// skip this frame
-	__thread_stack_pcs(buffer, max, nb, skip + 1, startfp);
+	__thread_stack_pcs(buffer, max, nb, skip + 1, startfp, false);
+}
+
+__private_extern__ __attribute__((disable_tail_calls))
+unsigned int
+_thread_stack_async_pcs(vm_address_t *buffer, unsigned max, unsigned *nb,
+		unsigned skip, void *startfp)
+{
+	// skip this frame
+	return __thread_stack_pcs(buffer, max, nb, skip + 1, startfp, true);
 }
 
 // Prevent thread_stack_pcs() from getting tail-call-optimized into
@@ -100,8 +268,15 @@
 // Note that callee relies on this function having a minimal stackframe
 // to introspect (i.e. no tailcall and no local variables)
 __attribute__((disable_tail_calls))
-void
+unsigned int
 thread_stack_pcs(vm_address_t *buffer, unsigned max, unsigned *nb)
 {
-	__thread_stack_pcs(buffer, max, nb, 0, NULL);
-}
+	return __thread_stack_pcs(buffer, max, nb, 0, NULL, /* allow_async */false);
+}
+
+__attribute__((disable_tail_calls))
+unsigned int
+thread_stack_async_pcs(vm_address_t *buffer, unsigned max, unsigned *nb)
+{
+	return __thread_stack_pcs(buffer, max, nb, 0, NULL, /* allow_async */true);
+}