threaded_stress.c diff - tests/threaded_stress.c - Libmalloc source code libmalloc-116

tests/threaded_stress.c /dev/null ⇄ libmalloc-715.120.13
--- /dev/null
+++ libmalloc/libmalloc-715.120.13/tests/threaded_stress.c
@@ -0,0 +1,430 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdatomic.h>
+#include <math.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <malloc/malloc.h>
+#include <darwintest.h>
+
+#include <../src/internal.h>
+
+#if !MALLOC_TARGET_EXCLAVES
+#include <sys/sysctl.h>
+#include <mach/mach.h>
+typedef unsigned seed_type_t;
+#else
+typedef unsigned long seed_type_t;
+#endif // !MALLOC_TARGET_EXCLAVES
+
+// These tests are based on perf_contended_malloc_free, but intended as
+// functional stress tests rather than performance tests.
+
+T_GLOBAL_META(T_META_TAG_XZONE);
+
+// Wraps an error check in an `if` whose condition is marked as unlikely via
+// __builtin_expect. Use it to move the darwintest assertion code out of the
+// straight-line execution path, since that code has non-trivial overhead and
+// codegen impact even if the assertion is never triggered.
+#define iferr(_e) if(__builtin_expect(!!(_e), 0))
+
+#pragma mark -
+
+// Draws one value from rand_r(seed) and splits it up: bits 0-3 and bits 4-7
+// each become a busy-loop count in [4, 19], stored through `first` and
+// `second`. The remaining bits (the draw shifted right by 8) are returned so
+// the caller can reuse them as a random number.
+static uint64_t
+random_busy_counts(seed_type_t *seed, uint64_t *first, uint64_t *second)
+{
+	const uint64_t nibble_mask = 0x10 - 1;
+	const uint64_t base_count = 0x4;
+	const uint64_t draw = rand_r(seed);
+
+	*first = base_count + (draw & nibble_mask);
+	*second = base_count + ((draw >> 4) & nibble_mask);
+	return draw >> 8;
+}
+
+// By default busy() does no cpu busy work in the malloc bench
+// The constants below select what busy() does between allocator calls.
+enum {
+	busy_is_nothing = 0, // busy() returns 0 without doing any work
+	busy_is_cpu_busy, // busy() runs cpu_busy(): a floating-point multiply loop
+	busy_is_cpu_yield, // busy() runs cpu_yield(): a loop of yield/pause instructions
+};
+// Current mode. Outside exclaves it can be overridden through the
+// THREADED_STRESS_CPU_BUSY environment variable.
+static int busy_select = busy_is_nothing;
+
+// Burns CPU time by multiplying an accumulator (seeded with M_PI) by M_PI
+// `n` times, and returns the accumulated product.
+static double
+cpu_busy(uint64_t n)
+{
+	double acc = M_PI;
+	while (n-- > 0) {
+		acc *= M_PI;
+	}
+	return acc;
+}
+
+// Executes `n` "yield" (ARM) or "pause" (x86) instructions and returns 0.
+// Compilation fails on any other architecture.
+static double
+cpu_yield(uint64_t n)
+{
+	for (uint64_t left = n; left != 0; left--) {
+#if defined(__arm__) || defined(__arm64__)
+		asm volatile("yield");
+#elif defined(__x86_64__) || defined(__i386__)
+		asm volatile("pause");
+#else
+#error Unrecognized architecture
+#endif
+	}
+	return 0.0;
+}
+
+// Dispatches on busy_select: runs cpu_busy(n) or cpu_yield(n) and returns its
+// result; for any other mode it does no work and returns 0. Marked noinline
+// so the call is never folded into its callers.
+__attribute__((noinline))
+static double
+busy(uint64_t n)
+{
+	if (busy_select == busy_is_cpu_busy) {
+		return cpu_busy(n);
+	}
+	if (busy_select == busy_is_cpu_yield) {
+		return cpu_yield(n);
+	}
+	return 0;
+}
+
+// Start barrier: workers block until all of them are ready, then the main
+// thread releases them together.
+#if MALLOC_TARGET_EXCLAVES
+// Each worker increments num_waiting_threads under ready_mut and waits on
+// ready_cond; the main thread broadcasts once the count equals nthreads.
+static pthread_cond_t ready_cond;
+static pthread_mutex_t ready_mut;
+static uint32_t num_waiting_threads;
+#else
+// Each worker signals ready_sem and waits on start_sem; the main thread waits
+// on ready_sem nthreads times and then signals every waiter on start_sem.
+static semaphore_t ready_sem, start_sem;
+#endif // MALLOC_TARGET_EXCLAVES
+
+// Number of worker threads used by the current test.
+static uint32_t nthreads;
+// Initialised to nthreads; each worker decrements it just before returning.
+static _Atomic uint32_t active_thr;
+// Allocation budget shared by all workers: initialised to the iteration count
+// and decremented once per worker loop iteration.
+static _Atomic int64_t todo;
+
+// Returns the default number of worker threads.
+//
+// Exclaves: a fixed 8. Elsewhere: the smaller of the hw.activecpu and
+// hw.physicalcpu sysctl values, queried once and cached in function-local
+// statics. The result is always at least 1: a failed or zero-valued sysctl
+// read falls back to a usable count instead of an uninitialised value, so
+// callers never end up with zero worker threads.
+static uint32_t
+ncpu(void)
+{
+#if MALLOC_TARGET_EXCLAVES
+	// TODO: Switch to sysctl once liblibc reports multi-cpu. Currently EVE runs
+	// tests on a single thread, but it's good to get some concurrent tests in,
+	// even if the threads don't run in parallel
+	return 8;
+#else
+	static uint32_t activecpu, physicalcpu;
+	if (!activecpu) {
+		uint32_t n = 0;
+		size_t s = sizeof(n);
+		if (sysctlbyname("hw.activecpu", &n, &s, NULL, 0) != 0 || n == 0) {
+			// Query failed: assume a single CPU.
+			n = 1;
+		}
+		activecpu = n;
+
+		n = 0;
+		s = sizeof(n);
+		if (sysctlbyname("hw.physicalcpu", &n, &s, NULL, 0) != 0 || n == 0) {
+			// Query failed: let the active count be the only limit.
+			n = activecpu;
+		}
+		physicalcpu = n;
+	}
+	return MIN(activecpu, physicalcpu);
+#endif // MALLOC_TARGET_EXCLAVES
+}
+
+// Number of slots in the `allocations` array.
+static uint32_t live_allocations;
+// Shared slot array; workers atomically swap pointers in and out of it.
+static void **allocations;
+// Workers allocate min_size + k * incr_size bytes, with k in [0, max_rand].
+static size_t max_rand, min_size, incr_size;
+
+// Runs one stress test: sets up the shared state, starts the worker threads on
+// `thread_fn`, releases them together and waits for all of them to finish.
+//
+// singlethreaded         - use exactly one worker. Otherwise the count comes
+//                          from THREADED_STRESS_NTHREADS (non-exclaves only);
+//                          if the resulting count is below 2, ncpu() is used.
+// from, to, incr         - allocation sizes are from, from + incr, ..., to;
+//                          (to - from) must be a multiple of incr.
+// live_allocations_count - number of slots in the shared allocations array.
+// iterations             - total allocation budget shared by all workers.
+// thread_fn              - worker entry point; its argument is the worker's
+//                          1-based index cast to void *.
+//
+// Outside exclaves, THREADED_STRESS_CPU_BUSY selects the busy() mode.
+static void
+malloc_threaded_stress(bool singlethreaded, size_t from, size_t to, size_t incr,
+		uint32_t live_allocations_count, uint64_t iterations,
+		void *(*thread_fn)(void *))
+{
+	kern_return_t kr;
+	int r;
+	char *e;
+
+#if MALLOC_TARGET_EXCLAVES
+	nthreads = singlethreaded ? 1 : ncpu();
+	busy_select = 0;
+#else
+	if (singlethreaded) {
+		nthreads = 1;
+	} else {
+		if ((e = getenv("THREADED_STRESS_NTHREADS"))) {
+			nthreads = strtoul(e, NULL, 0);
+		}
+
+		if (nthreads < 2) {
+			nthreads = ncpu();
+		}
+	}
+	if ((e = getenv("THREADED_STRESS_CPU_BUSY"))) {
+		busy_select = strtoul(e, NULL, 0);
+	}
+#endif // MALLOC_TARGET_EXCLAVES
+
+	atomic_init(&todo, iterations);
+	atomic_init(&active_thr, nthreads);
+
+	live_allocations = live_allocations_count;
+	// The slots must start out NULL: a worker frees whatever non-NULL value it
+	// swaps out of a slot, including on its very first visit, so the array is
+	// zero-initialised with calloc rather than left indeterminate.
+	allocations = calloc(live_allocations, sizeof(allocations[0]));
+	T_QUIET; T_ASSERT_NOTNULL(allocations, "allocations array");
+	incr_size = incr;
+	min_size = from;
+	max_rand = (to - from) / incr;
+	assert((to - from) % incr == 0);
+
+#if MALLOC_TARGET_EXCLAVES
+	r = pthread_cond_init(&ready_cond, NULL);
+	T_QUIET; T_ASSERT_POSIX_ZERO(r, "condvar create");
+	r = pthread_mutex_init(&ready_mut, NULL);
+	T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex create");
+	num_waiting_threads = 0;
+#else
+	kr = semaphore_create(mach_task_self(), &ready_sem, SYNC_POLICY_FIFO, 0);
+	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");
+	kr = semaphore_create(mach_task_self(), &start_sem, SYNC_POLICY_FIFO, 0);
+	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");
+#endif // MALLOC_TARGET_EXCLAVES
+
+	// Allocate thread array on heap to avoid llvm inserting stack check, which
+	// doesn't compile
+	pthread_t *threads = malloc(sizeof(*threads) * nthreads);
+	T_QUIET; T_ASSERT_NOTNULL(threads, "threads array");
+	for (uint32_t i = 0; i < nthreads; i++) {
+		r = pthread_create(&threads[i], NULL, thread_fn,
+				(void *)(uintptr_t)(i + 1));
+		T_QUIET; T_ASSERT_POSIX_ZERO(r, "pthread_create");
+	}
+
+#if MALLOC_TARGET_EXCLAVES
+	// Wait for all nthreads to signal that they're ready. Assertions inside
+	// the polling loop only fire on error, so spinning does not log a result
+	// on every iteration.
+	for (;;) {
+		r = pthread_mutex_lock(&ready_mut);
+		iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "lock mutex");}
+		if (num_waiting_threads == nthreads) {
+			r = pthread_cond_broadcast(&ready_cond);
+			T_ASSERT_POSIX_ZERO(r, "ready condvar broadcast");
+			r = pthread_mutex_unlock(&ready_mut);
+			T_ASSERT_POSIX_ZERO(r, "ready mutex unlock");
+			break;
+		}
+		r = pthread_mutex_unlock(&ready_mut);
+		iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "ready mutex unlock");}
+		yield();
+	}
+#else
+	// Collect one "ready" signal per worker, then release them all at once.
+	for (uint32_t i = 0; i < nthreads; i++) {
+		kr = semaphore_wait(ready_sem);
+		iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");}
+	}
+
+	kr = semaphore_signal_all(start_sem);
+	iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal_all");}
+#endif // MALLOC_TARGET_EXCLAVES
+
+	for (uint32_t i = 0; i < nthreads; i++) {
+		r = pthread_join(threads[i], NULL);
+		T_ASSERT_POSIX_ZERO(r, "pthread_join");
+	}
+
+	free(threads);
+	// All workers have been joined, so nothing touches the slot array anymore.
+	free(allocations);
+	allocations = NULL;
+}
+
+// Worker for the malloc_size stress tests.
+//
+// `arg` is the worker's 1-based index; it doubles as the rand_r() seed and
+// picks the starting slot. After the start barrier the worker loops: while the
+// shared `todo` budget lasts it allocates a randomly sized block and swaps it
+// into the next slot of `allocations`; once the budget is spent it swaps NULL
+// into live_allocations consecutive slots and then exits. Whatever pointer
+// comes out of a slot is sized with malloc_size() and freed. Always returns
+// NULL.
+static void *
+malloc_size_stress_thread(void *arg)
+{
+	kern_return_t kr;
+	int r;
+	seed_type_t seed;
+	volatile double dummy; // sink for busy() results
+	uint64_t pos, remaining_frees;
+	void *alloc;
+
+	seed = (uintptr_t)arg; // each thread repeats its own sequence
+	// start threads off in different positions in allocations array
+	pos = (seed - 1) * (live_allocations / nthreads);
+	remaining_frees = live_allocations;
+#if MALLOC_TARGET_EXCLAVES
+	// Register as ready, then sleep until the main thread broadcasts.
+	// NOTE(review): the wait is not wrapped in a predicate loop, so a spurious
+	// wakeup would let this worker start early - confirm whether that matters.
+	r = pthread_mutex_lock(&ready_mut);
+	T_QUIET; T_ASSERT_POSIX_ZERO(r, NULL);
+	num_waiting_threads++;
+	r = pthread_cond_wait(&ready_cond, &ready_mut);
+	T_QUIET; T_ASSERT_POSIX_ZERO(r, NULL);
+	r = pthread_mutex_unlock(&ready_mut);
+	T_QUIET; T_ASSERT_POSIX_ZERO(r, NULL);
+#else
+	// Signal ready_sem and block on start_sem until the main thread releases
+	// all workers.
+	kr = semaphore_wait_signal(start_sem, ready_sem);
+	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
+#endif // MALLOC_TARGET_EXCLAVES
+
+	while (1) {
+		uint64_t first, second;
+		uint64_t random = random_busy_counts(&seed, &first, &second);
+		// Claim one unit of the shared budget. The counter keeps being
+		// decremented after it reaches zero; only the sign matters.
+		if (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) {
+			dummy = busy(first);
+			alloc = malloc(min_size + (random % (max_rand + 1)) * incr_size);
+			iferr (!alloc) { T_ASSERT_POSIX_ZERO(errno, "malloc"); }
+		} else {
+			// Budget spent: drain live_allocations slots, then stop.
+			if (!remaining_frees--) break;
+			alloc = NULL;
+		}
+		// Store the new pointer (or NULL) in the next slot and take over
+		// whatever pointer was there before.
+		alloc = atomic_exchange(
+				(_Atomic(void *) *)&allocations[(pos++)%live_allocations],
+				alloc);
+		if (alloc) {
+			// Size once while allocated
+			(void)malloc_size(alloc);
+
+			dummy = busy(second);
+			free(alloc);
+
+			// Calling malloc_size on free pointers isn't safe in exclaves
+#if !MALLOC_TARGET_EXCLAVES
+			// Try again while (possibly) free
+			malloc_size(alloc);
+#endif // !MALLOC_TARGET_EXCLAVES
+		}
+	}
+
+	atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed);
+	return NULL;
+}
+
+// Multi-threaded malloc_size stress with allocation sizes 16..256 in steps of
+// 16 and 2048 shared slots, run with MallocNanoZone=0 in the environment.
+T_DECL(threaded_stress_malloc_size_tiny,
+		"multi-threaded stress test for tiny malloc_size",
+		T_META_ENVVAR("MallocNanoZone=0"))
+{
+	// TARGET_OS_TV / TARGET_OS_WATCH builds get a tenth of the budget.
+#if TARGET_OS_TV || TARGET_OS_WATCH
+	const uint64_t iterations = 200000ull;
+#else
+	const uint64_t iterations = 2000000ull;
+#endif // TARGET_OS_TV || TARGET_OS_WATCH
+
+	malloc_threaded_stress(false, 16, 256, 16, 2048,
+			iterations, malloc_size_stress_thread);
+}
+
+// Multi-threaded malloc_size stress with allocation sizes 16..256 in steps of
+// 16 and 2048 shared slots, run with MallocNanoZone=1 in the environment.
+T_DECL(threaded_stress_malloc_size_nano,
+		"multi-threaded stress test for nano malloc_size",
+		T_META_ENVVAR("MallocNanoZone=1"))
+{
+	// TARGET_OS_TV / TARGET_OS_WATCH builds get a tenth of the budget.
+#if TARGET_OS_TV || TARGET_OS_WATCH
+	const uint64_t iterations = 200000ull;
+#else
+	const uint64_t iterations = 2000000ull;
+#endif // TARGET_OS_TV || TARGET_OS_WATCH
+
+	malloc_threaded_stress(false, 16, 256, 16, 2048,
+			iterations, malloc_size_stress_thread);
+}
+
+// Multi-threaded malloc_size stress with allocation sizes 2048..8192 in steps
+// of 2048 and 64 shared slots.
+T_DECL(threaded_stress_malloc_size_small,
+		"multi-threaded stress test for small malloc_size")
+{
+	// TARGET_OS_TV / TARGET_OS_WATCH builds get a tenth of the budget.
+#if TARGET_OS_TV || TARGET_OS_WATCH
+	const uint64_t iterations = 20000ull;
+#else
+	const uint64_t iterations = 200000ull;
+#endif // TARGET_OS_TV || TARGET_OS_WATCH
+
+	malloc_threaded_stress(false, 2048, 8192, 2048, 64,
+			iterations, malloc_size_stress_thread);
+}
+
+#if !MALLOC_TARGET_EXCLAVES
+// Exclaves don't support fork()
+// Worker for the fork stress tests.
+//
+// Runs the same allocate / swap / free loop as the malloc_size worker, but at
+// random points (when the random value is a multiple of fork_prob) it also
+// calls fork(). The child clears `parent`, so it never forks again, and keeps
+// running the loop in its own copy of the process state. A parent worker ends
+// by calling wait() once per child it forked and asserting a clean exit.
+//
+// `arg` is the worker's 1-based index; it doubles as the rand_r() seed and
+// picks the starting slot. Always returns NULL.
+static void *
+malloc_fork_stress_thread(void *arg)
+{
+	kern_return_t kr;
+	int r;
+	unsigned int seed;
+	volatile double dummy; // sink for busy() results
+	uint64_t pos, remaining_frees;
+	void *alloc;
+	bool parent = true; // false once this code is running in a forked child
+	uint64_t children = 0; // successful fork() calls made by this worker
+
+	// fork() is attempted when random % fork_prob == 0. The default can be
+	// overridden with THREADED_STRESS_FORK_PROB; a value that parses to 0 is
+	// ignored.
+	char *e;
+	unsigned long fork_prob = 100000;
+	if ((e = getenv("THREADED_STRESS_FORK_PROB"))) {
+		unsigned long env_prob = strtoul(e, NULL, 0);
+		if (env_prob) {
+			fork_prob = env_prob;
+		}
+	}
+
+	seed = (uintptr_t)arg; // each thread repeats its own sequence
+	// start threads off in different positions in allocations array
+	pos = (seed - 1) * (live_allocations / nthreads);
+	remaining_frees = live_allocations;
+	// Signal ready_sem and block on start_sem until the main thread releases
+	// all workers.
+	kr = semaphore_wait_signal(start_sem, ready_sem);
+	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
+
+	while (1) {
+		uint64_t first, second;
+		uint64_t random = random_busy_counts(&seed, &first, &second);
+		if (parent && (random % fork_prob) == 0) {
+			pid_t pid = fork();
+			if (pid == -1) {
+				// EAGAIN is tolerated; any other fork() error fails the test.
+				if (errno != EAGAIN) {
+					T_ASSERT_POSIX_SUCCESS(pid, "fork()");
+				}
+			} else if (pid == 0) {
+				parent = false;
+			} else {
+				children++;
+			}
+		}
+
+		// Claim one unit of the shared budget. The counter keeps being
+		// decremented after it reaches zero; only the sign matters.
+		if (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) {
+			dummy = busy(first);
+			alloc = malloc(min_size + (random % (max_rand + 1)) * incr_size);
+			iferr (!alloc) { T_ASSERT_POSIX_ZERO(errno, "malloc"); }
+			// Write to the first 16 bytes of the new block.
+			memset(alloc, 'a', 16);
+		} else {
+			// Budget spent: drain live_allocations slots, then stop.
+			if (!remaining_frees--) break;
+			alloc = NULL;
+		}
+		// Store the new pointer (or NULL) in the next slot and take over
+		// whatever pointer was there before.
+		alloc = atomic_exchange(
+				(_Atomic(void *) *)&allocations[(pos++)%live_allocations],
+				alloc);
+		if (alloc) {
+			dummy = busy(second);
+			free(alloc);
+		}
+	}
+
+	if (parent) {
+		// Reap as many children as this worker forked.
+		// NOTE(review): wait() returns any child of the process, not
+		// necessarily one forked by this particular thread.
+		for (uint64_t i = 0; i < children; i++) {
+			int status = 0;
+			pid_t child = wait(&status);
+			if (child == -1) {
+				T_ASSERT_POSIX_SUCCESS(child, "wait()");
+			}
+			T_QUIET; T_ASSERT_TRUE(WIFEXITED(status), "child exited");
+			T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "child succeeded");
+		}
+	}
+
+	atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed);
+	return NULL;
+}
+
+// Multi-threaded fork stress with allocation sizes 16..256 in steps of 16 and
+// 2048 shared slots, run with MallocNanoZone=0 in the environment.
+T_DECL(threaded_stress_fork, "multi-threaded stress test for fork",
+		T_META_ENVVAR("MallocNanoZone=0")) // rdar://118860589
+{
+	// TARGET_OS_TV / TARGET_OS_WATCH builds get a tenth of the budget.
+#if TARGET_OS_TV || TARGET_OS_WATCH
+	const uint64_t iterations = 200000ull;
+#else
+	const uint64_t iterations = 2000000ull;
+#endif // TARGET_OS_TV || TARGET_OS_WATCH
+
+	malloc_threaded_stress(false, 16, 256, 16, 2048,
+			iterations, malloc_fork_stress_thread);
+}
+
+// Multi-threaded fork stress with allocation sizes 2048..8192 in steps of
+// 2048 and 64 shared slots, run with MallocNanoZone=0 in the environment.
+T_DECL(threaded_stress_fork_small,
+		"multi-threaded stress test of small for fork",
+		T_META_ENVVAR("MallocNanoZone=0")) // rdar://118860589
+{
+	// TARGET_OS_TV / TARGET_OS_WATCH builds get a tenth of the budget.
+#if TARGET_OS_TV || TARGET_OS_WATCH
+	const uint64_t iterations = 20000ull;
+#else
+	const uint64_t iterations = 200000ull;
+#endif // TARGET_OS_TV || TARGET_OS_WATCH
+
+	malloc_threaded_stress(false, 2048, 8192, 2048, 64,
+			iterations, malloc_fork_stress_thread);
+}
+#endif // !MALLOC_TARGET_EXCLAVES