Loading...
pthreads/pthread.c Libc-583 Libc-825.26
--- Libc/Libc-583/pthreads/pthread.c
+++ Libc/Libc-825.26/pthreads/pthread.c
@@ -61,14 +61,17 @@
 #include <sys/resource.h>
 #include <sys/sysctl.h>
 #include <sys/queue.h>
+#include <sys/mman.h>
 #include <machine/vmparam.h>
 #include <mach/vm_statistics.h>
+#include <mach/mach_init.h>
 #define	__APPLE_API_PRIVATE
 #include <machine/cpu_capabilities.h>
 #include <libkern/OSAtomic.h>
 #if defined(__ppc__)
 #include <libkern/OSCrossEndian.h>
 #endif
+#include <dispatch/private.h> /* for at_fork handlers */
 
 
 extern int _pthread_setcancelstate_internal(int state, int *oldstate, int conforming);
@@ -88,13 +91,15 @@
 static int _pthread_create_pthread_onstack(pthread_attr_t *attrs, void **stack, pthread_t *thread);
 static kern_return_t _pthread_free_pthread_onstack(pthread_t t, int freestruct, int termthread);
 static void _pthread_struct_init(pthread_t t, const pthread_attr_t *attrs, void * stack, size_t stacksize, int kernalloc, int nozero);
-static void _pthread_tsd_reinit(pthread_t t);
 static int  _new_pthread_create_suspended(pthread_t *thread, 
 	       const pthread_attr_t *attr,
 	       void *(*start_routine)(void *), 
 	       void *arg,
 		int create_susp);
 
+/* the registered libdispatch worker function */
+static void (*__libdispatch_workerfunction)(int, int, void *) = NULL;
+
 /* Get CPU capabilities from the kernel */
 __private_extern__ void _init_cpu_capabilities(void);
 
@@ -112,6 +117,47 @@
 	pthread_t thread;
 	mach_msg_trailer_t trailer;
 } pthread_reap_msg_t;
+
+/* Utilities */
+
+__private_extern__ uintptr_t commpage_pfz_base=0;
+
+void __pthread_pfz_setup(const char *apple[]) __attribute__ ((visibility ("hidden")));
+
+static uintptr_t __pfz_from_kernel(const char *str)
+{
+	unsigned long tmpval;
+	/* Parse the number that follows the first '=' in str; returns 0 when there is no '=' */
+	str = strchr(str, '=');
+	if (str == NULL)
+		return 0;
+	str++;
+	tmpval = strtoul(str, NULL, 0); /* base 0 lets strtoul pick the radix; failures yield 0 or ULONG_MAX */
+	if (tmpval == ULONG_MAX)
+		tmpval = 0;
+
+	return (uintptr_t) tmpval;
+}
+
+void
+__pthread_pfz_setup(const char *apple[])
+{
+	/* Take commpage_pfz_base from the "pfz=" entry of apple[], else fall back to _COMM_PAGE_TEXT_START. */
+	const char **p;
+	for (p = apple; p && *p; p++) {
+		/* match only when the entry begins with the "pfz=" key */
+		if (strncmp(*p, "pfz=", 4) == 0) {
+			commpage_pfz_base = __pfz_from_kernel(*p);
+			/* wipe the consumed entry; the cast drops the const that apple[] is declared with */
+			bzero((char *)*p, strlen(*p));
+			break;
+		}
+	}
+
+	if (commpage_pfz_base == 0)
+		commpage_pfz_base = _COMM_PAGE_TEXT_START;
+}
+
 
 /* We'll implement this when the main thread is a pthread */
 /* Use the local _pthread struct to avoid malloc before our MiG reply port is set */
@@ -124,6 +170,8 @@
 /* _pthread_count is protected by _pthread_list_lock */
 static int _pthread_count = 1;
 int __unix_conforming = 0;
+static int __workqueue_newspis = 0;
+static int __workqueue_oldspis = 0;
 __private_extern__ size_t pthreadsize = 0;
 
 /* under rosetta we will use old style creation of threads */
@@ -145,7 +193,7 @@
 	} while(!_spin_lock_try(lock));
 }
 
-extern mach_port_t thread_recycle_port;
+static mach_port_t thread_recycle_port = MACH_PORT_NULL;
 
 /* These are used to keep track of a semaphore pool shared by mutexes and condition
 ** variables.
@@ -163,24 +211,29 @@
 
 static OSSpinLock __workqueue_list_lock = OS_SPINLOCK_INIT;
 
-static void _pthread_exit(pthread_t self, void *value_ptr);
+static void _pthread_exit(pthread_t self, void *value_ptr) __dead2;
 static void _pthread_setcancelstate_exit(pthread_t self, void  *value_ptr, int conforming);
 static pthread_attr_t _pthread_attr_default = {0};
 static void _pthread_workq_init(pthread_workqueue_t wq, const pthread_workqueue_attr_t * attr);
 static int kernel_workq_setup = 0;
 static volatile int32_t kernel_workq_count = 0;
-static volatile unsigned int user_workq_count = 0; 
+static volatile unsigned int user_workq_count = 0; 	/* number of outstanding workqueues */
+static volatile unsigned int user_workitem_count = 0; 	/* number of outstanding workitems */
 #define KERNEL_WORKQ_ELEM_MAX  	64		/* Max number of elements in the kerrel */
 static int wqreadyprio = 0;	/* current highest prio queue ready with items */
 
-static int __pthread_workqueue_affinity = 1;	/* 0 means no affinity */
 __private_extern__ struct __pthread_workitem_pool __pthread_workitem_pool_head = TAILQ_HEAD_INITIALIZER(__pthread_workitem_pool_head);
 __private_extern__ struct __pthread_workqueue_pool __pthread_workqueue_pool_head = TAILQ_HEAD_INITIALIZER(__pthread_workqueue_pool_head);
+
+static struct _pthread_workitem *  __workqueue_pool_ptr;
+static size_t __workqueue_pool_size = 0;
+static int __workqueue_nitems = 0;
 
 struct _pthread_workqueue_head __pthread_workq0_head;
 struct _pthread_workqueue_head __pthread_workq1_head;
 struct _pthread_workqueue_head __pthread_workq2_head;
-pthread_workqueue_head_t __pthread_wq_head_tbl[WQ_NUM_PRIO_QS] = {&__pthread_workq0_head, &__pthread_workq1_head, &__pthread_workq2_head};
+struct _pthread_workqueue_head __pthread_workq3_head;
+pthread_workqueue_head_t __pthread_wq_head_tbl[WORKQ_NUM_PRIOQUEUE] = {&__pthread_workq0_head, &__pthread_workq1_head, &__pthread_workq2_head, &__pthread_workq3_head};
 
 static void workqueue_list_lock(void);
 static void workqueue_list_unlock(void);
@@ -193,10 +246,12 @@
 extern void thread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags);
 static pthread_workitem_t alloc_workitem(void);
 static void free_workitem(pthread_workitem_t);
+static void grow_workitem(void);
 static pthread_workqueue_t alloc_workqueue(void);
 static void free_workqueue(pthread_workqueue_t);
 static int _pthread_work_internal_init(void);
 static void workqueue_exit(pthread_t self, pthread_workqueue_t workq, pthread_workitem_t item);
+void _pthread_fork_child_postinit();
 
 void pthread_workqueue_atfork_prepare(void);
 void pthread_workqueue_atfork_parent(void);
@@ -211,6 +266,17 @@
 #define WQOPS_QUEUE_REMOVE 2
 #define WQOPS_THREAD_RETURN 4
 #define WQOPS_THREAD_SETCONC  8
+#define WQOPS_QUEUE_NEWSPISUPP  0x10    /* this is to check for newer SPI support */
+#define WQOPS_QUEUE_REQTHREADS  0x20	/* request number of threads of a prio */
+
+/* flag values for reuse field in the libc side _pthread_wqthread */
+#define WQ_FLAG_THREAD_PRIOMASK		0x0000ffff
+#define WQ_FLAG_THREAD_OVERCOMMIT	0x00010000      /* thread is with overcommit prio */
+#define WQ_FLAG_THREAD_REUSE		0x00020000      /* thread is being reused */
+#define WQ_FLAG_THREAD_NEWSPI		0x00040000      /* the call is with new SPIs */
+
+
+#define WORKQUEUE_OVERCOMMIT 0x10000	/* the work_kernreturn() for overcommit in prio field */
 
 /*
  * Flags filed passed to bsdthread_create and back in pthread_start 
@@ -222,7 +288,7 @@
 __private_extern__
 void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags);
 
-__private_extern__ 
+__private_extern__
 void _pthread_wqthread(pthread_t self, mach_port_t kport, void * stackaddr, pthread_workitem_t item, int reuse);
 
 #define PTHREAD_START_CUSTOM	0x01000000
@@ -234,7 +300,7 @@
 
 static int pthread_setschedparam_internal(pthread_t, mach_port_t, int, const struct sched_param *);
 extern pthread_t __bsdthread_create(void *(*func)(void *), void * func_arg, void * stack, pthread_t  thread, unsigned int flags);
-extern int __bsdthread_register(void (*)(pthread_t, mach_port_t, void *(*)(void *), void *, size_t, unsigned int), void (*)(pthread_t, mach_port_t, void *, pthread_workitem_t, int), int,void (*)(pthread_t, mach_port_t, void *(*)(void *), void *, size_t, unsigned int), void (*)(pthread_t, mach_port_t, void *, pthread_workitem_t, int),__uint64_t);
+extern int __bsdthread_register(void (*)(pthread_t, mach_port_t, void *(*)(void *), void *, size_t, unsigned int), void (*)(pthread_t, mach_port_t, void *, pthread_workitem_t, int), int,void (*)(pthread_t, mach_port_t, void *(*)(void *), void *, size_t, unsigned int), int32_t *,__uint64_t);
 extern int __bsdthread_terminate(void * freeaddr, size_t freesize, mach_port_t kport, mach_port_t joinsem);
 extern __uint64_t __thread_selfid( void );
 extern int __pthread_canceled(int);
@@ -243,7 +309,6 @@
 extern int __pthread_markcancel(int);
 extern int __workq_open(void);
 
-#define WORKQUEUE_OVERCOMMIT 0x10000
 
 extern int __workq_kernreturn(int, pthread_workitem_t, int, int);
 
@@ -251,7 +316,7 @@
 static const vm_address_t PTHREAD_STACK_HINT = 0xF0000000;
 #elif defined(__i386__) || defined(__x86_64__)
 static const vm_address_t PTHREAD_STACK_HINT = 0xB0000000;
-#elif defined(__arm__)
+#elif defined(__arm__)  
 static const vm_address_t PTHREAD_STACK_HINT = 0x30000000;
 #else
 #error Need to define a stack address hint for this architecture
@@ -365,7 +430,6 @@
 	kern_return_t res = 0;
     	vm_address_t freeaddr;
     	size_t freesize;
-	task_t self = mach_task_self();
 	int thread_count;
 	mach_port_t kport;
 	semaphore_t joinsem = SEMAPHORE_NULL;
@@ -820,13 +884,16 @@
 _pthread_body(pthread_t self)
 {
     _pthread_set_self(self);
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) 
+	if( (self->thread_id = __thread_selfid()) == (__uint64_t)-1)
+		printf("Failed to set thread_id in _pthread_body\n");
+#endif
     _pthread_exit(self, (self->fun)(self->arg));
 }
 
 void
 _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int pflags)
 {
-	int ret;
 #if WQ_DEBUG
 	pthread_t pself;
 #endif
@@ -836,9 +903,9 @@
 	if ((pflags & PTHREAD_START_CUSTOM) == 0) {
 		stackaddr = (char *)self;
 		_pthread_struct_init(self, attrs, stackaddr,  stacksize, 1, 1);
-		#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) 
 		_pthread_set_self(self);
-		#endif
+#endif
 		LOCK(_pthread_list_lock);
 		if (pflags & PTHREAD_START_SETSCHED) {
 			self->policy = ((pflags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK);
@@ -850,9 +917,9 @@
 			self->detached |= PTHREAD_CREATE_DETACHED;
 		}
 	}  else { 
-		#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) 
 		_pthread_set_self(self);
-		#endif
+#endif
 		LOCK(_pthread_list_lock);
 	}
 	self->kernel_thread = kport;
@@ -870,7 +937,7 @@
 	self->childrun = 1;
 	UNLOCK(_pthread_list_lock);
 
-#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) 
 	if( (self->thread_id = __thread_selfid()) == (__uint64_t)-1)
 		printf("Failed to set thread_id in pthread_start\n");
 #endif
@@ -916,7 +983,7 @@
 		t->policy = attrs->policy;
 		t->param = attrs->param;
 		t->freeStackOnExit = attrs->freeStackOnExit;
-		t->mutexes = (struct _pthread_mutex *)NULL;
+		t->cancel_error = 0;
 		t->sig = _PTHREAD_SIG;
                 t->reply_port = MACH_PORT_NULL;
                 t->cthread_self = NULL;
@@ -936,7 +1003,7 @@
 void
 _pthread_struct_init(pthread_t t, const pthread_attr_t *attrs, void * stack, size_t stacksize, int kernalloc, int nozero)
 {
-	mach_vm_offset_t stackaddr = (mach_vm_offset_t)(long)stack;
+	mach_vm_offset_t stackaddr = (mach_vm_offset_t)(uintptr_t)stack;
 
 		if (nozero == 0) {
 			memset(t, 0, sizeof(*t));
@@ -946,13 +1013,13 @@
 		t->schedset = attrs->schedset;
 		t->tsd[0] = t;
 		if (kernalloc != 0) {
-			stackaddr = (mach_vm_offset_t)(long)t;
+			stackaddr = (mach_vm_offset_t)(uintptr_t)t;
 
 			/* if allocated from kernel set values appropriately */
 			t->stacksize = stacksize;
-       			t->stackaddr = (void *)(long)stackaddr;
+       			t->stackaddr = (void *)(uintptr_t)stackaddr;
 			t->freeStackOnExit = 1;
-			t->freeaddr = (void *)(long)(stackaddr - stacksize - vm_page_size);
+			t->freeaddr = (void *)(uintptr_t)(stackaddr - stacksize - vm_page_size);
 			t->freesize = pthreadsize + stacksize + vm_page_size;
 		} else {
 			t->stacksize = attrs->stacksize;
@@ -963,7 +1030,7 @@
 		t->inherit = attrs->inherit;
 		t->policy = attrs->policy;
 		t->param = attrs->param;
-		t->mutexes = (struct _pthread_mutex *)NULL;
+		t->cancel_error = 0;
 		t->sig = _PTHREAD_SIG;
 		t->reply_port = MACH_PORT_NULL;
 		t->cthread_self = NULL;
@@ -979,13 +1046,6 @@
 		t->max_tsd_key = 0;
 }
 
-static void
-_pthread_tsd_reinit(pthread_t t)
-{
-	bzero(&t->tsd[1], (_INTERNAL_POSIX_THREAD_KEYS_END-1) * sizeof(void *));
-}
-
-
 /* Need to deprecate this in future */
 int
 _pthread_is_threaded(void)
@@ -1047,12 +1107,8 @@
 size_t
 pthread_get_stacksize_np(pthread_t t)
 {
-	int ret,nestingDepth=0;
+	int ret;
 	size_t size = 0;
-	vm_address_t address=0;
-        vm_size_t region_size=0;
-	struct vm_region_submap_info_64 info;
- 	mach_msg_type_number_t  count;
 
 	if (t == NULL)
 		return(ESRCH);
@@ -1084,7 +1140,7 @@
 	void * addr = NULL;
 
 	if (t == NULL)
-		return((void *)(long)ESRCH);
+		return((void *)(uintptr_t)ESRCH);
 	
 	if(t == pthread_self() || t == &_thread) //since the main thread will not get deallocated from underneath us
 		return t->stackaddr;
@@ -1093,7 +1149,7 @@
 
 	if ((ret = _pthread_find_thread(t)) != 0) {
 		UNLOCK(_pthread_list_lock);
-		return((void *)(long)ret);
+		return((void *)(uintptr_t)ret);
 	}
 	addr = t->stackaddr;
 	UNLOCK(_pthread_list_lock);
@@ -1118,7 +1174,7 @@
 }
 
 
-#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) 
 /* if we are passed in a pthread_t that is NULL, then we return
    the current thread's thread_id. So folks don't have to call
    pthread_self, in addition to us doing it, if they just want 
@@ -1170,15 +1226,22 @@
 int
 pthread_setname_np(const char *threadname)
 {
-	int rval;
-	size_t len;
-
-	rval = 0;
-	len = strlen(threadname);
-	rval = sysctlbyname("kern.threadname", NULL, 0, threadname, len);
-	if(rval == 0)
-	{
-		strlcpy((pthread_self())->pthread_name, threadname, len+1);	
+	int rval = 0;
+	int len = 0;
+	pthread_t current_thread = pthread_self();
+
+	if (threadname != NULL)
+		len = strlen(threadname);
+
+	/* prototype is in pthread_internals.h */
+	rval = proc_setthreadname((void *)threadname, len);
+	if (rval == 0) {
+		if (threadname != NULL) {
+			strlcpy(current_thread->pthread_name, threadname, MAXTHREADNAMESIZE);
+		} else {
+			memset(current_thread->pthread_name, 0 , MAXTHREADNAMESIZE);
+		}
+
 	}
 	return rval;
 
@@ -1309,7 +1372,7 @@
 			__kdebug_trace(0x9000008, t, 0, 0, 1, 0);
 #endif
 			if(t->freeStackOnExit)
-                                vm_deallocate(self, (mach_vm_address_t)(long)t, pthreadsize);
+                                vm_deallocate(self, (mach_vm_address_t)(uintptr_t)t, pthreadsize);
                         else
 				free(t);
 			} else if (t->childrun == 0) {
@@ -1345,7 +1408,7 @@
 #if PTH_TRACE
 			__kdebug_trace(0x9000008, t, pthreadsize, 0, 2, 0);
 #endif
-				vm_deallocate(self, (mach_vm_address_t)(long)t, pthreadsize);
+				vm_deallocate(self, (mach_vm_address_t)(uintptr_t)t, pthreadsize);
 			} else if (t->childrun == 0) {
 				TAILQ_INSERT_TAIL(&__pthread_head, t, plist);
 				_pthread_count++;
@@ -1467,9 +1530,10 @@
 	int newstyle = 0;
 	int ret;
 
-	if ((ret = _pthread_lookup_thread(thread, NULL, 1)) != 0)
+	if ((ret = _pthread_lookup_thread(thread, NULL, 1)) != 0) {
 		return (ret); /* Not a valid thread */
-		
+	}
+
 	LOCK(thread->lock);
 	newstyle = thread->newstyle;
 	if (thread->detached & PTHREAD_CREATE_JOINABLE)
@@ -1507,7 +1571,7 @@
 }
 
 
-/* 
+/*
  * pthread_kill call to system call
  */
 int   
@@ -1921,16 +1985,21 @@
 	return (t1 == t2);
 }
 
-__private_extern__ void
+// Force LLVM not to optimise this to a call to __pthread_set_self, if it does
+// then _pthread_set_self won't be bound when secondary threads try and start up.
+void __attribute__((noinline))
 _pthread_set_self(pthread_t p)
 {
-	extern void __pthread_set_self(pthread_t);
+	extern void __pthread_set_self(void *);
+
         if (p == 0) {
-                bzero(&_thread, sizeof(struct _pthread));
+                if (_thread.tsd[0] != 0) {
+                        bzero(&_thread, sizeof(struct _pthread));
+                }
                 p = &_thread;
         }
         p->tsd[0] = p;
-	__pthread_set_self(p);
+	__pthread_set_self(&p->tsd[0]);
 }
 
 void 
@@ -2022,7 +2091,7 @@
 /*
  * Perform package initialization - called automatically when application starts
  */
-__private_extern__ int
+int
 pthread_init(void)
 {
 	pthread_attr_t *attrs;
@@ -2082,6 +2151,7 @@
 	workq_targetconc[WORKQ_HIGH_PRIOQUEUE] = ncpus;
 	workq_targetconc[WORKQ_DEFAULT_PRIOQUEUE] = ncpus;
 	workq_targetconc[WORKQ_LOW_PRIOQUEUE] =  ncpus;
+	workq_targetconc[WORKQ_BG_PRIOQUEUE] =  ncpus;
 
 	mach_port_deallocate(mach_task_self(), host);
     
@@ -2089,9 +2159,6 @@
 	IF_ROSETTA() {
 		__oldstyle = 1;
 	}
-#endif
-#if defined(__arm__)
-	__oldstyle = 1;
 #endif
 
 #if defined(_OBJC_PAGE_BASE_ADDRESS)
@@ -2107,21 +2174,22 @@
 		/* We ignore the return result here. The ObjC runtime will just have to deal. */
 }
 #endif
-
+	//added so that thread_recycle_port is initialized on new launch.
+	_pthread_fork_child_postinit();
 	mig_init(1);		/* enable multi-threaded mig interfaces */
 	if (__oldstyle == 0) {
-#if defined(__i386__) || defined(__x86_64__)
-		__bsdthread_register(thread_start, start_wqthread, round_page(sizeof(struct _pthread)), _pthread_start, &workq_targetconc[0], (__uint64_t)(&thread->tsd[__PTK_LIBDISPATCH_KEY0]) - (__uint64_t)thread);
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) 
+		__bsdthread_register(thread_start, start_wqthread, round_page(sizeof(struct _pthread)), _pthread_start, &workq_targetconc[0], (uintptr_t)(&thread->tsd[__PTK_LIBDISPATCH_KEY0]) - (uintptr_t)(&thread->tsd[0]));
 #else
-		__bsdthread_register(_pthread_start, _pthread_wqthread, round_page(sizeof(struct _pthread)), NULL, &workq_targetconc[0], (__uint64_t)&thread->tsd[__PTK_LIBDISPATCH_KEY0] - (__uint64_t)thread);
-#endif
-	}
-
-#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
+		__bsdthread_register(_pthread_start, _pthread_wqthread, round_page(sizeof(struct _pthread)), NULL, &workq_targetconc[0], (uintptr_t)&thread->tsd[__PTK_LIBDISPATCH_KEY0] - (uintptr_t)thread);
+#endif
+	}
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) 
 	if( (thread->thread_id = __thread_selfid()) == (__uint64_t)-1)
 		printf("Failed to set thread_id in pthread_init\n");
+#endif
 	return 0;
-#endif
 }
 
 int sched_yield(void)
@@ -2131,8 +2199,12 @@
 }
 
 /* This used to be the "magic" that gets the initialization routine called when the application starts */
-static int _do_nothing(void) { return 0; }
-int (*_cthread_init_routine)(void) = _do_nothing;
+/*
+ * (These have been moved to setenv.c, so we can use them to fix a pre-10.5
+ * crt1.o issue)
+ * static int _do_nothing(void) { return 0; }
+ * int (*_cthread_init_routine)(void) = _do_nothing;
+ */
 
 /* Get a semaphore from the pool, growing it if necessary */
 
@@ -2183,10 +2255,19 @@
 	__kdebug_trace(0x900000c, p, 0, 0, 10, 0);
 #endif
 	_pthread_count = 1;
-#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) 
 	if( (p->thread_id = __thread_selfid()) == (__uint64_t)-1)
 		printf("Failed to set thread_id in pthread_fork_child\n");
 #endif
+}
+
+void _pthread_fork_child_postinit() {
+       kern_return_t kr;
+       /* Allocate a receive right in this task and keep its name in thread_recycle_port. */
+       kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &thread_recycle_port);
+       if (kr != KERN_SUCCESS) {
+               abort(); /* any allocation failure is treated as fatal */
+       }
 }
 
 /*
@@ -2242,7 +2323,7 @@
 _pthread_join_cleanup(pthread_t thread, void ** value_ptr, int conforming)
 {
 	kern_return_t res;
-	int detached = 0, ret;
+	int ret;
 
 #if PTH_TRACE
 	__kdebug_trace(0x9000028, thread, 0, 0, 1, 0);
@@ -2291,7 +2372,7 @@
 #if PTH_TRACE
 	__kdebug_trace(0x9000028, thread, 0, 0, 2, 0);
 #endif
-		vm_deallocate(mach_task_self(), (mach_vm_address_t)(long)thread, pthreadsize);
+		vm_deallocate(mach_task_self(), (mach_vm_address_t)(uintptr_t)thread, pthreadsize);
 	} else {
 		thread->sig = _PTHREAD_NO_SIG;
 #if PTH_TRACE
@@ -2393,6 +2474,7 @@
 			case WORKQ_HIGH_PRIOQUEUE:
 			case WORKQ_DEFAULT_PRIOQUEUE:
 			case WORKQ_LOW_PRIOQUEUE:
+			case WORKQ_BG_PRIOQUEUE:
         			attr->queueprio = qprio;
 				break;
 			default:
@@ -2448,6 +2530,10 @@
 {
 	int ret;
 
+	if (__workqueue_newspis != 0)
+		return(EPERM);
+	__workqueue_oldspis = 1;	
+
 	workqueue_list_lock();
 	ret =_pthread_work_internal_init();
 	workqueue_list_unlock();
@@ -2459,6 +2545,9 @@
 pthread_workqueue_requestconcurrency_np(int queue, int request_concurrency)
 {
 	int error = 0;
+
+	if (__workqueue_newspis != 0)
+		return(EPERM);
 
 	if (queue < 0 || queue > WORKQ_NUM_PRIOQUEUE)
 		return(EINVAL);
@@ -2493,13 +2582,33 @@
 void
 pthread_workqueue_atfork_child(void)
 {
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) 
+	pthread_t self = pthread_self();
+
+	__workqueue_list_lock = OS_SPINLOCK_INIT;
+
+	/* already using new spis? */
+	if (__workqueue_newspis != 0) {
+		/* prepare the kernel for workq action */
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) 
+		__bsdthread_register(thread_start, start_wqthread, round_page(sizeof(struct _pthread)), _pthread_start, &workq_targetconc[0], (uintptr_t)(&self->tsd[__PTK_LIBDISPATCH_KEY0]) - (uintptr_t)(&self->tsd[0]));
+#else
+		__bsdthread_register(_pthread_start, _pthread_wqthread, round_page(sizeof(struct _pthread)),NULL,NULL,0);
+#endif
+		(void)__workq_open();
+		kernel_workq_setup = 1;
+		return;
+	}
+
+	/* not using old spis either? */
+	if (__workqueue_oldspis == 0)
+		return;
+
 	/* 
 	 * NOTE:  workq additions here  
 	 * are for i386,x86_64 only as
 	 * ppc and arm do not support it
 	 */
-	__workqueue_list_lock = OS_SPINLOCK_INIT;
 	if (kernel_workq_setup != 0){
 	   kernel_workq_setup = 0;
 	   _pthread_work_internal_init();
@@ -2513,31 +2622,47 @@
 {
 	int i, error;
 	pthread_workqueue_head_t headp;
-	pthread_workitem_t witemp;
 	pthread_workqueue_t wq;
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) 
+	pthread_t self = pthread_self();
+#endif
 
 	if (kernel_workq_setup == 0) {
-#if defined(__i386__) || defined(__x86_64__)
-		__bsdthread_register(thread_start, start_wqthread, round_page(sizeof(struct _pthread)),NULL,NULL, NULL);
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) 
+		__bsdthread_register(thread_start, start_wqthread, round_page(sizeof(struct _pthread)), _pthread_start, &workq_targetconc[0], (uintptr_t)(&self->tsd[__PTK_LIBDISPATCH_KEY0]) - (uintptr_t)(&self->tsd[0]));
 #else
-		__bsdthread_register(_pthread_start, _pthread_wqthread, round_page(sizeof(struct _pthread)),NULL,NULL, NULL);
+		__bsdthread_register(_pthread_start, _pthread_wqthread, round_page(sizeof(struct _pthread)),NULL,NULL,0);
 #endif
 
 		_pthread_wq_attr_default.queueprio = WORKQ_DEFAULT_PRIOQUEUE;
 		_pthread_wq_attr_default.sig = PTHREAD_WORKQUEUE_ATTR_SIG;
 
-		for( i = 0; i< WQ_NUM_PRIO_QS; i++) {
+		for( i = 0; i< WORKQ_NUM_PRIOQUEUE; i++) {
 			headp = __pthread_wq_head_tbl[i];
 			TAILQ_INIT(&headp->wqhead);
 			headp->next_workq = 0;
 		}
 
-		/* create work item and workqueue pools */
-		witemp = (struct _pthread_workitem *)malloc(sizeof(struct _pthread_workitem) * WORKITEM_POOL_SIZE);
-		bzero(witemp, (sizeof(struct _pthread_workitem) * WORKITEM_POOL_SIZE));
-		for (i = 0; i < WORKITEM_POOL_SIZE; i++) {
-			TAILQ_INSERT_TAIL(&__pthread_workitem_pool_head, &witemp[i], item_entry);
-		}
+		__workqueue_pool_ptr = NULL;
+		__workqueue_pool_size = round_page(sizeof(struct _pthread_workitem) * WORKITEM_POOL_SIZE);
+
+		__workqueue_pool_ptr = (struct _pthread_workitem *)mmap(NULL, __workqueue_pool_size,
+			PROT_READ|PROT_WRITE,
+			MAP_ANON | MAP_PRIVATE,
+			0,
+			0);
+
+		if (__workqueue_pool_ptr == MAP_FAILED) {
+			/* Not expected to fail, if it does, always malloc for work items */
+			__workqueue_nitems = WORKITEM_POOL_SIZE;
+			__workqueue_pool_ptr = NULL;
+		} else 
+			__workqueue_nitems = 0;
+
+		/* sets up the workitem pool */
+		grow_workitem();
+
+		/* since the size is less than a page, leaving this in malloc pool */
 		wq = (struct _pthread_workqueue *)malloc(sizeof(struct _pthread_workqueue) * WORKQUEUE_POOL_SIZE);
 		bzero(wq, (sizeof(struct _pthread_workqueue) * WORKQUEUE_POOL_SIZE));
 		for (i = 0; i < WORKQUEUE_POOL_SIZE; i++) {
@@ -2547,7 +2672,9 @@
 		if (error = __workq_open()) {
 			TAILQ_INIT(&__pthread_workitem_pool_head);
 			TAILQ_INIT(&__pthread_workqueue_pool_head);
-			free(witemp);
+			if (__workqueue_pool_ptr != NULL) {
+				munmap((void *)__workqueue_pool_ptr, __workqueue_pool_size);
+			}
 			free(wq);
 			return(ENOMEM);
 		}
@@ -2564,14 +2691,25 @@
 	pthread_workitem_t witem;
 
 	if (TAILQ_EMPTY(&__pthread_workitem_pool_head)) {
-		workqueue_list_unlock();
-		witem = malloc(sizeof(struct _pthread_workitem));
-		witem->gencount = 0;
-		workqueue_list_lock();
-	} else {
-		witem = TAILQ_FIRST(&__pthread_workitem_pool_head);
-		TAILQ_REMOVE(&__pthread_workitem_pool_head, witem, item_entry);
-	}
+		/* the chunk size is set so some multiple of it is pool size */
+		if (__workqueue_nitems < WORKITEM_POOL_SIZE) {
+			grow_workitem();
+		} else {
+			workqueue_list_unlock();
+			witem = malloc(sizeof(struct _pthread_workitem));
+			workqueue_list_lock();
+			witem->fromcache = 0;
+			goto out;
+		}
+	}
+	witem = TAILQ_FIRST(&__pthread_workitem_pool_head);
+	TAILQ_REMOVE(&__pthread_workitem_pool_head, witem, item_entry);
+	witem->fromcache = 1;
+out:
+	witem->flags = 0;
+	witem->item_entry.tqe_next = 0;
+	witem->item_entry.tqe_prev = 0;
+	user_workitem_count++;
 	return(witem);
 }
 
@@ -2579,8 +2717,27 @@
 static void
 free_workitem(pthread_workitem_t witem) 
 {
-	witem->gencount++;
-	TAILQ_INSERT_TAIL(&__pthread_workitem_pool_head, witem, item_entry);
+	user_workitem_count--;
+	witem->flags = 0;
+	if (witem->fromcache != 0)
+		TAILQ_INSERT_TAIL(&__pthread_workitem_pool_head, witem, item_entry);
+	else
+		free(witem);
+}
+
+static void
+grow_workitem(void)
+{	/* Thread the next WORKITEM_CHUNK_SIZE entries of the mmap'ed pool onto the free workitem list. */
+	pthread_workitem_t witemp;
+	int i;
+	if (__workqueue_pool_ptr == NULL) return;	/* pool mmap failed: nothing to carve, leave the list empty */
+	witemp = &__workqueue_pool_ptr[__workqueue_nitems];
+	bzero(witemp, (sizeof(struct _pthread_workitem) * WORKITEM_CHUNK_SIZE));
+	for (i = 0; i < WORKITEM_CHUNK_SIZE; i++) {
+		witemp[i].fromcache = 1;
+		TAILQ_INSERT_TAIL(&__pthread_workitem_pool_head, &witemp[i], item_entry);
+	}
+	__workqueue_nitems += WORKITEM_CHUNK_SIZE;
 }
 
 /* This routine is called with list lock held */
@@ -2658,7 +2815,7 @@
 loop:
 	while (kernel_workq_count < KERNEL_WORKQ_ELEM_MAX) {
 		found = 0;
-		for (i = 0; i < WQ_NUM_PRIO_QS; i++)  {
+		for (i = 0; i < WORKQ_NUM_PRIOQUEUE; i++)  {
 			wqreadyprio = i;	/* because there is nothing else higher to run */
 			headp = __pthread_wq_head_tbl[i];
 
@@ -2779,7 +2936,6 @@
 #if WQ_LISTTRACE
 	__kdebug_trace(0x90080a8, workq, &workq->item_listhead, workq->item_listhead.tqh_first,  workq->item_listhead.tqh_last, 0);
 #endif
-				witem->flags = 0;
 				free_workitem(witem);
 #if WQ_TRACE
 			__kdebug_trace(0x9000064, 4, workq->barrier_count, 0, 0, 0);
@@ -2808,7 +2964,6 @@
 				__kdebug_trace(0x900006c, workq, workq->kq_count, 0, 0xff, 0);
 #endif
 				}
-				witem->flags = 0;
 				free_workitem(witem);
 				workq->flags |= PTHREAD_WORKQ_DESTROYED;
 #if WQ_TRACE
@@ -2855,7 +3010,7 @@
 				workq->kq_count++;
 				witem->flags |= PTH_WQITEM_KERN_COUNT;
 			}
-			OSAtomicIncrement32(&kernel_workq_count);
+			OSAtomicIncrement32Barrier(&kernel_workq_count);
 			workqueue_list_unlock();
 			
 			prio = workq->queueprio;
@@ -2864,7 +3019,7 @@
 			}
 
 			if (( error =__workq_kernreturn(WQOPS_QUEUE_ADD, witem, workq->affinity, prio)) == -1) {
-				OSAtomicDecrement32(&kernel_workq_count);
+				OSAtomicDecrement32Barrier(&kernel_workq_count);
 				workqueue_list_lock();
 #if WQ_TRACE
 			__kdebug_trace(0x900007c, witem, workq, witem->func_arg, workq->kq_count, 0);
@@ -2900,10 +3055,25 @@
 #if WQ_DEBUG
 	pthread_t pself;
 #endif
-
-
-	workq = item->workq;
-	if (reuse == 0) {
+	int thread_reuse = 0;
+	int thread_priority = 0;
+	int thread_newspi = 0;
+	int thread_options = 0;
+
+	if (reuse & WQ_FLAG_THREAD_NEWSPI) {
+		thread_reuse = reuse & WQ_FLAG_THREAD_REUSE;
+		if ((reuse & WQ_FLAG_THREAD_OVERCOMMIT) != 0)
+			thread_options = WORKQ_ADDTHREADS_OPTION_OVERCOMMIT;
+		thread_priority = reuse & WQ_FLAG_THREAD_PRIOMASK;
+		thread_newspi = 1;
+		workq = NULL;
+	} else {
+		thread_reuse = (reuse == 0)? 0: WQ_FLAG_THREAD_REUSE;
+		workq = item->workq;
+	}
+
+	
+	if (thread_reuse == 0) {
 		/* reuse is set to 0, when a thread is newly created to run a workitem */
 		_pthread_struct_init(self, attrs, stackaddr,  DEFAULT_STACK_SIZE, 1, 1);
 		self->wqthread = 1;
@@ -2913,15 +3083,20 @@
 		/* These are not joinable threads */
 		self->detached &= ~PTHREAD_CREATE_JOINABLE;
 		self->detached |= PTHREAD_CREATE_DETACHED;
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) 
 		_pthread_set_self(self);
 #endif
 #if WQ_TRACE
 		__kdebug_trace(0x9000050, self, item, item->func_arg, 0, 0);
 #endif
 		self->kernel_thread = kport;
-		self->fun = (void *(*)(void *))item->func;
-		self->arg = item->func_arg;
+		if (thread_newspi != 0) {
+			self->fun = (void *(*)(void *))__libdispatch_workerfunction;
+			self->arg = thread_priority;
+		} else {
+			self->fun = (void *(*)(void *))item->func;
+			self->arg = item->func_arg;
+		}
 		/* Add to the pthread list */
 		LOCK(_pthread_list_lock);
 		TAILQ_INSERT_TAIL(&__pthread_head, self, plist);
@@ -2931,7 +3106,7 @@
 		_pthread_count++;
 		UNLOCK(_pthread_list_lock);
 
-#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) 
 		if( (self->thread_id = __thread_selfid()) == (__uint64_t)-1)
 			printf("Failed to set thread_id in pthread_wqthread\n");
 #endif
@@ -2945,8 +3120,13 @@
 		if (self == NULL)
 			LIBC_ABORT("_pthread_wqthread: pthread %p setup to be NULL", self);
 
-		self->fun = (void *(*)(void *))item->func;
-		self->arg = item->func_arg;
+		if (thread_newspi != 0) {
+			self->fun = (void *(*)(void *))__libdispatch_workerfunction;
+			self->arg = NULL;
+		} else {
+			self->fun = (void *(*)(void *))item->func;
+			self->arg = item->func_arg;
+		}
 	}
 
 #if WQ_DEBUG
@@ -2972,27 +3152,29 @@
 	}
 #endif /* WQ_DEBUG */
 
-	self->cur_workq = workq;
-	self->cur_workitem = item;
-	OSAtomicDecrement32(&kernel_workq_count);
-
-	ret = (int)(intptr_t)(*self->fun)(self->arg);
-
-	/* If we reach here without going through the above initialization path then don't go through
-	 * with the teardown code path ( e.g. setjmp/longjmp ). Instead just exit this thread.
-	 */
-	if(self != pthread_self()) {
-		pthread_exit(PTHREAD_CANCELED);
-	}
-	
-	workqueue_exit(self, workq, item);
-
+	if (thread_newspi != 0) {
+		(*__libdispatch_workerfunction)(thread_priority, thread_options, NULL);
+		_pthread_workq_return(self);
+	 } else {
+		self->cur_workq = workq;
+		self->cur_workitem = item;
+		OSAtomicDecrement32Barrier(&kernel_workq_count);
+
+		ret = (int)(intptr_t)(*self->fun)(self->arg);
+		/* If we reach here without going through the above initialization path then don't go through
+		* with the teardown code path ( e.g. setjmp/longjmp ). Instead just exit this thread.
+		*/
+		if (self != pthread_self()) {
+			pthread_exit(PTHREAD_CANCELED);
+		}
+
+		workqueue_exit(self, workq, item);
+	}
 }
 
 static void
 workqueue_exit(pthread_t self, pthread_workqueue_t workq, pthread_workitem_t item)
 {
-	pthread_attr_t *attrs = &_pthread_attr_default;
 	pthread_workitem_t baritem;
 	pthread_workqueue_head_t headp;
 	void (*func)(pthread_workqueue_t, void *);
@@ -3004,7 +3186,6 @@
 #if WQ_TRACE
 	__kdebug_trace(0x9000070, self, 1, item->func_arg, workq->kq_count, 0);
 #endif
-	item->flags = 0;
 	free_workitem(item);
 
 	if ((workq->flags & PTHREAD_WORKQ_BARRIER_ON) == PTHREAD_WORKQ_BARRIER_ON) {
@@ -3030,7 +3211,6 @@
 #if WQ_LISTTRACE
 	__kdebug_trace(0x90080a8, workq, &workq->item_listhead, workq->item_listhead.tqh_first,  workq->item_listhead.tqh_last, 0);
 #endif
-			baritem->flags = 0;
 			free_workitem(baritem);
 			workq->flags &= ~PTHREAD_WORKQ_BARRIER_ON;
 #if WQ_TRACE
@@ -3088,21 +3268,78 @@
 
 /* XXXXXXXXXXXXX Pthread Workqueue functions XXXXXXXXXXXXXXXXXX */
 
+int	/* returns EPERM, ENOTSUP, EBUSY, or the (non -1) result of the kernel probe below */
+pthread_workqueue_setdispatch_np(void (*worker_func)(int, int, void *))	/* install worker_func as the libdispatch worker function */
+{
+	int error = 0;
+
+	if (__workqueue_oldspis != 0)	/* EPERM once the old-SPI flag is set (pthread_workqueue_create_np sets it) */
+		return(EPERM);
+
+	__workqueue_newspis = 1;		/* set before the kernel probe; cleared again below if the probe fails */
+
+	if (__libdispatch_workerfunction == NULL) {	/* no worker function registered yet */
+		__libdispatch_workerfunction = worker_func;	/* NOTE(review): worker_func is not checked for NULL -- confirm callers never pass NULL */
+		/* check whether the kernel supports new SPIs */
+		error = __workq_kernreturn(WQOPS_QUEUE_NEWSPISUPP, NULL, 0, 0);
+		if (error == -1){	/* probe failed: undo the registration and report ENOTSUP */
+			__libdispatch_workerfunction = NULL;
+			error = ENOTSUP;
+			__workqueue_newspis = 0;
+		} else  {
+			/* prepare the kernel for workq action */
+			(void)__workq_open();	/* return value explicitly discarded */
+			kernel_workq_setup = 1;
+			if (__is_threaded == 0)
+				__is_threaded = 1;
+			__workqueue_newspis = 1;		/* re-asserts the value assigned before the probe */
+		}
+	} else {	/* a worker function is already registered */
+		error = EBUSY;
+	}
+
+	return(error);
+}
+
+int	/* returns 0 on success, EPERM if the new SPI flag is unset, or errno when the request fails */
+pthread_workqueue_addthreads_np(int queue_priority, int options, int numthreads)	/* issue a WQOPS_QUEUE_REQTHREADS request for numthreads threads */
+{
+	int priority = queue_priority & WQ_FLAG_THREAD_PRIOMASK;	/* only the bits selected by WQ_FLAG_THREAD_PRIOMASK are kept */
+	int error = 0;
+
+	/* EPERM unless the new-SPI flag has been set (pthread_workqueue_setdispatch_np sets it) */
+	if (__workqueue_newspis == 0)
+		return(EPERM);
+
+
+	if ((options & WORKQ_ADDTHREADS_OPTION_OVERCOMMIT) != 0)	/* overcommit option present: OR WORKQUEUE_OVERCOMMIT into the priority word */
+		priority |= WORKQUEUE_OVERCOMMIT;
+		/* NOTE: despite the indentation, everything below runs unconditionally -- the if above has no braces */
+		error = __workq_kernreturn(WQOPS_QUEUE_REQTHREADS, NULL, numthreads, priority);
+
+		if (error == -1)	/* request failed: hand errno back to the caller */
+			return(errno);
+		else
+			return(0);
+}
+
 int 
 pthread_workqueue_create_np(pthread_workqueue_t * workqp, const pthread_workqueue_attr_t * attr)
 {
 	pthread_workqueue_t wq;
 	pthread_workqueue_head_t headp;
 
-#if defined(__arm__)
-	/* not supported under arm */
-	return(ENOTSUP);
-#endif
 #if defined(__ppc__)
 	IF_ROSETTA() {
 		return(ENOTSUP);
 	}
 #endif
+	if (__workqueue_newspis != 0)
+		return(EPERM);
+
+	if (__workqueue_oldspis == 0)
+		__workqueue_oldspis = 1;	
+
 	if ((attr != NULL) && (attr->sig != PTHREAD_WORKQUEUE_ATTR_SIG)) {
 		return(EINVAL);
 	}
@@ -3140,6 +3377,9 @@
 pthread_workqueue_additem_np(pthread_workqueue_t workq, void ( *workitem_func)(void *), void * workitem_arg, pthread_workitem_handle_t * itemhandlep, unsigned int *gencountp)
 {
 	pthread_workitem_t witem;
+
+	if (__workqueue_newspis != 0)
+		return(EPERM);
 
 	if (valid_workq(workq) == 0) {
 		return(EINVAL);
@@ -3154,10 +3394,7 @@
 	witem = alloc_workitem();
 	witem->func = workitem_func;
 	witem->func_arg = workitem_arg;
-	witem->flags = 0;
 	witem->workq = workq;
-	witem->item_entry.tqe_next = 0;
-	witem->item_entry.tqe_prev = 0;
 
 	/* alloc workitem can drop the lock, check the state  */
 	if ((workq->flags & (PTHREAD_WORKQ_IN_TERMINATE | PTHREAD_WORKQ_DESTROYED)) != 0) {
@@ -3170,7 +3407,7 @@
 	if (itemhandlep != NULL)
 		*itemhandlep = (pthread_workitem_handle_t *)witem;
 	if (gencountp != NULL)
-		*gencountp = witem->gencount;
+		*gencountp = 0;
 #if WQ_TRACE
 	__kdebug_trace(0x9008090, witem, witem->func, witem->func_arg,  workq, 0);
 #endif
@@ -3190,7 +3427,8 @@
 int
 pthread_workqueue_getovercommit_np(pthread_workqueue_t workq,  unsigned int *ocommp)
 {
-        pthread_workitem_t witem;
+	if (__workqueue_newspis != 0)
+		return(EPERM);
 
         if (valid_workq(workq) == 0) {
                 return(EINVAL);
@@ -3202,16 +3440,12 @@
 }
 
 
-/* DEPRECATED 
-int pthread_workqueue_removeitem_np(pthread_workqueue_t workq, pthread_workitem_handle_t itemhandle, unsigned int gencount)
-int pthread_workqueue_addbarrier_np(pthread_workqueue_t workq, void (* callback_func)(pthread_workqueue_t, void *), void * callback_arg, pthread_workitem_handle_t *itemhandlep, unsigned int *gencountp)
-int pthread_workqueue_suspend_np(pthread_workqueue_t workq)
-int pthread_workqueue_resume_np(pthread_workqueue_t workq)
-*/
-
 #else /* !BUILDING_VARIANT ] [ */
 extern int __unix_conforming;
 extern int _pthread_count;
+extern int __workqueue_newspis;
+extern int __workqueue_oldspis;
+
 extern pthread_lock_t _pthread_list_lock;
 extern void _pthread_testcancel(pthread_t thread, int isconforming);
 extern int _pthread_reap_thread(pthread_t th, mach_port_t kernel_thread, void **value_ptr, int conforming);
@@ -3227,8 +3461,7 @@
 	int already_exited, res;
 	void * dummy;
 	semaphore_t death;
-	mach_port_t joinport;
-	int newstyle = 0;
+	int newstyle;
 
 	LOCK(thread->lock);
 	already_exited = (thread->detached & _PTHREAD_EXITED);