Loading...
iokit/Kernel/IOSharedDataQueue.cpp xnu-3248.50.21 xnu-4903.221.2
--- xnu/xnu-3248.50.21/iokit/Kernel/IOSharedDataQueue.cpp
+++ xnu/xnu-4903.221.2/iokit/Kernel/IOSharedDataQueue.cpp
@@ -157,25 +157,36 @@
 
 IODataQueueEntry * IOSharedDataQueue::peek()
 {
-    IODataQueueEntry *entry = 0;
-
-    if (dataQueue && (dataQueue->head != dataQueue->tail)) {
+    IODataQueueEntry *entry      = 0;
+    UInt32            headOffset;
+    UInt32            tailOffset;
+
+    if (!dataQueue) {
+        return NULL;
+    }
+
+    // Read head and tail with acquire barrier
+    // See rdar://problem/40780584 for an explanation of relaxed/acquire barriers
+    headOffset = __c11_atomic_load((_Atomic UInt32 *)&dataQueue->head, __ATOMIC_RELAXED);
+    tailOffset = __c11_atomic_load((_Atomic UInt32 *)&dataQueue->tail, __ATOMIC_ACQUIRE);
+
+    if (headOffset != tailOffset) {
         IODataQueueEntry *  head        = 0;
         UInt32              headSize    = 0;
         UInt32              headOffset  = dataQueue->head;
         UInt32              queueSize   = getQueueSize();
-        
+
         if (headOffset >= queueSize) {
             return NULL;
         }
-        
+
         head         = (IODataQueueEntry *)((char *)dataQueue->queue + headOffset);
         headSize     = head->size;
-        
+
         // Check if there's enough room before the end of the queue for a header.
         // If there is room, check if there's enough room to hold the header and
         // the data.
-        
+
         if ((headOffset > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) ||
             (headOffset + DATA_QUEUE_ENTRY_HEADER_SIZE > queueSize) ||
             (headOffset + DATA_QUEUE_ENTRY_HEADER_SIZE > UINT32_MAX - headSize) ||
@@ -194,11 +205,17 @@
 
 Boolean IOSharedDataQueue::enqueue(void * data, UInt32 dataSize)
 {
-    const UInt32       head      = dataQueue->head;  // volatile
-    const UInt32       tail      = dataQueue->tail;
+    UInt32             head;
+    UInt32             tail;
+    UInt32             newTail;
     const UInt32       entrySize = dataSize + DATA_QUEUE_ENTRY_HEADER_SIZE;
     IODataQueueEntry * entry;
     
+    // Force a single read of head and tail
+    // See rdar://problem/40780584 for an explanation of relaxed/acquire barriers
+    tail = __c11_atomic_load((_Atomic UInt32 *)&dataQueue->tail, __ATOMIC_RELAXED);
+    head = __c11_atomic_load((_Atomic UInt32 *)&dataQueue->head, __ATOMIC_ACQUIRE);
+
     // Check for overflow of entrySize
     if (dataSize > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) {
         return false;
@@ -223,7 +240,7 @@
             // exactly matches the available space at the end of the queue.
             // The tail can range from 0 to dataQueue->queueSize inclusive.
             
-            OSAddAtomic(entrySize, (SInt32 *)&dataQueue->tail);
+            newTail = tail + entrySize;
         }
         else if ( head > entrySize )     // Is there enough room at the beginning?
         {
@@ -242,7 +259,7 @@
             }
             
             memcpy(&dataQueue->queue->data, data, dataSize);
-            OSCompareAndSwap(dataQueue->tail, entrySize, &dataQueue->tail);
+            newTail = entrySize;
         }
         else
         {
@@ -260,23 +277,36 @@
             
             entry->size = dataSize;
             memcpy(&entry->data, data, dataSize);
-            OSAddAtomic(entrySize, (SInt32 *)&dataQueue->tail);
+            newTail = tail + entrySize;
         }
         else
         {
             return false;    // queue is full
         }
     }
-    
-    // Send notification (via mach message) that data is available.
-    
-    if ( ( head == tail )                                                   /* queue was empty prior to enqueue() */
-        ||   ( dataQueue->head == tail ) )   /* queue was emptied during enqueue() */
-    {
-        sendDataAvailableNotification();
-    }
-    
-    return true;
+
+	// Publish the data we just enqueued
+	__c11_atomic_store((_Atomic UInt32 *)&dataQueue->tail, newTail, __ATOMIC_RELEASE);
+
+	if (tail != head) {
+		//
+		// The memory barrier below paris with the one in ::dequeue
+		// so that either our store to the tail cannot be missed by
+		// the next dequeue attempt, or we will observe the dequeuer
+		// making the queue empty.
+		//
+		// Of course, if we already think the queue is empty,
+		// there's no point paying this extra cost.
+		//
+		__c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
+		head = __c11_atomic_load((_Atomic UInt32 *)&dataQueue->head, __ATOMIC_RELAXED);
+	}
+
+	if (tail == head) {
+		// Send notification (via mach message) that data is now available.
+		sendDataAvailableNotification();
+	}
+	return true;
 }
 
 Boolean IOSharedDataQueue::dequeue(void *data, UInt32 *dataSize)
@@ -284,76 +314,82 @@
     Boolean             retVal          = TRUE;
     IODataQueueEntry *  entry           = 0;
     UInt32              entrySize       = 0;
+    UInt32              headOffset      = 0;
+    UInt32              tailOffset      = 0;
     UInt32              newHeadOffset   = 0;
 
-    if (dataQueue) {
-        if (dataQueue->head != dataQueue->tail) {
-            IODataQueueEntry *  head        = 0;
-            UInt32              headSize    = 0;
-            UInt32              headOffset  = dataQueue->head;
-            UInt32              queueSize   = getQueueSize();
-            
-            if (headOffset > queueSize) {
+	if (!dataQueue || (data && !dataSize)) {
+        return false;
+    }
+
+    // Read head and tail with acquire barrier
+    // See rdar://problem/40780584 for an explanation of relaxed/acquire barriers
+    headOffset = __c11_atomic_load((_Atomic UInt32 *)&dataQueue->head, __ATOMIC_RELAXED);
+    tailOffset = __c11_atomic_load((_Atomic UInt32 *)&dataQueue->tail, __ATOMIC_ACQUIRE);
+
+    if (headOffset != tailOffset) {
+        IODataQueueEntry *  head        = 0;
+        UInt32              headSize    = 0;
+        UInt32              queueSize   = getQueueSize();
+
+        if (headOffset > queueSize) {
+            return false;
+        }
+
+        head         = (IODataQueueEntry *)((char *)dataQueue->queue + headOffset);
+        headSize     = head->size;
+
+        // we wrapped around to beginning, so read from there
+        // either there was not even room for the header
+        if ((headOffset > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) ||
+            (headOffset + DATA_QUEUE_ENTRY_HEADER_SIZE > queueSize) ||
+            // or there was room for the header, but not for the data
+            (headOffset + DATA_QUEUE_ENTRY_HEADER_SIZE > UINT32_MAX - headSize) ||
+            (headOffset + headSize + DATA_QUEUE_ENTRY_HEADER_SIZE > queueSize)) {
+            // Note: we have to wrap to the beginning even with the UINT32_MAX checks
+            // because we have to support a queueSize of UINT32_MAX.
+            entry           = dataQueue->queue;
+            entrySize       = entry->size;
+            if ((entrySize > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) ||
+                (entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE > queueSize)) {
                 return false;
             }
-            
-            head         = (IODataQueueEntry *)((char *)dataQueue->queue + headOffset);
-            headSize     = head->size;
-            
-            // we wrapped around to beginning, so read from there
-            // either there was not even room for the header
-            if ((headOffset > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) ||
-                (headOffset + DATA_QUEUE_ENTRY_HEADER_SIZE > queueSize) ||
-                // or there was room for the header, but not for the data
-                (headOffset + DATA_QUEUE_ENTRY_HEADER_SIZE > UINT32_MAX - headSize) ||
-                (headOffset + headSize + DATA_QUEUE_ENTRY_HEADER_SIZE > queueSize)) {
-                // Note: we have to wrap to the beginning even with the UINT32_MAX checks
-                // because we have to support a queueSize of UINT32_MAX.
-                entry           = dataQueue->queue;
-                entrySize       = entry->size;
-                if ((entrySize > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) ||
-                    (entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE > queueSize)) {
-                    return false;
-                }
-                newHeadOffset   = entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE;
-                // else it is at the end
-            } else {
-                entry           = head;
-                entrySize       = entry->size;
-                if ((entrySize > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) ||
-                    (entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE > UINT32_MAX - headOffset) ||
-                    (entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE + headOffset > queueSize)) {
-                    return false;
-                }
-                newHeadOffset   = headOffset + entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE;
+            newHeadOffset   = entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE;
+            // else it is at the end
+        } else {
+            entry           = head;
+            entrySize       = entry->size;
+            if ((entrySize > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) ||
+                (entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE > UINT32_MAX - headOffset) ||
+                (entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE + headOffset > queueSize)) {
+                return false;
             }
-        }
-        
-        if (entry) {
-            if (data) {
-                if (dataSize) {
-                    if (entrySize <= *dataSize) {
-                        memcpy(data, &(entry->data), entrySize);
-                        OSCompareAndSwap( dataQueue->head, newHeadOffset, (SInt32 *)&dataQueue->head);
-                    } else {
-                        retVal = FALSE;
-                    }
-                } else {
-                    retVal = FALSE;
-                }
-            } else {
-                OSCompareAndSwap( dataQueue->head, newHeadOffset, (SInt32 *)&dataQueue->head);
-            }
-
-            if (dataSize) {
-                *dataSize = entrySize;
-            }
-        } else {
-            retVal = FALSE;
-        }
-    } else {
-        retVal = FALSE;
-    }
+            newHeadOffset   = headOffset + entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE;
+        }
+	} else {
+		// empty queue
+		return false;
+	}
+
+	if (data) {
+		if (entrySize > *dataSize) {
+			// not enough space
+			return false;
+		}
+		memcpy(data, &(entry->data), entrySize);
+		*dataSize = entrySize;
+	}
+
+	__c11_atomic_store((_Atomic UInt32 *)&dataQueue->head, newHeadOffset, __ATOMIC_RELEASE);
+
+	if (newHeadOffset == tailOffset) {
+		//
+		// If we are making the queue empty, then we need to make sure
+		// that either the enqueuer notices, or we notice the enqueue
+		// that raced with our making of the queue empty.
+		//
+		__c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
+	}
     
     return retVal;
 }