From 766b92311632a73939ee374da6456ff1278b9986 Mon Sep 17 00:00:00 2001
From: Antonio SJ Musumeci <trapexit@spawn.link>
Date: Sat, 9 Sep 2023 15:05:17 -0500
Subject: [PATCH] Fix thread pool destruction where threads don't explicitly
 exit themselves

---
 libfuse/Makefile                              |    3 +-
 .../moodycamel/blockingconcurrentqueue.h      |    0
 .../moodycamel/concurrentqueue.h              |    0
 .../moodycamel/lightweightsemaphore.h         |    0
 libfuse/{lib => include}/thread_pool.hpp      |   54 +-
 libfuse/lib/fuse.c                            |    7 +-
 libfuse/lib/fuse_loop.cpp                     |   25 +-
 libfuse/lib/syslog.c                          |  114 -
 libfuse/lib/syslog.h                          |   31 -
 src/moodycamel/blockingconcurrentqueue.h      |  582 ---
 src/moodycamel/concurrentqueue.h              | 3747 -----------------
 src/moodycamel/lightweightsemaphore.h         |  427 --
 src/thread_pool.hpp                           |  303 --
 13 files changed, 49 insertions(+), 5244 deletions(-)
 rename libfuse/{lib => include}/moodycamel/blockingconcurrentqueue.h (100%)
 rename libfuse/{lib => include}/moodycamel/concurrentqueue.h (100%)
 rename libfuse/{lib => include}/moodycamel/lightweightsemaphore.h (100%)
 rename libfuse/{lib => include}/thread_pool.hpp (84%)
 delete mode 100644 libfuse/lib/syslog.c
 delete mode 100644 libfuse/lib/syslog.h
 delete mode 100644 src/moodycamel/blockingconcurrentqueue.h
 delete mode 100644 src/moodycamel/concurrentqueue.h
 delete mode 100644 src/moodycamel/lightweightsemaphore.h
 delete mode 100644 src/thread_pool.hpp
diff --git a/libfuse/Makefile b/libfuse/Makefile
index e9f0af24..a20b0742 100644
--- a/libfuse/Makefile
+++ b/libfuse/Makefile
@@ -50,8 +50,7 @@ SRC_C = \
 	lib/fuse_session.c \
 	lib/fuse_signals.c \
 	lib/helper.c \
-	lib/mount.c \
-	lib/syslog.c
+	lib/mount.c
 SRC_CPP = \
 	lib/format.cpp \
 	lib/os.cpp \
diff --git a/libfuse/lib/moodycamel/blockingconcurrentqueue.h b/libfuse/include/moodycamel/blockingconcurrentqueue.h
similarity index 100%
rename from libfuse/lib/moodycamel/blockingconcurrentqueue.h
rename to libfuse/include/moodycamel/blockingconcurrentqueue.h
diff --git a/libfuse/lib/moodycamel/concurrentqueue.h b/libfuse/include/moodycamel/concurrentqueue.h
similarity index 100%
rename from libfuse/lib/moodycamel/concurrentqueue.h
rename to libfuse/include/moodycamel/concurrentqueue.h
diff --git a/libfuse/lib/moodycamel/lightweightsemaphore.h b/libfuse/include/moodycamel/lightweightsemaphore.h
similarity index 100%
rename from libfuse/lib/moodycamel/lightweightsemaphore.h
rename to libfuse/include/moodycamel/lightweightsemaphore.h
diff --git a/libfuse/lib/thread_pool.hpp b/libfuse/include/thread_pool.hpp
similarity index 84%
rename from libfuse/lib/thread_pool.hpp
rename to libfuse/include/thread_pool.hpp
index c7c000b1..1a2aa539 100644
--- a/libfuse/lib/thread_pool.hpp
+++ b/libfuse/include/thread_pool.hpp
@@ -1,7 +1,6 @@
 #pragma once
 
 #include "moodycamel/blockingconcurrentqueue.h"
-#include "syslog.h"
 
 #include <atomic>
 #include <csignal>
@@ -14,6 +13,7 @@
 #include <thread>
 #include <vector>
 
+#include <syslog.h>
 
 struct ThreadPoolTraits : public moodycamel::ConcurrentQueueDefaultTraits
 {
@@ -35,10 +35,11 @@ public:
     : _queue(queue_depth_,thread_count_,thread_count_),
       _name(get_thread_name(name_))
   {
-    syslog_debug("threadpool: spawning %zu threads of queue depth %zu named '%s'",
-                 thread_count_,
-                 queue_depth_,
-                 _name.c_str());
+    syslog(LOG_DEBUG,
+           "threadpool: spawning %zu threads of queue depth %zu named '%s'",
+           thread_count_,
+           queue_depth_,
+           _name.c_str());
 
     sigset_t oldset;
     sigset_t newset;
@@ -55,9 +56,10 @@ public:
         rv = pthread_create(&t,NULL,ThreadPool::start_routine,this);
         if(rv != 0)
           {
-            syslog_warning("threadpool: error spawning thread - %d (%s)",
-                           rv,
-                           strerror(rv));
+            syslog(LOG_WARNING,
+                   "threadpool: error spawning thread - %d (%s)",
+                   rv,
+                   strerror(rv));
             continue;
           }
 
@@ -75,17 +77,18 @@ public:
 
   ~ThreadPool()
   {
-    syslog_debug("threadpool: destroying %zu threads named '%s'",
-                 _threads.size(),
-                 _name.c_str());
+    syslog(LOG_DEBUG,
+           "threadpool: destroying %zu threads named '%s'",
+           _threads.size(),
+           _name.c_str());
+
+    auto func = []() { pthread_exit(NULL); };
+    for(std::size_t i = 0; i < _threads.size(); i++)
+      _queue.enqueue(func);
 
     for(auto t : _threads)
       pthread_cancel(t);
 
-    Func f;
-    while(_queue.try_dequeue(f))
-      continue;
-
     for(auto t : _threads)
       pthread_join(t,NULL);
   }
@@ -142,9 +145,10 @@ public:
 
     if(rv != 0)
       {
-        syslog_warning("threadpool: error spawning thread - %d (%s)",
-                       rv,
-                       strerror(rv));
+        syslog(LOG_WARNING,
+               "threadpool: error spawning thread - %d (%s)",
+               rv,
+               strerror(rv));
         return -rv;
       }
 
@@ -156,9 +160,10 @@ public:
       _threads.push_back(t);
     }
 
-    syslog_debug("threadpool: 1 thread added to pool '%s' named '%s'",
-                 _name.c_str(),
-                 name.c_str());
+    syslog(LOG_DEBUG,
+           "threadpool: 1 thread added to pool '%s' named '%s'",
+           _name.c_str(),
+           name.c_str());
 
     return 0;
   }
@@ -195,9 +200,10 @@ public:
 
       char name[16];
       pthread_getname_np(t,name,sizeof(name));
-      syslog_debug("threadpool: 1 thread removed from pool '%s' named '%s'",
-                   _name.c_str(),
-                   name);
+      syslog(LOG_DEBUG,
+             "threadpool: 1 thread removed from pool '%s' named '%s'",
+             _name.c_str(),
+             name);
 
       pthread_exit(NULL);
     };
diff --git a/libfuse/lib/fuse.c b/libfuse/lib/fuse.c
index 2ddd1674..1e9f6245 100644
--- a/libfuse/lib/fuse.c
+++ b/libfuse/lib/fuse.c
@@ -44,6 +44,7 @@
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/uio.h>
+#include <syslog.h>
 #include <time.h>
 #include <unistd.h>
 
@@ -3899,7 +3900,7 @@ fuse_invalidate_all_nodes()
 {
   struct fuse *f = fuse_get_fuse_obj();
 
-  syslog_info("invalidating file entries");
+  syslog(LOG_INFO,"invalidating file entries");
 
   pthread_mutex_lock(&f->lock);
   for(int i = 0; i < f->id_table.size; i++)
@@ -3925,7 +3926,7 @@ fuse_invalidate_all_nodes()
 void
 fuse_gc()
 {
-  syslog_info("running thorough garbage collection");
+  syslog(LOG_INFO,"running thorough garbage collection");
   node_gc();
   msgbuf_gc();
   fuse_malloc_trim();
@@ -3934,7 +3935,7 @@ fuse_gc()
 void
 fuse_gc1()
 {
-  syslog_info("running basic garbage collection");
+  syslog(LOG_INFO,"running basic garbage collection");
   node_gc1();
   msgbuf_gc_10percent();
   fuse_malloc_trim();
diff --git a/libfuse/lib/fuse_loop.cpp b/libfuse/lib/fuse_loop.cpp
index 327f945a..b702f338 100644
--- a/libfuse/lib/fuse_loop.cpp
+++ b/libfuse/lib/fuse_loop.cpp
@@ -6,7 +6,6 @@
 #include "fmt/core.h"
 #include "make_unique.hpp"
 #include "scope_guard.hpp"
-#include "syslog.h"
 #include "thread_pool.hpp"
 
 #include "fuse_i.h"
@@ -26,6 +25,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/time.h>
+#include <syslog.h>
 #include <unistd.h>
 
 #include <cassert>
@@ -449,7 +449,9 @@ pin_threads(const std::vector<pthread_t> read_threads_,
   if(type_ == "R1PPSP")
     return ::pin_threads_R1PPSP(read_threads_,process_threads_);
 
-  syslog_warning("Invalid pin-threads value, ignoring: %s",type_.c_str());
+  syslog(LOG_WARNING,
+         "Invalid pin-threads value, ignoring: %s",
+         type_.c_str());
 }
 
 static
@@ -510,15 +512,16 @@ fuse_session_loop_mt(struct fuse_session *se_,
 
   ::pin_threads(read_threads,process_threads,pin_threads_type_);
 
-  syslog_info("read-thread-count=%d; "
-              "process-thread-count=%d; "
-              "process-thread-queue-depth=%d; "
-              "pin-threads=%s;"
-              ,
-              read_thread_count,
-              process_thread_count,
-              process_thread_queue_depth,
-              pin_threads_type_.c_str());
+  syslog(LOG_INFO,
+         "read-thread-count=%d; "
+         "process-thread-count=%d; "
+         "process-thread-queue-depth=%d; "
+         "pin-threads=%s;"
+         ,
+         read_thread_count,
+         process_thread_count,
+         process_thread_queue_depth,
+         pin_threads_type_.c_str());
 
   ::wait(se_,&finished);
 
diff --git a/libfuse/lib/syslog.c b/libfuse/lib/syslog.c
deleted file mode 100644
index fff3701b..00000000
--- a/libfuse/lib/syslog.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
-  ISC License
-
-  Copyright (c) 2023, Antonio SJ Musumeci <trapexit@spawn.link>
-
-  Permission to use, copy, modify, and/or distribute this software for any
-  purpose with or without fee is hereby granted, provided that the above
-  copyright notice and this permission notice appear in all copies.
-
-  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*/
-
-#include <stdarg.h>
-#include <syslog.h>
-#include <stdbool.h>
-
-
-void
-syslog_open()
-{
-  const char *ident = "mergerfs";
-  const int   option = (LOG_CONS|LOG_PID);
-  const int   facility = LOG_USER;
-
-  openlog(ident,option,facility);
-}
-
-void
-syslog_close()
-{
-  closelog();
-}
-
-static
-void
-syslog_vlog(const int   priority_,
-            const char *format_,
-            va_list     valist_)
-{
-  vsyslog(priority_,format_,valist_);
-}
-
-void
-syslog_log(const int   priority_,
-           const char *format_,
-           ...)
-{
-  va_list valist;
-
-  va_start(valist,format_);
-  syslog_vlog(priority_,format_,valist);
-  va_end(valist);
-}
-
-void
-syslog_debug(const char *format_,
-             ...)
-{
-  va_list valist;
-
-  va_start(valist,format_);
-  syslog_vlog(LOG_DEBUG,format_,valist);
-  va_end(valist);
-}
-
-void
-syslog_info(const char *format_,
-            ...)
-{
-  va_list valist;
-
-  va_start(valist,format_);
-  syslog_vlog(LOG_INFO,format_,valist);
-  va_end(valist);
-}
-
-void
-syslog_notice(const char *format_,
-              ...)
-{
-  va_list valist;
-
-  va_start(valist,format_);
-  syslog_vlog(LOG_NOTICE,format_,valist);
-  va_end(valist);
-}
-
-void
-syslog_warning(const char *format_,
-               ...)
-{
-  va_list valist;
-
-  va_start(valist,format_);
-  syslog_vlog(LOG_WARNING,format_,valist);
-  va_end(valist);
-}
-
-void
-syslog_error(const char *format_,
-             ...)
-{
-  va_list valist;
-
-  va_start(valist,format_);
-  syslog_vlog(LOG_ERR,format_,valist);
-  va_end(valist);
-}
diff --git a/libfuse/lib/syslog.h b/libfuse/lib/syslog.h
deleted file mode 100644
index 847c4f60..00000000
--- a/libfuse/lib/syslog.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
-  ISC License
-
-  Copyright (c) 2023, Antonio SJ Musumeci <trapexit@spawn.link>
-
-  Permission to use, copy, modify, and/or distribute this software for any
-  purpose with or without fee is hereby granted, provided that the above
-  copyright notice and this permission notice appear in all copies.
-
-  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*/
-
-#pragma once
-
-#include <syslog.h>
-
-
-void syslog_open();
-void syslog_log(const int priority, const char *format, ...);
-void syslog_debug(const char *format, ...);
-void syslog_info(const char *format, ...);
-void syslog_notice(const char *format, ...);
-void syslog_warning(const char *format, ...);
-void syslog_error(const char *format, ...);
-void syslog_close();
diff --git a/src/moodycamel/blockingconcurrentqueue.h b/src/moodycamel/blockingconcurrentqueue.h
deleted file mode 100644
index 205a4db7..00000000
--- a/src/moodycamel/blockingconcurrentqueue.h
+++ /dev/null
@@ -1,582 +0,0 @@
-// Provides an efficient blocking version of moodycamel::ConcurrentQueue.
-// ©2015-2020 Cameron Desrochers. Distributed under the terms of the simplified
-// BSD license, available at the top of concurrentqueue.h.
-// Also dual-licensed under the Boost Software License (see LICENSE.md)
-// Uses Jeff Preshing's semaphore implementation (under the terms of its
-// separate zlib license, see lightweightsemaphore.h).
-
-#pragma once
-
-#include "concurrentqueue.h"
-#include "lightweightsemaphore.h"
-
-#include <type_traits>
-#include <cerrno>
-#include <memory>
-#include <chrono>
-#include <ctime>
-
-namespace moodycamel
-{
-// This is a blocking version of the queue. It has an almost identical interface to
-// the normal non-blocking version, with the addition of various wait_dequeue() methods
-// and the removal of producer-specific dequeue methods.
-template<typename T, typename Traits = ConcurrentQueueDefaultTraits>
-class BlockingConcurrentQueue
-{
-private:
-	typedef ::moodycamel::ConcurrentQueue<T, Traits> ConcurrentQueue;
-	typedef ::moodycamel::LightweightSemaphore LightweightSemaphore;
-
-public:
-	typedef typename ConcurrentQueue::producer_token_t producer_token_t;
-	typedef typename ConcurrentQueue::consumer_token_t consumer_token_t;
-	
-	typedef typename ConcurrentQueue::index_t index_t;
-	typedef typename ConcurrentQueue::size_t size_t;
-	typedef typename std::make_signed<size_t>::type ssize_t;
-	
-	static const size_t BLOCK_SIZE = ConcurrentQueue::BLOCK_SIZE;
-	static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD;
-	static const size_t EXPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE;
-	static const size_t IMPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE;
-	static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = ConcurrentQueue::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
-	static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = ConcurrentQueue::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE;
-	static const size_t MAX_SUBQUEUE_SIZE = ConcurrentQueue::MAX_SUBQUEUE_SIZE;
-	
-public:
-	// Creates a queue with at least `capacity` element slots; note that the
-	// actual number of elements that can be inserted without additional memory
-	// allocation depends on the number of producers and the block size (e.g. if
-	// the block size is equal to `capacity`, only a single block will be allocated
-	// up-front, which means only a single producer will be able to enqueue elements
-	// without an extra allocation -- blocks aren't shared between producers).
-	// This method is not thread safe -- it is up to the user to ensure that the
-	// queue is fully constructed before it starts being used by other threads (this
-	// includes making the memory effects of construction visible, possibly with a
-	// memory barrier).
-	explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE)
-		: inner(capacity), sema(create<LightweightSemaphore, ssize_t, int>(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy<LightweightSemaphore>)
-	{
-		assert(reinterpret_cast<ConcurrentQueue*>((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member");
-		if (!sema) {
-			MOODYCAMEL_THROW(std::bad_alloc());
-		}
-	}
-	
-	BlockingConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers)
-		: inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create<LightweightSemaphore, ssize_t, int>(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy<LightweightSemaphore>)
-	{
-		assert(reinterpret_cast<ConcurrentQueue*>((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member");
-		if (!sema) {
-			MOODYCAMEL_THROW(std::bad_alloc());
-		}
-	}
-	
-	// Disable copying and copy assignment
-	BlockingConcurrentQueue(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
-	BlockingConcurrentQueue& operator=(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
-	
-	// Moving is supported, but note that it is *not* a thread-safe operation.
-	// Nobody can use the queue while it's being moved, and the memory effects
-	// of that move must be propagated to other threads before they can use it.
-	// Note: When a queue is moved, its tokens are still valid but can only be
-	// used with the destination queue (i.e. semantically they are moved along
-	// with the queue itself).
-	BlockingConcurrentQueue(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT
-		: inner(std::move(other.inner)), sema(std::move(other.sema))
-	{ }
-	
-	inline BlockingConcurrentQueue& operator=(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT
-	{
-		return swap_internal(other);
-	}
-	
-	// Swaps this queue's state with the other's. Not thread-safe.
-	// Swapping two queues does not invalidate their tokens, however
-	// the tokens that were created for one queue must be used with
-	// only the swapped queue (i.e. the tokens are tied to the
-	// queue's movable state, not the object itself).
-	inline void swap(BlockingConcurrentQueue& other) MOODYCAMEL_NOEXCEPT
-	{
-		swap_internal(other);
-	}
-	
-private:
-	BlockingConcurrentQueue& swap_internal(BlockingConcurrentQueue& other)
-	{
-		if (this == &other) {
-			return *this;
-		}
-		
-		inner.swap(other.inner);
-		sema.swap(other.sema);
-		return *this;
-	}
-	
-public:
-	// Enqueues a single item (by copying it).
-	// Allocates memory if required. Only fails if memory allocation fails (or implicit
-	// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
-	// or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
-	// Thread-safe.
-	inline bool enqueue(T const& item)
-	{
-		if ((details::likely)(inner.enqueue(item))) {
-			sema->signal();
-			return true;
-		}
-		return false;
-	}
-	
-	// Enqueues a single item (by moving it, if possible).
-	// Allocates memory if required. Only fails if memory allocation fails (or implicit
-	// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
-	// or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
-	// Thread-safe.
-	inline bool enqueue(T&& item)
-	{
-		if ((details::likely)(inner.enqueue(std::move(item)))) {
-			sema->signal();
-			return true;
-		}
-		return false;
-	}
-	
-	// Enqueues a single item (by copying it) using an explicit producer token.
-	// Allocates memory if required. Only fails if memory allocation fails (or
-	// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
-	// Thread-safe.
-	inline bool enqueue(producer_token_t const& token, T const& item)
-	{
-		if ((details::likely)(inner.enqueue(token, item))) {
-			sema->signal();
-			return true;
-		}
-		return false;
-	}
-	
-	// Enqueues a single item (by moving it, if possible) using an explicit producer token.
-	// Allocates memory if required. Only fails if memory allocation fails (or
-	// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
-	// Thread-safe.
-	inline bool enqueue(producer_token_t const& token, T&& item)
-	{
-		if ((details::likely)(inner.enqueue(token, std::move(item)))) {
-			sema->signal();
-			return true;
-		}
-		return false;
-	}
-	
-	// Enqueues several items.
-	// Allocates memory if required. Only fails if memory allocation fails (or
-	// implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
-	// is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
-	// Note: Use std::make_move_iterator if the elements should be moved instead of copied.
-	// Thread-safe.
-	template<typename It>
-	inline bool enqueue_bulk(It itemFirst, size_t count)
-	{
-		if ((details::likely)(inner.enqueue_bulk(std::forward<It>(itemFirst), count))) {
-			sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count);
-			return true;
-		}
-		return false;
-	}
-	
-	// Enqueues several items using an explicit producer token.
-	// Allocates memory if required. Only fails if memory allocation fails
-	// (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
-	// Note: Use std::make_move_iterator if the elements should be moved
-	// instead of copied.
-	// Thread-safe.
-	template<typename It>
-	inline bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
-	{
-		if ((details::likely)(inner.enqueue_bulk(token, std::forward<It>(itemFirst), count))) {
-			sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count);
-			return true;
-		}
-		return false;
-	}
-	
-	// Enqueues a single item (by copying it).
-	// Does not allocate memory. Fails if not enough room to enqueue (or implicit
-	// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
-	// is 0).
-	// Thread-safe.
-	inline bool try_enqueue(T const& item)
-	{
-		if (inner.try_enqueue(item)) {
-			sema->signal();
-			return true;
-		}
-		return false;
-	}
-	
-	// Enqueues a single item (by moving it, if possible).
-	// Does not allocate memory (except for one-time implicit producer).
-	// Fails if not enough room to enqueue (or implicit production is
-	// disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
-	// Thread-safe.
-	inline bool try_enqueue(T&& item)
-	{
-		if (inner.try_enqueue(std::move(item))) {
-			sema->signal();
-			return true;
-		}
-		return false;
-	}
-	
-	// Enqueues a single item (by copying it) using an explicit producer token.
-	// Does not allocate memory. Fails if not enough room to enqueue.
-	// Thread-safe.
-	inline bool try_enqueue(producer_token_t const& token, T const& item)
-	{
-		if (inner.try_enqueue(token, item)) {
-			sema->signal();
-			return true;
-		}
-		return false;
-	}
-	
-	// Enqueues a single item (by moving it, if possible) using an explicit producer token.
-	// Does not allocate memory. Fails if not enough room to enqueue.
-	// Thread-safe.
-	inline bool try_enqueue(producer_token_t const& token, T&& item)
-	{
-		if (inner.try_enqueue(token, std::move(item))) {
-			sema->signal();
-			return true;
-		}
-		return false;
-	}
-	
-	// Enqueues several items.
-	// Does not allocate memory (except for one-time implicit producer).
-	// Fails if not enough room to enqueue (or implicit production is
-	// disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
-	// Note: Use std::make_move_iterator if the elements should be moved
-	// instead of copied.
-	// Thread-safe.
-	template<typename It>
-	inline bool try_enqueue_bulk(It itemFirst, size_t count)
-	{
-		if (inner.try_enqueue_bulk(std::forward<It>(itemFirst), count)) {
-			sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count);
-			return true;
-		}
-		return false;
-	}
-	
-	// Enqueues several items using an explicit producer token.
-	// Does not allocate memory. Fails if not enough room to enqueue.
-	// Note: Use std::make_move_iterator if the elements should be moved
-	// instead of copied.
-	// Thread-safe.
-	template<typename It>
-	inline bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
-	{
-		if (inner.try_enqueue_bulk(token, std::forward<It>(itemFirst), count)) {
-			sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count);
-			return true;
-		}
-		return false;
-	}
-	
-	
-	// Attempts to dequeue from the queue.
-	// Returns false if all producer streams appeared empty at the time they
-	// were checked (so, the queue is likely but not guaranteed to be empty).
-	// Never allocates. Thread-safe.
-	template<typename U>
-	inline bool try_dequeue(U& item)
-	{
-		if (sema->tryWait()) {
-			while (!inner.try_dequeue(item)) {
-				continue;
-			}
-			return true;
-		}
-		return false;
-	}
-	
-	// Attempts to dequeue from the queue using an explicit consumer token.
-	// Returns false if all producer streams appeared empty at the time they
-	// were checked (so, the queue is likely but not guaranteed to be empty).
-	// Never allocates. Thread-safe.
-	template<typename U>
-	inline bool try_dequeue(consumer_token_t& token, U& item)
-	{
-		if (sema->tryWait()) {
-			while (!inner.try_dequeue(token, item)) {
-				continue;
-			}
-			return true;
-		}
-		return false;
-	}
-	
-	// Attempts to dequeue several elements from the queue.
-	// Returns the number of items actually dequeued.
-	// Returns 0 if all producer streams appeared empty at the time they
-	// were checked (so, the queue is likely but not guaranteed to be empty).
-	// Never allocates. Thread-safe.
-	template<typename It>
-	inline size_t try_dequeue_bulk(It itemFirst, size_t max)
-	{
-		size_t count = 0;
-		max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max);
-		while (count != max) {
-			count += inner.template try_dequeue_bulk<It&>(itemFirst, max - count);
-		}
-		return count;
-	}
-	
-	// Attempts to dequeue several elements from the queue using an explicit consumer token.
-	// Returns the number of items actually dequeued.
-	// Returns 0 if all producer streams appeared empty at the time they
-	// were checked (so, the queue is likely but not guaranteed to be empty).
-	// Never allocates. Thread-safe.
-	template<typename It>
-	inline size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max)
-	{
-		size_t count = 0;
-		max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max);
-		while (count != max) {
-			count += inner.template try_dequeue_bulk<It&>(token, itemFirst, max - count);
-		}
-		return count;
-	}
-	
-	
-	
-	// Blocks the current thread until there's something to dequeue, then
-	// dequeues it.
-	// Never allocates. Thread-safe.
-	template<typename U>
-	inline void wait_dequeue(U& item)
-	{
-		while (!sema->wait()) {
-			continue;
-		}
-		while (!inner.try_dequeue(item)) {
-			continue;
-		}
-	}
-
-	// Blocks the current thread until either there's something to dequeue
-	// or the timeout (specified in microseconds) expires. Returns false
-	// without setting `item` if the timeout expires, otherwise assigns
-	// to `item` and returns true.
-	// Using a negative timeout indicates an indefinite timeout,
-	// and is thus functionally equivalent to calling wait_dequeue.
-	// Never allocates. Thread-safe.
-	template<typename U>
-	inline bool wait_dequeue_timed(U& item, std::int64_t timeout_usecs)
-	{
-		if (!sema->wait(timeout_usecs)) {
-			return false;
-		}
-		while (!inner.try_dequeue(item)) {
-			continue;
-		}
-		return true;
-	}
-    
-    // Blocks the current thread until either there's something to dequeue
-	// or the timeout expires. Returns false without setting `item` if the
-    // timeout expires, otherwise assigns to `item` and returns true.
-	// Never allocates. Thread-safe.
-	template<typename U, typename Rep, typename Period>
-	inline bool wait_dequeue_timed(U& item, std::chrono::duration<Rep, Period> const& timeout)
-    {
-        return wait_dequeue_timed(item, std::chrono::duration_cast<std::chrono::microseconds>(timeout).count());
-    }
-	
-	// Blocks the current thread until there's something to dequeue, then
-	// dequeues it using an explicit consumer token.
-	// Never allocates. Thread-safe.
-	template<typename U>
-	inline void wait_dequeue(consumer_token_t& token, U& item)
-	{
-		while (!sema->wait()) {
-			continue;
-		}
-		while (!inner.try_dequeue(token, item)) {
-			continue;
-		}
-	}
-	
-	// Blocks the current thread until either there's something to dequeue
-	// or the timeout (specified in microseconds) expires. Returns false
-	// without setting `item` if the timeout expires, otherwise assigns
-	// to `item` and returns true.
-	// Using a negative timeout indicates an indefinite timeout,
-	// and is thus functionally equivalent to calling wait_dequeue.
-	// Never allocates. Thread-safe.
-	template<typename U>
-	inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::int64_t timeout_usecs)
-	{
-		if (!sema->wait(timeout_usecs)) {
-			return false;
-		}
-		while (!inner.try_dequeue(token, item)) {
-			continue;
-		}
-		return true;
-	}
-    
-    // Blocks the current thread until either there's something to dequeue
-	// or the timeout expires. Returns false without setting `item` if the
-    // timeout expires, otherwise assigns to `item` and returns true.
-	// Never allocates. Thread-safe.
-	template<typename U, typename Rep, typename Period>
-	inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::chrono::duration<Rep, Period> const& timeout)
-    {
-        return wait_dequeue_timed(token, item, std::chrono::duration_cast<std::chrono::microseconds>(timeout).count());
-    }
-	
-	// Attempts to dequeue several elements from the queue.
-	// Returns the number of items actually dequeued, which will
-	// always be at least one (this method blocks until the queue
-	// is non-empty) and at most max.
-	// Never allocates. Thread-safe.
-	template<typename It>
-	inline size_t wait_dequeue_bulk(It itemFirst, size_t max)
-	{
-		size_t count = 0;
-		max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max);
-		while (count != max) {
-			count += inner.template try_dequeue_bulk<It&>(itemFirst, max - count);
-		}
-		return count;
-	}
-	
-	// Attempts to dequeue several elements from the queue.
-	// Returns the number of items actually dequeued, which can
-	// be 0 if the timeout expires while waiting for elements,
-	// and at most max.
-	// Using a negative timeout indicates an indefinite timeout,
-	// and is thus functionally equivalent to calling wait_dequeue_bulk.
-	// Never allocates. Thread-safe.
-	template<typename It>
-	inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::int64_t timeout_usecs)
-	{
-		size_t count = 0;
-		max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs);
-		while (count != max) {
-			count += inner.template try_dequeue_bulk<It&>(itemFirst, max - count);
-		}
-		return count;
-	}
-    
-    // Attempts to dequeue several elements from the queue.
-	// Returns the number of items actually dequeued, which can
-	// be 0 if the timeout expires while waiting for elements,
-	// and at most max.
-	// Never allocates. Thread-safe.
-	template<typename It, typename Rep, typename Period>
-	inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::chrono::duration<Rep, Period> const& timeout)
-    {
-        return wait_dequeue_bulk_timed<It&>(itemFirst, max, std::chrono::duration_cast<std::chrono::microseconds>(timeout).count());
-    }
-	
-	// Attempts to dequeue several elements from the queue using an explicit consumer token.
-	// Returns the number of items actually dequeued, which will
-	// always be at least one (this method blocks until the queue
-	// is non-empty) and at most max.
-	// Never allocates. Thread-safe.
-	template<typename It>
-	inline size_t wait_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max)
-	{
-		size_t count = 0;
-		max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max);
-		while (count != max) {
-			count += inner.template try_dequeue_bulk<It&>(token, itemFirst, max - count);
-		}
-		return count;
-	}
-	
-	// Attempts to dequeue several elements from the queue using an explicit consumer token.
-	// Returns the number of items actually dequeued, which can
-	// be 0 if the timeout expires while waiting for elements,
-	// and at most max.
-	// Using a negative timeout indicates an indefinite timeout,
-	// and is thus functionally equivalent to calling wait_dequeue_bulk.
-	// Never allocates. Thread-safe.
-	template<typename It>
-	inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::int64_t timeout_usecs)
-	{
-		size_t count = 0;
-		max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs);
-		while (count != max) {
-			count += inner.template try_dequeue_bulk<It&>(token, itemFirst, max - count);
-		}
-		return count;
-	}
-	
-	// Attempts to dequeue several elements from the queue using an explicit consumer token.
-	// Returns the number of items actually dequeued, which can
-	// be 0 if the timeout expires while waiting for elements,
-	// and at most max.
-	// Never allocates. Thread-safe.
-	template<typename It, typename Rep, typename Period>
-	inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::chrono::duration<Rep, Period> const& timeout)
-    {
-        return wait_dequeue_bulk_timed<It&>(token, itemFirst, max, std::chrono::duration_cast<std::chrono::microseconds>(timeout).count());
-    }
-	
-	
-	// Returns an estimate of the total number of elements currently in the queue. This
-	// estimate is only accurate if the queue has completely stabilized before it is called
-	// (i.e. all enqueue and dequeue operations have completed and their memory effects are
-	// visible on the calling thread, and no further operations start while this method is
-	// being called).
-	// Thread-safe.
-	inline size_t size_approx() const
-	{
-		return (size_t)sema->availableApprox();
-	}
-	
-	
-	// Returns true if the underlying atomic variables used by
-	// the queue are lock-free (they should be on most platforms).
-	// Thread-safe.
-	static constexpr bool is_lock_free()
-	{
-		return ConcurrentQueue::is_lock_free();
-	}
-	
-
-private:
-	template<typename U, typename A1, typename A2>
-	static inline U* create(A1&& a1, A2&& a2)
-	{
-		void* p = (Traits::malloc)(sizeof(U));
-		return p != nullptr ? new (p) U(std::forward<A1>(a1), std::forward<A2>(a2)) : nullptr;
-	}
-	
-	template<typename U>
-	static inline void destroy(U* p)
-	{
-		if (p != nullptr) {
-			p->~U();
-		}
-		(Traits::free)(p);
-	}
-	
-private:
-	ConcurrentQueue inner;
-	std::unique_ptr<LightweightSemaphore, void (*)(LightweightSemaphore*)> sema;
-};
-
-
-template<typename T, typename Traits>
-inline void swap(BlockingConcurrentQueue<T, Traits>& a, BlockingConcurrentQueue<T, Traits>& b) MOODYCAMEL_NOEXCEPT
-{
-	a.swap(b);
-}
-
-}	// end namespace moodycamel
diff --git a/src/moodycamel/concurrentqueue.h b/src/moodycamel/concurrentqueue.h
deleted file mode 100644
index 99caefc0..00000000
--- a/src/moodycamel/concurrentqueue.h
+++ /dev/null
@@ -1,3747 +0,0 @@
-// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue.
-// An overview, including benchmark results, is provided here:
-//     http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++
-// The full design is also described in excruciating detail at:
-//    http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue
-
-// Simplified BSD license:
-// Copyright (c) 2013-2020, Cameron Desrochers.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice, this list of
-// conditions and the following disclaimer.
-// - Redistributions in binary form must reproduce the above copyright notice, this list of
-// conditions and the following disclaimer in the documentation and/or other materials
-// provided with the distribution.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
-// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
-// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
-// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
-// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// Also dual-licensed under the Boost Software License (see LICENSE.md)
-
-#pragma once
-
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
-// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and
-// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings
-// upon assigning any computed values)
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wconversion"
-
-#ifdef MCDBGQ_USE_RELACY
-#pragma GCC diagnostic ignored "-Wint-to-pointer-cast"
-#endif
-#endif
-
-#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
-// VS2019 with /W4 warns about constant conditional expressions but unless /std=c++17 or higher
-// does not support `if constexpr`, so we have no choice but to simply disable the warning
-#pragma warning(push)
-#pragma warning(disable: 4127)  // conditional expression is constant
-#endif
-
-#if defined(__APPLE__)
-#include "TargetConditionals.h"
-#endif
-
-#ifdef MCDBGQ_USE_RELACY
-#include "relacy/relacy_std.hpp"
-#include "relacy_shims.h"
-// We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations.
-// We'll override the default trait malloc ourselves without a macro.
-#undef new
-#undef delete
-#undef malloc
-#undef free
-#else
-#include <atomic>		// Requires C++11. Sorry VS2010.
-#include <cassert>
-#endif
-#include <cstddef>              // for max_align_t
-#include <cstdint>
-#include <cstdlib>
-#include <type_traits>
-#include <algorithm>
-#include <utility>
-#include <limits>
-#include <climits>		// for CHAR_BIT
-#include <array>
-#include <thread>		// partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading
-#include <mutex>        // used for thread exit synchronization
-
-// Platform-specific definitions of a numeric thread ID type and an invalid value
-namespace moodycamel { namespace details {
-	template<typename thread_id_t> struct thread_id_converter {
-		typedef thread_id_t thread_id_numeric_size_t;
-		typedef thread_id_t thread_id_hash_t;
-		static thread_id_hash_t prehash(thread_id_t const& x) { return x; }
-	};
-} }
-#if defined(MCDBGQ_USE_RELACY)
-namespace moodycamel { namespace details {
-	typedef std::uint32_t thread_id_t;
-	static const thread_id_t invalid_thread_id  = 0xFFFFFFFFU;
-	static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU;
-	static inline thread_id_t thread_id() { return rl::thread_index(); }
-} }
-#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__)
-// No sense pulling in windows.h in a header, we'll manually declare the function
-// we use and rely on backwards-compatibility for this not to break
-extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void);
-namespace moodycamel { namespace details {
-	static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), "Expected size of unsigned long to be 32 bits on Windows");
-	typedef std::uint32_t thread_id_t;
-	static const thread_id_t invalid_thread_id  = 0;			// See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx
-	static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU;	// Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4.
-	static inline thread_id_t thread_id() { return static_cast<thread_id_t>(::GetCurrentThreadId()); }
-} }
-#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(__MVS__) || defined(MOODYCAMEL_NO_THREAD_LOCAL)
-namespace moodycamel { namespace details {
-	static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes");
-	
-	typedef std::thread::id thread_id_t;
-	static const thread_id_t invalid_thread_id;         // Default ctor creates invalid ID
-
-	// Note we don't define a invalid_thread_id2 since std::thread::id doesn't have one; it's
-	// only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't
-	// be.
-	static inline thread_id_t thread_id() { return std::this_thread::get_id(); }
-
-	template<std::size_t> struct thread_id_size { };
-	template<> struct thread_id_size<4> { typedef std::uint32_t numeric_t; };
-	template<> struct thread_id_size<8> { typedef std::uint64_t numeric_t; };
-
-	template<> struct thread_id_converter<thread_id_t> {
-		typedef thread_id_size<sizeof(thread_id_t)>::numeric_t thread_id_numeric_size_t;
-#ifndef __APPLE__
-		typedef std::size_t thread_id_hash_t;
-#else
-		typedef thread_id_numeric_size_t thread_id_hash_t;
-#endif
-
-		static thread_id_hash_t prehash(thread_id_t const& x)
-		{
-#ifndef __APPLE__
-			return std::hash<std::thread::id>()(x);
-#else
-			return *reinterpret_cast<thread_id_hash_t const*>(&x);
-#endif
-		}
-	};
-} }
-#else
-// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475
-// In order to get a numeric thread ID in a platform-independent way, we use a thread-local
-// static variable's address as a thread identifier :-)
-#if defined(__GNUC__) || defined(__INTEL_COMPILER)
-#define MOODYCAMEL_THREADLOCAL __thread
-#elif defined(_MSC_VER)
-#define MOODYCAMEL_THREADLOCAL __declspec(thread)
-#else
-// Assume C++11 compliant compiler
-#define MOODYCAMEL_THREADLOCAL thread_local
-#endif
-namespace moodycamel { namespace details {
-	typedef std::uintptr_t thread_id_t;
-	static const thread_id_t invalid_thread_id  = 0;		// Address can't be nullptr
-	static const thread_id_t invalid_thread_id2 = 1;		// Member accesses off a null pointer are also generally invalid. Plus it's not aligned.
-	inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast<thread_id_t>(&x); }
-} }
-#endif
-
-// Constexpr if
-#ifndef MOODYCAMEL_CONSTEXPR_IF
-#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L
-#define MOODYCAMEL_CONSTEXPR_IF if constexpr
-#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]]
-#else
-#define MOODYCAMEL_CONSTEXPR_IF if
-#define MOODYCAMEL_MAYBE_UNUSED
-#endif
-#endif
-
-// Exceptions
-#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
-#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__))
-#define MOODYCAMEL_EXCEPTIONS_ENABLED
-#endif
-#endif
-#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
-#define MOODYCAMEL_TRY try
-#define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__)
-#define MOODYCAMEL_RETHROW throw
-#define MOODYCAMEL_THROW(expr) throw (expr)
-#else
-#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF (true)
-#define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF (false)
-#define MOODYCAMEL_RETHROW
-#define MOODYCAMEL_THROW(expr)
-#endif
-
-#ifndef MOODYCAMEL_NOEXCEPT
-#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED)
-#define MOODYCAMEL_NOEXCEPT
-#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true
-#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true
-#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800
-// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-(
-// We have to assume *all* non-trivial constructors may throw on VS2012!
-#define MOODYCAMEL_NOEXCEPT _NOEXCEPT
-#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value)
-#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
-#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900
-#define MOODYCAMEL_NOEXCEPT _NOEXCEPT
-#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value || std::is_nothrow_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value || std::is_nothrow_copy_constructible<type>::value)
-#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
-#else
-#define MOODYCAMEL_NOEXCEPT noexcept
-#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr)
-#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr)
-#endif
-#endif
-
-#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
-#ifdef MCDBGQ_USE_RELACY
-#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
-#else
-// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445
-// g++ <=4.7 doesn't support thread_local either.
-// Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work
-#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(__MVS__)
-// Assume `thread_local` is fully supported in all other C++11 compilers/platforms
-#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED    // tentatively enabled for now; years ago several users report having problems with it on
-#endif
-#endif
-#endif
-
-// VS2012 doesn't support deleted functions. 
-// In this case, we declare the function normally but don't define it. A link error will be generated if the function is called.
-#ifndef MOODYCAMEL_DELETE_FUNCTION
-#if defined(_MSC_VER) && _MSC_VER < 1800
-#define MOODYCAMEL_DELETE_FUNCTION
-#else
-#define MOODYCAMEL_DELETE_FUNCTION = delete
-#endif
-#endif
-
-namespace moodycamel { namespace details {
-#ifndef MOODYCAMEL_ALIGNAS
-// VS2013 doesn't support alignas or alignof, and align() requires a constant literal
-#if defined(_MSC_VER) && _MSC_VER <= 1800
-#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment))
-#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj)
-#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned<std::alignment_of<obj>::value, T>::type
-	template<int Align, typename T> struct Vs2013Aligned { };  // default, unsupported alignment
-	template<typename T> struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; };
-	template<typename T> struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; };
-	template<typename T> struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; };
-	template<typename T> struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; };
-	template<typename T> struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; };
-	template<typename T> struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; };
-	template<typename T> struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; };
-	template<typename T> struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; };
-	template<typename T> struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; };
-#else
-	template<typename T> struct identity { typedef T type; };
-#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment)
-#define MOODYCAMEL_ALIGNOF(obj) alignof(obj)
-#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity<T>::type
-#endif
-#endif
-} }
-
-
-// TSAN can false report races in lock-free code.  To enable TSAN to be used from projects that use this one,
-// we can apply per-function compile-time suppression.
-// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer
-#define MOODYCAMEL_NO_TSAN
-#if defined(__has_feature)
- #if __has_feature(thread_sanitizer)
-  #undef MOODYCAMEL_NO_TSAN
-  #define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread")))
- #endif // TSAN
-#endif // TSAN
-
-// Compiler-specific likely/unlikely hints
-namespace moodycamel { namespace details {
-#if defined(__GNUC__)
-	static inline bool (likely)(bool x) { return __builtin_expect((x), true); }
-	static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); }
-#else
-	static inline bool (likely)(bool x) { return x; }
-	static inline bool (unlikely)(bool x) { return x; }
-#endif
-} }
-
-#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
-#include "internal/concurrentqueue_internal_debug.h"
-#endif
-
-namespace moodycamel {
-namespace details {
-	template<typename T>
-	struct const_numeric_max {
-		static_assert(std::is_integral<T>::value, "const_numeric_max can only be used with integers");
-		static const T value = std::numeric_limits<T>::is_signed
-			? (static_cast<T>(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast<T>(1)
-			: static_cast<T>(-1);
-	};
-
-#if defined(__GLIBCXX__)
-	typedef ::max_align_t std_max_align_t;      // libstdc++ forgot to add it to std:: for a while
-#else
-	typedef std::max_align_t std_max_align_t;   // Others (e.g. MSVC) insist it can *only* be accessed via std::
-#endif
-
-	// Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting
-	// 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64.
-	typedef union {
-		std_max_align_t x;
-		long long y;
-		void* z;
-	} max_align_t;
-}
-
-// Default traits for the ConcurrentQueue. To change some of the
-// traits without re-implementing all of them, inherit from this
-// struct and shadow the declarations you wish to be different;
-// since the traits are used as a template type parameter, the
-// shadowed declarations will be used where defined, and the defaults
-// otherwise.
-struct ConcurrentQueueDefaultTraits
-{
-	// General-purpose size type. std::size_t is strongly recommended.
-	typedef std::size_t size_t;
-	
-	// The type used for the enqueue and dequeue indices. Must be at least as
-	// large as size_t. Should be significantly larger than the number of elements
-	// you expect to hold at once, especially if you have a high turnover rate;
-	// for example, on 32-bit x86, if you expect to have over a hundred million
-	// elements or pump several million elements through your queue in a very
-	// short space of time, using a 32-bit type *may* trigger a race condition.
-	// A 64-bit int type is recommended in that case, and in practice will
-	// prevent a race condition no matter the usage of the queue. Note that
-	// whether the queue is lock-free with a 64-int type depends on the whether
-	// std::atomic<std::uint64_t> is lock-free, which is platform-specific.
-	typedef std::size_t index_t;
-	
-	// Internally, all elements are enqueued and dequeued from multi-element
-	// blocks; this is the smallest controllable unit. If you expect few elements
-	// but many producers, a smaller block size should be favoured. For few producers
-	// and/or many elements, a larger block size is preferred. A sane default
-	// is provided. Must be a power of 2.
-	static const size_t BLOCK_SIZE = 32;
-	
-	// For explicit producers (i.e. when using a producer token), the block is
-	// checked for being empty by iterating through a list of flags, one per element.
-	// For large block sizes, this is too inefficient, and switching to an atomic
-	// counter-based approach is faster. The switch is made for block sizes strictly
-	// larger than this threshold.
-	static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32;
-	
-	// How many full blocks can be expected for a single explicit producer? This should
-	// reflect that number's maximum for optimal performance. Must be a power of 2.
-	static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32;
-	
-	// How many full blocks can be expected for a single implicit producer? This should
-	// reflect that number's maximum for optimal performance. Must be a power of 2.
-	static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32;
-	
-	// The initial size of the hash table mapping thread IDs to implicit producers.
-	// Note that the hash is resized every time it becomes half full.
-	// Must be a power of two, and either 0 or at least 1. If 0, implicit production
-	// (using the enqueue methods without an explicit producer token) is disabled.
-	static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32;
-	
-	// Controls the number of items that an explicit consumer (i.e. one with a token)
-	// must consume before it causes all consumers to rotate and move on to the next
-	// internal queue.
-	static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256;
-	
-	// The maximum number of elements (inclusive) that can be enqueued to a sub-queue.
-	// Enqueue operations that would cause this limit to be surpassed will fail. Note
-	// that this limit is enforced at the block level (for performance reasons), i.e.
-	// it's rounded up to the nearest block size.
-	static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max<size_t>::value;
-
-	// The number of times to spin before sleeping when waiting on a semaphore.
-	// Recommended values are on the order of 1000-10000 unless the number of
-	// consumer threads exceeds the number of idle cores (in which case try 0-100).
-	// Only affects instances of the BlockingConcurrentQueue.
-	static const int MAX_SEMA_SPINS = 10000;
-
-	// Whether to recycle dynamically-allocated blocks into an internal free list or
-	// not. If false, only pre-allocated blocks (controlled by the constructor
-	// arguments) will be recycled, and all others will be `free`d back to the heap.
-	// Note that blocks consumed by explicit producers are only freed on destruction
-	// of the queue (not following destruction of the token) regardless of this trait.
-	static const bool RECYCLE_ALLOCATED_BLOCKS = false;
-
-	
-#ifndef MCDBGQ_USE_RELACY
-	// Memory allocation can be customized if needed.
-	// malloc should return nullptr on failure, and handle alignment like std::malloc.
-#if defined(malloc) || defined(free)
-	// Gah, this is 2015, stop defining macros that break standard code already!
-	// Work around malloc/free being special macros:
-	static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); }
-	static inline void WORKAROUND_free(void* ptr) { return free(ptr); }
-	static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); }
-	static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); }
-#else
-	static inline void* malloc(size_t size) { return std::malloc(size); }
-	static inline void free(void* ptr) { return std::free(ptr); }
-#endif
-#else
-	// Debug versions when running under the Relacy race detector (ignore
-	// these in user code)
-	static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); }
-	static inline void free(void* ptr) { return rl::rl_free(ptr, $); }
-#endif
-};
-
-
-// When producing or consuming many elements, the most efficient way is to:
-//    1) Use one of the bulk-operation methods of the queue with a token
-//    2) Failing that, use the bulk-operation methods without a token
-//    3) Failing that, create a token and use that with the single-item methods
-//    4) Failing that, use the single-parameter methods of the queue
-// Having said that, don't create tokens willy-nilly -- ideally there should be
-// a maximum of one token per thread (of each kind).
-struct ProducerToken;
-struct ConsumerToken;
-
-template<typename T, typename Traits> class ConcurrentQueue;
-template<typename T, typename Traits> class BlockingConcurrentQueue;
-class ConcurrentQueueTests;
-
-
-namespace details
-{
-	struct ConcurrentQueueProducerTypelessBase
-	{
-		ConcurrentQueueProducerTypelessBase* next;
-		std::atomic<bool> inactive;
-		ProducerToken* token;
-		
-		ConcurrentQueueProducerTypelessBase()
-			: next(nullptr), inactive(false), token(nullptr)
-		{
-		}
-	};
-	
-	template<bool use32> struct _hash_32_or_64 {
-		static inline std::uint32_t hash(std::uint32_t h)
-		{
-			// MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
-			// Since the thread ID is already unique, all we really want to do is propagate that
-			// uniqueness evenly across all the bits, so that we can use a subset of the bits while
-			// reducing collisions significantly
-			h ^= h >> 16;
-			h *= 0x85ebca6b;
-			h ^= h >> 13;
-			h *= 0xc2b2ae35;
-			return h ^ (h >> 16);
-		}
-	};
-	template<> struct _hash_32_or_64<1> {
-		static inline std::uint64_t hash(std::uint64_t h)
-		{
-			h ^= h >> 33;
-			h *= 0xff51afd7ed558ccd;
-			h ^= h >> 33;
-			h *= 0xc4ceb9fe1a85ec53;
-			return h ^ (h >> 33);
-		}
-	};
-	template<std::size_t size> struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> {  };
-	
-	static inline size_t hash_thread_id(thread_id_t id)
-	{
-		static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values");
-		return static_cast<size_t>(hash_32_or_64<sizeof(thread_id_converter<thread_id_t>::thread_id_hash_t)>::hash(
-			thread_id_converter<thread_id_t>::prehash(id)));
-	}
-	
-	template<typename T>
-	static inline bool circular_less_than(T a, T b)
-	{
-		static_assert(std::is_integral<T>::value && !std::numeric_limits<T>::is_signed, "circular_less_than is intended to be used only with unsigned integer types");
-		return static_cast<T>(a - b) > static_cast<T>(static_cast<T>(1) << (static_cast<T>(sizeof(T) * CHAR_BIT - 1)));
-		// Note: extra parens around rhs of operator<< is MSVC bug: https://developercommunity2.visualstudio.com/t/C4554-triggers-when-both-lhs-and-rhs-is/10034931
-		//       silencing the bug requires #pragma warning(disable: 4554) around the calling code and has no effect when done here.
-	}
-	
-	template<typename U>
-	static inline char* align_for(char* ptr)
-	{
-		const std::size_t alignment = std::alignment_of<U>::value;
-		return ptr + (alignment - (reinterpret_cast<std::uintptr_t>(ptr) % alignment)) % alignment;
-	}
-
-	template<typename T>
-	static inline T ceil_to_pow_2(T x)
-	{
-		static_assert(std::is_integral<T>::value && !std::numeric_limits<T>::is_signed, "ceil_to_pow_2 is intended to be used only with unsigned integer types");
-
-		// Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
-		--x;
-		x |= x >> 1;
-		x |= x >> 2;
-		x |= x >> 4;
-		for (std::size_t i = 1; i < sizeof(T); i <<= 1) {
-			x |= x >> (i << 3);
-		}
-		++x;
-		return x;
-	}
-	
-	template<typename T>
-	static inline void swap_relaxed(std::atomic<T>& left, std::atomic<T>& right)
-	{
-		T temp = std::move(left.load(std::memory_order_relaxed));
-		left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed);
-		right.store(std::move(temp), std::memory_order_relaxed);
-	}
-	
-	template<typename T>
-	static inline T const& nomove(T const& x)
-	{
-		return x;
-	}
-	
-	template<bool Enable>
-	struct nomove_if
-	{
-		template<typename T>
-		static inline T const& eval(T const& x)
-		{
-			return x;
-		}
-	};
-	
-	template<>
-	struct nomove_if<false>
-	{
-		template<typename U>
-		static inline auto eval(U&& x)
-			-> decltype(std::forward<U>(x))
-		{
-			return std::forward<U>(x);
-		}
-	};
-	
-	template<typename It>
-	static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it)
-	{
-		return *it;
-	}
-	
-#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
-	template<typename T> struct is_trivially_destructible : std::is_trivially_destructible<T> { };
-#else
-	template<typename T> struct is_trivially_destructible : std::has_trivial_destructor<T> { };
-#endif
-	
-#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
-#ifdef MCDBGQ_USE_RELACY
-	typedef RelacyThreadExitListener ThreadExitListener;
-	typedef RelacyThreadExitNotifier ThreadExitNotifier;
-#else
-	class ThreadExitNotifier;
-
-	struct ThreadExitListener
-	{
-		typedef void (*callback_t)(void*);
-		callback_t callback;
-		void* userData;
-		
-		ThreadExitListener* next;		// reserved for use by the ThreadExitNotifier
-		ThreadExitNotifier* chain;		// reserved for use by the ThreadExitNotifier
-	};
-
-	class ThreadExitNotifier
-	{
-	public:
-		static void subscribe(ThreadExitListener* listener)
-		{
-			auto& tlsInst = instance();
-			std::lock_guard<std::mutex> guard(mutex());
-			listener->next = tlsInst.tail;
-			listener->chain = &tlsInst;
-			tlsInst.tail = listener;
-		}
-		
-		static void unsubscribe(ThreadExitListener* listener)
-		{
-			std::lock_guard<std::mutex> guard(mutex());
-			if (!listener->chain) {
-				return;  // race with ~ThreadExitNotifier
-			}
-			auto& tlsInst = *listener->chain;
-			listener->chain = nullptr;
-			ThreadExitListener** prev = &tlsInst.tail;
-			for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) {
-				if (ptr == listener) {
-					*prev = ptr->next;
-					break;
-				}
-				prev = &ptr->next;
-			}
-		}
-		
-	private:
-		ThreadExitNotifier() : tail(nullptr) { }
-		ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION;
-		ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION;
-		
-		~ThreadExitNotifier()
-		{
-			// This thread is about to exit, let everyone know!
-			assert(this == &instance() && "If this assert fails, you likely have a buggy compiler! Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined.");
-			std::lock_guard<std::mutex> guard(mutex());
-			for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) {
-				ptr->chain = nullptr;
-				ptr->callback(ptr->userData);
-			}
-		}
-		
-		// Thread-local
-		static inline ThreadExitNotifier& instance()
-		{
-			static thread_local ThreadExitNotifier notifier;
-			return notifier;
-		}
-
-		static inline std::mutex& mutex()
-		{
-			// Must be static because the ThreadExitNotifier could be destroyed while unsubscribe is called
-			static std::mutex mutex;
-			return mutex;
-		}
-		
-	private:
-		ThreadExitListener* tail;
-	};
-#endif
-#endif
-	
-	template<typename T> struct static_is_lock_free_num { enum { value = 0 }; };
-	template<> struct static_is_lock_free_num<signed char> { enum { value = ATOMIC_CHAR_LOCK_FREE }; };
-	template<> struct static_is_lock_free_num<short> { enum { value = ATOMIC_SHORT_LOCK_FREE }; };
-	template<> struct static_is_lock_free_num<int> { enum { value = ATOMIC_INT_LOCK_FREE }; };
-	template<> struct static_is_lock_free_num<long> { enum { value = ATOMIC_LONG_LOCK_FREE }; };
-	template<> struct static_is_lock_free_num<long long> { enum { value = ATOMIC_LLONG_LOCK_FREE }; };
-	template<typename T> struct static_is_lock_free : static_is_lock_free_num<typename std::make_signed<T>::type> {  };
-	template<> struct static_is_lock_free<bool> { enum { value = ATOMIC_BOOL_LOCK_FREE }; };
-	template<typename U> struct static_is_lock_free<U*> { enum { value = ATOMIC_POINTER_LOCK_FREE }; };
-}
-
-
-struct ProducerToken
-{
-	template<typename T, typename Traits>
-	explicit ProducerToken(ConcurrentQueue<T, Traits>& queue);
-	
-	template<typename T, typename Traits>
-	explicit ProducerToken(BlockingConcurrentQueue<T, Traits>& queue);
-	
-	ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT
-		: producer(other.producer)
-	{
-		other.producer = nullptr;
-		if (producer != nullptr) {
-			producer->token = this;
-		}
-	}
-	
-	inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT
-	{
-		swap(other);
-		return *this;
-	}
-	
-	void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT
-	{
-		std::swap(producer, other.producer);
-		if (producer != nullptr) {
-			producer->token = this;
-		}
-		if (other.producer != nullptr) {
-			other.producer->token = &other;
-		}
-	}
-	
-	// A token is always valid unless:
-	//     1) Memory allocation failed during construction
-	//     2) It was moved via the move constructor
-	//        (Note: assignment does a swap, leaving both potentially valid)
-	//     3) The associated queue was destroyed
-	// Note that if valid() returns true, that only indicates
-	// that the token is valid for use with a specific queue,
-	// but not which one; that's up to the user to track.
-	inline bool valid() const { return producer != nullptr; }
-	
-	~ProducerToken()
-	{
-		if (producer != nullptr) {
-			producer->token = nullptr;
-			producer->inactive.store(true, std::memory_order_release);
-		}
-	}
-	
-	// Disable copying and assignment
-	ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION;
-	ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION;
-	
-private:
-	template<typename T, typename Traits> friend class ConcurrentQueue;
-	friend class ConcurrentQueueTests;
-	
-protected:
-	details::ConcurrentQueueProducerTypelessBase* producer;
-};
-
-
-struct ConsumerToken
-{
-	template<typename T, typename Traits>
-	explicit ConsumerToken(ConcurrentQueue<T, Traits>& q);
-	
-	template<typename T, typename Traits>
-	explicit ConsumerToken(BlockingConcurrentQueue<T, Traits>& q);
-	
-	ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT
-		: initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer)
-	{
-	}
-	
-	inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT
-	{
-		swap(other);
-		return *this;
-	}
-	
-	void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT
-	{
-		std::swap(initialOffset, other.initialOffset);
-		std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset);
-		std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent);
-		std::swap(currentProducer, other.currentProducer);
-		std::swap(desiredProducer, other.desiredProducer);
-	}
-	
-	// Disable copying and assignment
-	ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION;
-	ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION;
-
-private:
-	template<typename T, typename Traits> friend class ConcurrentQueue;
-	friend class ConcurrentQueueTests;
-	
-private: // but shared with ConcurrentQueue
-	std::uint32_t initialOffset;
-	std::uint32_t lastKnownGlobalOffset;
-	std::uint32_t itemsConsumedFromCurrent;
-	details::ConcurrentQueueProducerTypelessBase* currentProducer;
-	details::ConcurrentQueueProducerTypelessBase* desiredProducer;
-};
-
-// Need to forward-declare this swap because it's in a namespace.
-// See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces
-template<typename T, typename Traits>
-inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a, typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT;
-
-
-template<typename T, typename Traits = ConcurrentQueueDefaultTraits>
-class ConcurrentQueue
-{
-public:
-	typedef ::moodycamel::ProducerToken producer_token_t;
-	typedef ::moodycamel::ConsumerToken consumer_token_t;
-	
-	typedef typename Traits::index_t index_t;
-	typedef typename Traits::size_t size_t;
-	
-	static const size_t BLOCK_SIZE = static_cast<size_t>(Traits::BLOCK_SIZE);
-	static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast<size_t>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD);
-	static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::EXPLICIT_INITIAL_INDEX_SIZE);
-	static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::IMPLICIT_INITIAL_INDEX_SIZE);
-	static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = static_cast<size_t>(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE);
-	static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = static_cast<std::uint32_t>(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE);
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable: 4307)		// + integral constant overflow (that's what the ternary expression is for!)
-#pragma warning(disable: 4309)		// static_cast: Truncation of constant value
-#endif
-	static const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max<size_t>::value - static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE) ? details::const_numeric_max<size_t>::value : ((static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE);
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
-	static_assert(!std::numeric_limits<size_t>::is_signed && std::is_integral<size_t>::value, "Traits::size_t must be an unsigned integral type");
-	static_assert(!std::numeric_limits<index_t>::is_signed && std::is_integral<index_t>::value, "Traits::index_t must be an unsigned integral type");
-	static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t");
-	static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)");
-	static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)");
-	static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)");
-	static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) && !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)");
-	static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) || !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)), "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2");
-	static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)");
-
-public:
-	// Creates a queue with at least `capacity` element slots; note that the
-	// actual number of elements that can be inserted without additional memory
-	// allocation depends on the number of producers and the block size (e.g. if
-	// the block size is equal to `capacity`, only a single block will be allocated
-	// up-front, which means only a single producer will be able to enqueue elements
-	// without an extra allocation -- blocks aren't shared between producers).
-	// This method is not thread safe -- it is up to the user to ensure that the
-	// queue is fully constructed before it starts being used by other threads (this
-	// includes making the memory effects of construction visible, possibly with a
-	// memory barrier).
-	explicit ConcurrentQueue(size_t capacity = 32 * BLOCK_SIZE)
-		: producerListTail(nullptr),
-		producerCount(0),
-		initialBlockPoolIndex(0),
-		nextExplicitConsumerId(0),
-		globalExplicitConsumerOffset(0)
-	{
-		implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
-		populate_initial_implicit_producer_hash();
-		populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1));
-		
-#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
-		// Track all the producers using a fully-resolved typed list for
-		// each kind; this makes it possible to debug them starting from
-		// the root queue object (otherwise wacky casts are needed that
-		// don't compile in the debugger's expression evaluator).
-		explicitProducers.store(nullptr, std::memory_order_relaxed);
-		implicitProducers.store(nullptr, std::memory_order_relaxed);
-#endif
-	}
-	
-	// Computes the correct amount of pre-allocated blocks for you based
-	// on the minimum number of elements you want available at any given
-	// time, and the maximum concurrent number of each type of producer.
-	ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers)
-		: producerListTail(nullptr),
-		producerCount(0),
-		initialBlockPoolIndex(0),
-		nextExplicitConsumerId(0),
-		globalExplicitConsumerOffset(0)
-	{
-		implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
-		populate_initial_implicit_producer_hash();
-		size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers);
-		populate_initial_block_list(blocks);
-		
-#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
-		explicitProducers.store(nullptr, std::memory_order_relaxed);
-		implicitProducers.store(nullptr, std::memory_order_relaxed);
-#endif
-	}
-	
-	// Note: The queue should not be accessed concurrently while it's
-	// being deleted. It's up to the user to synchronize this.
-	// This method is not thread safe.
-	~ConcurrentQueue()
-	{
-		// Destroy producers
-		auto ptr = producerListTail.load(std::memory_order_relaxed);
-		while (ptr != nullptr) {
-			auto next = ptr->next_prod();
-			if (ptr->token != nullptr) {
-				ptr->token->producer = nullptr;
-			}
-			destroy(ptr);
-			ptr = next;
-		}
-		
-		// Destroy implicit producer hash tables
-		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) {
-			auto hash = implicitProducerHash.load(std::memory_order_relaxed);
-			while (hash != nullptr) {
-				auto prev = hash->prev;
-				if (prev != nullptr) {		// The last hash is part of this object and was not allocated dynamically
-					for (size_t i = 0; i != hash->capacity; ++i) {
-						hash->entries[i].~ImplicitProducerKVP();
-					}
-					hash->~ImplicitProducerHash();
-					(Traits::free)(hash);
-				}
-				hash = prev;
-			}
-		}
-		
-		// Destroy global free list
-		auto block = freeList.head_unsafe();
-		while (block != nullptr) {
-			auto next = block->freeListNext.load(std::memory_order_relaxed);
-			if (block->dynamicallyAllocated) {
-				destroy(block);
-			}
-			block = next;
-		}
-		
-		// Destroy initial free list
-		destroy_array(initialBlockPool, initialBlockPoolSize);
-	}
-
-	// Disable copying and copy assignment
-	ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
-	ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
-	
-	// Moving is supported, but note that it is *not* a thread-safe operation.
-	// Nobody can use the queue while it's being moved, and the memory effects
-	// of that move must be propagated to other threads before they can use it.
-	// Note: When a queue is moved, its tokens are still valid but can only be
-	// used with the destination queue (i.e. semantically they are moved along
-	// with the queue itself).
-	ConcurrentQueue(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT
-		: producerListTail(other.producerListTail.load(std::memory_order_relaxed)),
-		producerCount(other.producerCount.load(std::memory_order_relaxed)),
-		initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)),
-		initialBlockPool(other.initialBlockPool),
-		initialBlockPoolSize(other.initialBlockPoolSize),
-		freeList(std::move(other.freeList)),
-		nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)),
-		globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed))
-	{
-		// Move the other one into this, and leave the other one as an empty queue
-		implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
-		populate_initial_implicit_producer_hash();
-		swap_implicit_producer_hashes(other);
-		
-		other.producerListTail.store(nullptr, std::memory_order_relaxed);
-		other.producerCount.store(0, std::memory_order_relaxed);
-		other.nextExplicitConsumerId.store(0, std::memory_order_relaxed);
-		other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed);
-		
-#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
-		explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);
-		other.explicitProducers.store(nullptr, std::memory_order_relaxed);
-		implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);
-		other.implicitProducers.store(nullptr, std::memory_order_relaxed);
-#endif
-		
-		other.initialBlockPoolIndex.store(0, std::memory_order_relaxed);
-		other.initialBlockPoolSize = 0;
-		other.initialBlockPool = nullptr;
-		
-		reown_producers();
-	}
-	
-	inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT
-	{
-		return swap_internal(other);
-	}
-	
-	// Swaps this queue's state with the other's. Not thread-safe.
-	// Swapping two queues does not invalidate their tokens, however
-	// the tokens that were created for one queue must be used with
-	// only the swapped queue (i.e. the tokens are tied to the
-	// queue's movable state, not the object itself).
-	inline void swap(ConcurrentQueue& other) MOODYCAMEL_NOEXCEPT
-	{
-		swap_internal(other);
-	}
-	
-private:
-	ConcurrentQueue& swap_internal(ConcurrentQueue& other)
-	{
-		if (this == &other) {
-			return *this;
-		}
-		
-		details::swap_relaxed(producerListTail, other.producerListTail);
-		details::swap_relaxed(producerCount, other.producerCount);
-		details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex);
-		std::swap(initialBlockPool, other.initialBlockPool);
-		std::swap(initialBlockPoolSize, other.initialBlockPoolSize);
-		freeList.swap(other.freeList);
-		details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId);
-		details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset);
-		
-		swap_implicit_producer_hashes(other);
-		
-		reown_producers();
-		other.reown_producers();
-		
-#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
-		details::swap_relaxed(explicitProducers, other.explicitProducers);
-		details::swap_relaxed(implicitProducers, other.implicitProducers);
-#endif
-		
-		return *this;
-	}
-	
-public:
-	// Enqueues a single item (by copying it).
-	// Allocates memory if required. Only fails if memory allocation fails (or implicit
-	// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
-	// or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
-	// Thread-safe.
-	inline bool enqueue(T const& item)
-	{
-		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		else return inner_enqueue<CanAlloc>(item);
-	}
-	
-	// Enqueues a single item (by moving it, if possible).
-	// Allocates memory if required. Only fails if memory allocation fails (or implicit
-	// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
-	// or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
-	// Thread-safe.
-	inline bool enqueue(T&& item)
-	{
-		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		else return inner_enqueue<CanAlloc>(std::move(item));
-	}
-	
-	// Enqueues a single item (by copying it) using an explicit producer token.
-	// Allocates memory if required. Only fails if memory allocation fails (or
-	// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
-	// Thread-safe.
-	inline bool enqueue(producer_token_t const& token, T const& item)
-	{
-		return inner_enqueue<CanAlloc>(token, item);
-	}
-	
-	// Enqueues a single item (by moving it, if possible) using an explicit producer token.
-	// Allocates memory if required. Only fails if memory allocation fails (or
-	// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
-	// Thread-safe.
-	inline bool enqueue(producer_token_t const& token, T&& item)
-	{
-		return inner_enqueue<CanAlloc>(token, std::move(item));
-	}
-	
-	// Enqueues several items.
-	// Allocates memory if required. Only fails if memory allocation fails (or
-	// implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
-	// is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
-	// Note: Use std::make_move_iterator if the elements should be moved instead of copied.
-	// Thread-safe.
-	template<typename It>
-	bool enqueue_bulk(It itemFirst, size_t count)
-	{
-		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		else return inner_enqueue_bulk<CanAlloc>(itemFirst, count);
-	}
-	
-	// Enqueues several items using an explicit producer token.
-	// Allocates memory if required. Only fails if memory allocation fails
-	// (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
-	// Note: Use std::make_move_iterator if the elements should be moved
-	// instead of copied.
-	// Thread-safe.
-	template<typename It>
-	bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
-	{
-		return inner_enqueue_bulk<CanAlloc>(token, itemFirst, count);
-	}
-	
-	// Enqueues a single item (by copying it).
-	// Does not allocate memory. Fails if not enough room to enqueue (or implicit
-	// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
-	// is 0).
-	// Thread-safe.
-	inline bool try_enqueue(T const& item)
-	{
-		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		else return inner_enqueue<CannotAlloc>(item);
-	}
-	
-	// Enqueues a single item (by moving it, if possible).
-	// Does not allocate memory (except for one-time implicit producer).
-	// Fails if not enough room to enqueue (or implicit production is
-	// disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
-	// Thread-safe.
-	inline bool try_enqueue(T&& item)
-	{
-		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		else return inner_enqueue<CannotAlloc>(std::move(item));
-	}
-	
-	// Enqueues a single item (by copying it) using an explicit producer token.
-	// Does not allocate memory. Fails if not enough room to enqueue.
-	// Thread-safe.
-	inline bool try_enqueue(producer_token_t const& token, T const& item)
-	{
-		return inner_enqueue<CannotAlloc>(token, item);
-	}
-	
-	// Enqueues a single item (by moving it, if possible) using an explicit producer token.
-	// Does not allocate memory. Fails if not enough room to enqueue.
-	// Thread-safe.
-	inline bool try_enqueue(producer_token_t const& token, T&& item)
-	{
-		return inner_enqueue<CannotAlloc>(token, std::move(item));
-	}
-	
-	// Enqueues several items.
-	// Does not allocate memory (except for one-time implicit producer).
-	// Fails if not enough room to enqueue (or implicit production is
-	// disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
-	// Note: Use std::make_move_iterator if the elements should be moved
-	// instead of copied.
-	// Thread-safe.
-	template<typename It>
-	bool try_enqueue_bulk(It itemFirst, size_t count)
-	{
-		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		else return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);
-	}
-	
-	// Enqueues several items using an explicit producer token.
-	// Does not allocate memory. Fails if not enough room to enqueue.
-	// Note: Use std::make_move_iterator if the elements should be moved
-	// instead of copied.
-	// Thread-safe.
-	template<typename It>
-	bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
-	{
-		return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count);
-	}
-	
-	
-	
-	// Attempts to dequeue from the queue.
-	// Returns false if all producer streams appeared empty at the time they
-	// were checked (so, the queue is likely but not guaranteed to be empty).
-	// Never allocates. Thread-safe.
-	template<typename U>
-	bool try_dequeue(U& item)
-	{
-		// Instead of simply trying each producer in turn (which could cause needless contention on the first
-		// producer), we score them heuristically.
-		size_t nonEmptyCount = 0;
-		ProducerBase* best = nullptr;
-		size_t bestSize = 0;
-		for (auto ptr = producerListTail.load(std::memory_order_acquire); nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) {
-			auto size = ptr->size_approx();
-			if (size > 0) {
-				if (size > bestSize) {
-					bestSize = size;
-					best = ptr;
-				}
-				++nonEmptyCount;
-			}
-		}
-		
-		// If there was at least one non-empty queue but it appears empty at the time
-		// we try to dequeue from it, we need to make sure every queue's been tried
-		if (nonEmptyCount > 0) {
-			if ((details::likely)(best->dequeue(item))) {
-				return true;
-			}
-			for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
-				if (ptr != best && ptr->dequeue(item)) {
-					return true;
-				}
-			}
-		}
-		return false;
-	}
-	
-	// Attempts to dequeue from the queue.
-	// Returns false if all producer streams appeared empty at the time they
-	// were checked (so, the queue is likely but not guaranteed to be empty).
-	// This differs from the try_dequeue(item) method in that this one does
-	// not attempt to reduce contention by interleaving the order that producer
-	// streams are dequeued from. So, using this method can reduce overall throughput
-	// under contention, but will give more predictable results in single-threaded
-	// consumer scenarios. This is mostly only useful for internal unit tests.
-	// Never allocates. Thread-safe.
-	template<typename U>
-	bool try_dequeue_non_interleaved(U& item)
-	{
-		for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
-			if (ptr->dequeue(item)) {
-				return true;
-			}
-		}
-		return false;
-	}
-	
-	// Attempts to dequeue from the queue using an explicit consumer token.
-	// Returns false if all producer streams appeared empty at the time they
-	// were checked (so, the queue is likely but not guaranteed to be empty).
-	// Never allocates. Thread-safe.
-	template<typename U>
-	bool try_dequeue(consumer_token_t& token, U& item)
-	{
-		// The idea is roughly as follows:
-		// Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less
-		// If you see that the global offset has changed, you must reset your consumption counter and move to your designated place
-		// If there's no items where you're supposed to be, keep moving until you find a producer with some items
-		// If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it
-		
-		if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
-			if (!update_current_producer_after_rotation(token)) {
-				return false;
-			}
-		}
-		
-		// If there was at least one non-empty queue but it appears empty at the time
-		// we try to dequeue from it, we need to make sure every queue's been tried
-		if (static_cast<ProducerBase*>(token.currentProducer)->dequeue(item)) {
-			if (++token.itemsConsumedFromCurrent == EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
-				globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
-			}
-			return true;
-		}
-		
-		auto tail = producerListTail.load(std::memory_order_acquire);
-		auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
-		if (ptr == nullptr) {
-			ptr = tail;
-		}
-		while (ptr != static_cast<ProducerBase*>(token.currentProducer)) {
-			if (ptr->dequeue(item)) {
-				token.currentProducer = ptr;
-				token.itemsConsumedFromCurrent = 1;
-				return true;
-			}
-			ptr = ptr->next_prod();
-			if (ptr == nullptr) {
-				ptr = tail;
-			}
-		}
-		return false;
-	}
-	
-	// Attempts to dequeue several elements from the queue.
-	// Returns the number of items actually dequeued.
-	// Returns 0 if all producer streams appeared empty at the time they
-	// were checked (so, the queue is likely but not guaranteed to be empty).
-	// Never allocates. Thread-safe.
-	template<typename It>
-	size_t try_dequeue_bulk(It itemFirst, size_t max)
-	{
-		size_t count = 0;
-		for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
-			count += ptr->dequeue_bulk(itemFirst, max - count);
-			if (count == max) {
-				break;
-			}
-		}
-		return count;
-	}
-	
-	// Attempts to dequeue several elements from the queue using an explicit consumer token.
-	// Returns the number of items actually dequeued.
-	// Returns 0 if all producer streams appeared empty at the time they
-	// were checked (so, the queue is likely but not guaranteed to be empty).
-	// Never allocates. Thread-safe.
-	template<typename It>
-	size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max)
-	{
-		if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
-			if (!update_current_producer_after_rotation(token)) {
-				return 0;
-			}
-		}
-		
-		size_t count = static_cast<ProducerBase*>(token.currentProducer)->dequeue_bulk(itemFirst, max);
-		if (count == max) {
-			if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
-				globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
-			}
-			return max;
-		}
-		token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count);
-		max -= count;
-		
-		auto tail = producerListTail.load(std::memory_order_acquire);
-		auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
-		if (ptr == nullptr) {
-			ptr = tail;
-		}
-		while (ptr != static_cast<ProducerBase*>(token.currentProducer)) {
-			auto dequeued = ptr->dequeue_bulk(itemFirst, max);
-			count += dequeued;
-			if (dequeued != 0) {
-				token.currentProducer = ptr;
-				token.itemsConsumedFromCurrent = static_cast<std::uint32_t>(dequeued);
-			}
-			if (dequeued == max) {
-				break;
-			}
-			max -= dequeued;
-			ptr = ptr->next_prod();
-			if (ptr == nullptr) {
-				ptr = tail;
-			}
-		}
-		return count;
-	}
-	
-	
-	
-	// Attempts to dequeue from a specific producer's inner queue.
-	// If you happen to know which producer you want to dequeue from, this
-	// is significantly faster than using the general-case try_dequeue methods.
-	// Returns false if the producer's queue appeared empty at the time it
-	// was checked (so, the queue is likely but not guaranteed to be empty).
-	// Never allocates. Thread-safe.
-	template<typename U>
-	inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item)
-	{
-		return static_cast<ExplicitProducer*>(producer.producer)->dequeue(item);
-	}
-	
-	// Attempts to dequeue several elements from a specific producer's inner queue.
-	// Returns the number of items actually dequeued.
-	// If you happen to know which producer you want to dequeue from, this
-	// is significantly faster than using the general-case try_dequeue methods.
-	// Returns 0 if the producer's queue appeared empty at the time it
-	// was checked (so, the queue is likely but not guaranteed to be empty).
-	// Never allocates. Thread-safe.
-	template<typename It>
-	inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, It itemFirst, size_t max)
-	{
-		return static_cast<ExplicitProducer*>(producer.producer)->dequeue_bulk(itemFirst, max);
-	}
-	
-	
-	// Returns an estimate of the total number of elements currently in the queue. This
-	// estimate is only accurate if the queue has completely stabilized before it is called
-	// (i.e. all enqueue and dequeue operations have completed and their memory effects are
-	// visible on the calling thread, and no further operations start while this method is
-	// being called).
-	// Thread-safe.
-	size_t size_approx() const
-	{
-		size_t size = 0;
-		for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
-			size += ptr->size_approx();
-		}
-		return size;
-	}
-	
-	
-	// Returns true if the underlying atomic variables used by
-	// the queue are lock-free (they should be on most platforms).
-	// Thread-safe.
-	static constexpr bool is_lock_free()
-	{
-		return
-			details::static_is_lock_free<bool>::value == 2 &&
-			details::static_is_lock_free<size_t>::value == 2 &&
-			details::static_is_lock_free<std::uint32_t>::value == 2 &&
-			details::static_is_lock_free<index_t>::value == 2 &&
-			details::static_is_lock_free<void*>::value == 2 &&
-			details::static_is_lock_free<typename details::thread_id_converter<details::thread_id_t>::thread_id_numeric_size_t>::value == 2;
-	}
-
-
-private:
-	friend struct ProducerToken;
-	friend struct ConsumerToken;
-	struct ExplicitProducer;
-	friend struct ExplicitProducer;
-	struct ImplicitProducer;
-	friend struct ImplicitProducer;
-	friend class ConcurrentQueueTests;
-		
-	enum AllocationMode { CanAlloc, CannotAlloc };
-	
-	
-	///////////////////////////////
-	// Queue methods
-	///////////////////////////////
-	
-	template<AllocationMode canAlloc, typename U>
-	inline bool inner_enqueue(producer_token_t const& token, U&& element)
-	{
-		return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));
-	}
-	
-	template<AllocationMode canAlloc, typename U>
-	inline bool inner_enqueue(U&& element)
-	{
-		auto producer = get_or_add_implicit_producer();
-		return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));
-	}
-	
-	template<AllocationMode canAlloc, typename It>
-	inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
-	{
-		return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);
-	}
-	
-	template<AllocationMode canAlloc, typename It>
-	inline bool inner_enqueue_bulk(It itemFirst, size_t count)
-	{
-		auto producer = get_or_add_implicit_producer();
-		return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);
-	}
-	
-	inline bool update_current_producer_after_rotation(consumer_token_t& token)
-	{
-		// Ah, there's been a rotation, figure out where we should be!
-		auto tail = producerListTail.load(std::memory_order_acquire);
-		if (token.desiredProducer == nullptr && tail == nullptr) {
-			return false;
-		}
-		auto prodCount = producerCount.load(std::memory_order_relaxed);
-		auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed);
-		if ((details::unlikely)(token.desiredProducer == nullptr)) {
-			// Aha, first time we're dequeueing anything.
-			// Figure out our local position
-			// Note: offset is from start, not end, but we're traversing from end -- subtract from count first
-			std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount);
-			token.desiredProducer = tail;
-			for (std::uint32_t i = 0; i != offset; ++i) {
-				token.desiredProducer = static_cast<ProducerBase*>(token.desiredProducer)->next_prod();
-				if (token.desiredProducer == nullptr) {
-					token.desiredProducer = tail;
-				}
-			}
-		}
-		
-		std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset;
-		if (delta >= prodCount) {
-			delta = delta % prodCount;
-		}
-		for (std::uint32_t i = 0; i != delta; ++i) {
-			token.desiredProducer = static_cast<ProducerBase*>(token.desiredProducer)->next_prod();
-			if (token.desiredProducer == nullptr) {
-				token.desiredProducer = tail;
-			}
-		}
-		
-		token.lastKnownGlobalOffset = globalOffset;
-		token.currentProducer = token.desiredProducer;
-		token.itemsConsumedFromCurrent = 0;
-		return true;
-	}
-	
-	
-	///////////////////////////
-	// Free list
-	///////////////////////////
-	
-	template <typename N>
-	struct FreeListNode
-	{
-		FreeListNode() : freeListRefs(0), freeListNext(nullptr) { }
-		
-		std::atomic<std::uint32_t> freeListRefs;
-		std::atomic<N*> freeListNext;
-	};
-	
-	// A simple CAS-based lock-free free list. Not the fastest thing in the world under heavy contention, but
-	// simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly
-	// speedy under low contention.
-	template<typename N>		// N must inherit FreeListNode or have the same fields (and initialization of them)
-	struct FreeList
-	{
-		FreeList() : freeListHead(nullptr) { }
-		FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); }
-		void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); }
-		
-		FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION;
-		FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION;
-		
-		inline void add(N* node)
-		{
-#ifdef MCDBGQ_NOLOCKFREE_FREELIST
-			debug::DebugLock lock(mutex);
-#endif		
-			// We know that the should-be-on-freelist bit is 0 at this point, so it's safe to
-			// set it using a fetch_add
-			if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) {
-				// Oh look! We were the last ones referencing this node, and we know
-				// we want to add it to the free list, so let's do it!
-		 		add_knowing_refcount_is_zero(node);
-			}
-		}
-		
-		inline N* try_get()
-		{
-#ifdef MCDBGQ_NOLOCKFREE_FREELIST
-			debug::DebugLock lock(mutex);
-#endif		
-			auto head = freeListHead.load(std::memory_order_acquire);
-			while (head != nullptr) {
-				auto prevHead = head;
-				auto refs = head->freeListRefs.load(std::memory_order_relaxed);
-				if ((refs & REFS_MASK) == 0 || !head->freeListRefs.compare_exchange_strong(refs, refs + 1, std::memory_order_acquire, std::memory_order_relaxed)) {
-					head = freeListHead.load(std::memory_order_acquire);
-					continue;
-				}
-				
-				// Good, reference count has been incremented (it wasn't at zero), which means we can read the
-				// next and not worry about it changing between now and the time we do the CAS
-				auto next = head->freeListNext.load(std::memory_order_relaxed);
-				if (freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, std::memory_order_relaxed)) {
-					// Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no
-					// matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on).
-					assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0);
-					
-					// Decrease refcount twice, once for our ref, and once for the list's ref
-					head->freeListRefs.fetch_sub(2, std::memory_order_release);
-					return head;
-				}
-				
-				// OK, the head must have changed on us, but we still need to decrease the refcount we increased.
-				// Note that we don't need to release any memory effects, but we do need to ensure that the reference
-				// count decrement happens-after the CAS on the head.
-				refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel);
-				if (refs == SHOULD_BE_ON_FREELIST + 1) {
-					add_knowing_refcount_is_zero(prevHead);
-				}
-			}
-			
-			return nullptr;
-		}
-		
-		// Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes)
-		N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); }
-		
-	private:
-		inline void add_knowing_refcount_is_zero(N* node)
-		{
-			// Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run
-			// only one copy of this method per node at a time, i.e. the single thread case), then we know
-			// we can safely change the next pointer of the node; however, once the refcount is back above
-			// zero, then other threads could increase it (happens under heavy contention, when the refcount
-			// goes to zero in between a load and a refcount increment of a node in try_get, then back up to
-			// something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS
-			// to add the node to the actual list fails, decrease the refcount and leave the add operation to
-			// the next thread who puts the refcount back at zero (which could be us, hence the loop).
-			auto head = freeListHead.load(std::memory_order_relaxed);
-			while (true) {
-				node->freeListNext.store(head, std::memory_order_relaxed);
-				node->freeListRefs.store(1, std::memory_order_release);
-				if (!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, std::memory_order_relaxed)) {
-					// Hmm, the add failed, but we can only try again when the refcount goes back to zero
-					if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) {
-						continue;
-					}
-				}
-				return;
-			}
-		}
-		
-	private:
-		// Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention)
-		std::atomic<N*> freeListHead;
-	
-	static const std::uint32_t REFS_MASK = 0x7FFFFFFF;
-	static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000;
-		
-#ifdef MCDBGQ_NOLOCKFREE_FREELIST
-		debug::DebugMutex mutex;
-#endif
-	};
-	
-	
-	///////////////////////////
-	// Block
-	///////////////////////////
-	
-	enum InnerQueueContext { implicit_context = 0, explicit_context = 1 };
-	
-	struct Block
-	{
-		Block()
-			: next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), dynamicallyAllocated(true)
-		{
-#ifdef MCDBGQ_TRACKMEM
-			owner = nullptr;
-#endif
-		}
-		
-		template<InnerQueueContext context>
-		inline bool is_empty() const
-		{
-			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
-				// Check flags
-				for (size_t i = 0; i < BLOCK_SIZE; ++i) {
-					if (!emptyFlags[i].load(std::memory_order_relaxed)) {
-						return false;
-					}
-				}
-				
-				// Aha, empty; make sure we have all other memory effects that happened before the empty flags were set
-				std::atomic_thread_fence(std::memory_order_acquire);
-				return true;
-			}
-			else {
-				// Check counter
-				if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) {
-					std::atomic_thread_fence(std::memory_order_acquire);
-					return true;
-				}
-				assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE);
-				return false;
-			}
-		}
-		
-		// Returns true if the block is now empty (does not apply in explicit context)
-		template<InnerQueueContext context>
-		inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i)
-		{
-			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
-				// Set flag
-				assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].load(std::memory_order_relaxed));
-				emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].store(true, std::memory_order_release);
-				return false;
-			}
-			else {
-				// Increment counter
-				auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release);
-				assert(prevVal < BLOCK_SIZE);
-				return prevVal == BLOCK_SIZE - 1;
-			}
-		}
-		
-		// Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0).
-		// Returns true if the block is now empty (does not apply in explicit context).
-		template<InnerQueueContext context>
-		inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, size_t count)
-		{
-			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
-				// Set flags
-				std::atomic_thread_fence(std::memory_order_release);
-				i = BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1)) - count + 1;
-				for (size_t j = 0; j != count; ++j) {
-					assert(!emptyFlags[i + j].load(std::memory_order_relaxed));
-					emptyFlags[i + j].store(true, std::memory_order_relaxed);
-				}
-				return false;
-			}
-			else {
-				// Increment counter
-				auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release);
-				assert(prevVal + count <= BLOCK_SIZE);
-				return prevVal + count == BLOCK_SIZE;
-			}
-		}
-		
-		template<InnerQueueContext context>
-		inline void set_all_empty()
-		{
-			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
-				// Set all flags
-				for (size_t i = 0; i != BLOCK_SIZE; ++i) {
-					emptyFlags[i].store(true, std::memory_order_relaxed);
-				}
-			}
-			else {
-				// Reset counter
-				elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed);
-			}
-		}
-		
-		template<InnerQueueContext context>
-		inline void reset_empty()
-		{
-			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
-				// Reset flags
-				for (size_t i = 0; i != BLOCK_SIZE; ++i) {
-					emptyFlags[i].store(false, std::memory_order_relaxed);
-				}
-			}
-			else {
-				// Reset counter
-				elementsCompletelyDequeued.store(0, std::memory_order_relaxed);
-			}
-		}
-		
-		inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast<T*>(static_cast<void*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
-		inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast<T const*>(static_cast<void const*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
-		
-	private:
-		static_assert(std::alignment_of<T>::value <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time");
-		MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements;
-	public:
-		Block* next;
-		std::atomic<size_t> elementsCompletelyDequeued;
-		std::atomic<bool> emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? BLOCK_SIZE : 1];
-	public:
-		std::atomic<std::uint32_t> freeListRefs;
-		std::atomic<Block*> freeListNext;
-		bool dynamicallyAllocated;		// Perhaps a better name for this would be 'isNotPartOfInitialBlockPool'
-		
-#ifdef MCDBGQ_TRACKMEM
-		void* owner;
-#endif
-	};
-	static_assert(std::alignment_of<Block>::value >= std::alignment_of<T>::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping");
-
-
-#ifdef MCDBGQ_TRACKMEM
-public:
-	struct MemStats;
-private:
-#endif
-	
-	///////////////////////////
-	// Producer base
-	///////////////////////////
-	
-	struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase
-	{
-		ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) :
-			tailIndex(0),
-			headIndex(0),
-			dequeueOptimisticCount(0),
-			dequeueOvercommit(0),
-			tailBlock(nullptr),
-			isExplicit(isExplicit_),
-			parent(parent_)
-		{
-		}
-		
-		virtual ~ProducerBase() { }
-		
-		template<typename U>
-		inline bool dequeue(U& element)
-		{
-			if (isExplicit) {
-				return static_cast<ExplicitProducer*>(this)->dequeue(element);
-			}
-			else {
-				return static_cast<ImplicitProducer*>(this)->dequeue(element);
-			}
-		}
-		
-		template<typename It>
-		inline size_t dequeue_bulk(It& itemFirst, size_t max)
-		{
-			if (isExplicit) {
-				return static_cast<ExplicitProducer*>(this)->dequeue_bulk(itemFirst, max);
-			}
-			else {
-				return static_cast<ImplicitProducer*>(this)->dequeue_bulk(itemFirst, max);
-			}
-		}
-		
-		inline ProducerBase* next_prod() const { return static_cast<ProducerBase*>(next); }
-		
-		inline size_t size_approx() const
-		{
-			auto tail = tailIndex.load(std::memory_order_relaxed);
-			auto head = headIndex.load(std::memory_order_relaxed);
-			return details::circular_less_than(head, tail) ? static_cast<size_t>(tail - head) : 0;
-		}
-		
-		inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); }
-	protected:
-		std::atomic<index_t> tailIndex;		// Where to enqueue to next
-		std::atomic<index_t> headIndex;		// Where to dequeue from next
-		
-		std::atomic<index_t> dequeueOptimisticCount;
-		std::atomic<index_t> dequeueOvercommit;
-		
-		Block* tailBlock;
-		
-	public:
-		bool isExplicit;
-		ConcurrentQueue* parent;
-		
-	protected:
-#ifdef MCDBGQ_TRACKMEM
-		friend struct MemStats;
-#endif
-	};
-	
-	
-	///////////////////////////
-	// Explicit queue
-	///////////////////////////
-		
-	struct ExplicitProducer : public ProducerBase
-	{
-		explicit ExplicitProducer(ConcurrentQueue* parent_) :
-			ProducerBase(parent_, true),
-			blockIndex(nullptr),
-			pr_blockIndexSlotsUsed(0),
-			pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1),
-			pr_blockIndexFront(0),
-			pr_blockIndexEntries(nullptr),
-			pr_blockIndexRaw(nullptr)
-		{
-			size_t poolBasedIndexSize = details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1;
-			if (poolBasedIndexSize > pr_blockIndexSize) {
-				pr_blockIndexSize = poolBasedIndexSize;
-			}
-			
-			new_block_index(0);		// This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE
-		}
-		
-		~ExplicitProducer()
-		{
-			// Destruct any elements not yet dequeued.
-			// Since we're in the destructor, we can assume all elements
-			// are either completely dequeued or completely not (no halfways).
-			if (this->tailBlock != nullptr) {		// Note this means there must be a block index too
-				// First find the block that's partially dequeued, if any
-				Block* halfDequeuedBlock = nullptr;
-				if ((this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) != 0) {
-					// The head's not on a block boundary, meaning a block somewhere is partially dequeued
-					// (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary)
-					size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1);
-					while (details::circular_less_than<index_t>(pr_blockIndexEntries[i].base + BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))) {
-						i = (i + 1) & (pr_blockIndexSize - 1);
-					}
-					assert(details::circular_less_than<index_t>(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed)));
-					halfDequeuedBlock = pr_blockIndexEntries[i].block;
-				}
-				
-				// Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration)
-				auto block = this->tailBlock;
-				do {
-					block = block->next;
-					if (block->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
-						continue;
-					}
-					
-					size_t i = 0;	// Offset into block
-					if (block == halfDequeuedBlock) {
-						i = static_cast<size_t>(this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1));
-					}
-					
-					// Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index
-					auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast<size_t>(this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1));
-					while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) {
-						(*block)[i++]->~T();
-					}
-				} while (block != this->tailBlock);
-			}
-			
-			// Destroy all blocks that we own
-			if (this->tailBlock != nullptr) {
-				auto block = this->tailBlock;
-				do {
-					auto nextBlock = block->next;
-					this->parent->add_block_to_free_list(block);
-					block = nextBlock;
-				} while (block != this->tailBlock);
-			}
-			
-			// Destroy the block indices
-			auto header = static_cast<BlockIndexHeader*>(pr_blockIndexRaw);
-			while (header != nullptr) {
-				auto prev = static_cast<BlockIndexHeader*>(header->prev);
-				header->~BlockIndexHeader();
-				(Traits::free)(header);
-				header = prev;
-			}
-		}
-		
-		template<AllocationMode allocMode, typename U>
-		inline bool enqueue(U&& element)
-		{
-			index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed);
-			index_t newTailIndex = 1 + currentTailIndex;
-			if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
-				// We reached the end of a block, start a new one
-				auto startBlock = this->tailBlock;
-				auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
-				if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
-					// We can re-use the block ahead of us, it's empty!					
-					this->tailBlock = this->tailBlock->next;
-					this->tailBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();
-					
-					// We'll put the block on the block index (guaranteed to be room since we're conceptually removing the
-					// last block from it first -- except instead of removing then adding, we can just overwrite).
-					// Note that there must be a valid block index here, since even if allocation failed in the ctor,
-					// it would have been re-attempted when adding the first block to the queue; since there is such
-					// a block, a block index must have been successfully allocated.
-				}
-				else {
-					// Whatever head value we see here is >= the last value we saw here (relatively),
-					// and <= its current value. Since we have the most recent tail, the head must be
-					// <= to it.
-					auto head = this->headIndex.load(std::memory_order_relaxed);
-					assert(!details::circular_less_than<index_t>(currentTailIndex, head));
-					if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE)
-						|| (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
-						// We can't enqueue in another block because there's not enough leeway -- the
-						// tail could surpass the head by the time the block fills up! (Or we'll exceed
-						// the size limit, if the second part of the condition was true.)
-						return false;
-					}
-					// We're going to need a new block; check that the block index has room
-					if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) {
-						// Hmm, the circular block index is already full -- we'll need
-						// to allocate a new index. Note pr_blockIndexRaw can only be nullptr if
-						// the initial allocation failed in the constructor.
-						
-						MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) {
-							return false;
-						}
-						else if (!new_block_index(pr_blockIndexSlotsUsed)) {
-							return false;
-						}
-					}
-					
-					// Insert a new block in the circular linked list
-					auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
-					if (newBlock == nullptr) {
-						return false;
-					}
-#ifdef MCDBGQ_TRACKMEM
-					newBlock->owner = this;
-#endif
-					newBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();
-					if (this->tailBlock == nullptr) {
-						newBlock->next = newBlock;
-					}
-					else {
-						newBlock->next = this->tailBlock->next;
-						this->tailBlock->next = newBlock;
-					}
-					this->tailBlock = newBlock;
-					++pr_blockIndexSlotsUsed;
-				}
-
-				MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
-					// The constructor may throw. We want the element not to appear in the queue in
-					// that case (without corrupting the queue):
-					MOODYCAMEL_TRY {
-						new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
-					}
-					MOODYCAMEL_CATCH (...) {
-						// Revert change to the current block, but leave the new block available
-						// for next time
-						pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
-						this->tailBlock = startBlock == nullptr ? this->tailBlock : startBlock;
-						MOODYCAMEL_RETHROW;
-					}
-				}
-				else {
-					(void)startBlock;
-					(void)originalBlockIndexSlotsUsed;
-				}
-				
-				// Add block to block index
-				auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
-				entry.base = currentTailIndex;
-				entry.block = this->tailBlock;
-				blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release);
-				pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
-				
-				MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
-					this->tailIndex.store(newTailIndex, std::memory_order_release);
-					return true;
-				}
-			}
-			
-			// Enqueue
-			new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
-			
-			this->tailIndex.store(newTailIndex, std::memory_order_release);
-			return true;
-		}
-		
-		template<typename U>
-		bool dequeue(U& element)
-		{
-			auto tail = this->tailIndex.load(std::memory_order_relaxed);
-			auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
-			if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) {
-				// Might be something to dequeue, let's give it a try
-				
-				// Note that this if is purely for performance purposes in the common case when the queue is
-				// empty and the values are eventually consistent -- we may enter here spuriously.
-				
-				// Note that whatever the values of overcommit and tail are, they are not going to change (unless we
-				// change them) and must be the same value at this point (inside the if) as when the if condition was
-				// evaluated.
-
-				// We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below.
-				// This ensures that whatever the value we got loaded into overcommit, the load of dequeueOptisticCount in
-				// the fetch_add below will result in a value at least as recent as that (and therefore at least as large).
-				// Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all
-				// read-modify-write operations are guaranteed to work on the latest value in the modification order), but
-				// unfortunately that can't be shown to be correct using only the C++11 standard.
-				// See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case
-				std::atomic_thread_fence(std::memory_order_acquire);
-				
-				// Increment optimistic counter, then check if it went over the boundary
-				auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
-				
-				// Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever
-				// incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now
-				// have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon
-				// incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount.
-				// However, we can't assert this since both dequeueOptimisticCount and dequeueOvercommit may (independently)
-				// overflow; in such a case, though, the logic still holds since the difference between the two is maintained.
-				
-				// Note that we reload tail here in case it changed; it will be the same value as before or greater, since
-				// this load is sequenced after (happens after) the earlier load above. This is supported by read-read
-				// coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order
-				tail = this->tailIndex.load(std::memory_order_acquire);
-				if ((details::likely)(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
-					// Guaranteed to be at least one element to dequeue!
-					
-					// Get the index. Note that since there's guaranteed to be at least one element, this
-					// will never exceed tail. We need to do an acquire-release fence here since it's possible
-					// that whatever condition got us to this point was for an earlier enqueued element (that
-					// we already see the memory effects for), but that by the time we increment somebody else
-					// has incremented it, and we need to see the memory effects for *that* element, which is
-					// in such a case is necessarily visible on the thread that incremented it in the first
-					// place with the more current condition (they must have acquired a tail that is at least
-					// as recent).
-					auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);
-					
-					
-					// Determine which block the element is in
-					
-					auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
-					auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);
-					
-					// We need to be careful here about subtracting and dividing because of index wrap-around.
-					// When an index wraps, we need to preserve the sign of the offset when dividing it by the
-					// block size (in order to get a correct signed block count offset in all cases):
-					auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
-					auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE - 1);
-					auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(blockBaseIndex - headBase) / static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE));
-					auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block;
-					
-					// Dequeue
-					auto& el = *((*block)[index]);
-					if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {
-						// Make sure the element is still fully dequeued and destroyed even if the assignment
-						// throws
-						struct Guard {
-							Block* block;
-							index_t index;
-							
-							~Guard()
-							{
-								(*block)[index]->~T();
-								block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
-							}
-						} guard = { block, index };
-
-						element = std::move(el); // NOLINT
-					}
-					else {
-						element = std::move(el); // NOLINT
-						el.~T(); // NOLINT
-						block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
-					}
-					
-					return true;
-				}
-				else {
-					// Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent
-					this->dequeueOvercommit.fetch_add(1, std::memory_order_release);		// Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write
-				}
-			}
-		
-			return false;
-		}
-		
-		template<AllocationMode allocMode, typename It>
-		bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count)
-		{
-			// First, we need to make sure we have enough room to enqueue all of the elements;
-			// this means pre-allocating blocks and putting them in the block index (but only if
-			// all the allocations succeeded).
-			index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);
-			auto startBlock = this->tailBlock;
-			auto originalBlockIndexFront = pr_blockIndexFront;
-			auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
-			
-			Block* firstAllocatedBlock = nullptr;
-			
-			// Figure out how many blocks we'll need to allocate, and do so
-			size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
-			index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
-			if (blockBaseDiff > 0) {
-				// Allocate as many blocks as possible from ahead
-				while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
-					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
-					currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
-					
-					this->tailBlock = this->tailBlock->next;
-					firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
-					
-					auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
-					entry.base = currentTailIndex;
-					entry.block = this->tailBlock;
-					pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
-				}
-				
-				// Now allocate as many blocks as necessary from the block pool
-				while (blockBaseDiff > 0) {
-					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
-					currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
-					
-					auto head = this->headIndex.load(std::memory_order_relaxed);
-					assert(!details::circular_less_than<index_t>(currentTailIndex, head));
-					bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
-					if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) {
-						MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) {
-							// Failed to allocate, undo changes (but keep injected blocks)
-							pr_blockIndexFront = originalBlockIndexFront;
-							pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
-							this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
-							return false;
-						}
-						else if (full || !new_block_index(originalBlockIndexSlotsUsed)) {
-							// Failed to allocate, undo changes (but keep injected blocks)
-							pr_blockIndexFront = originalBlockIndexFront;
-							pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
-							this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
-							return false;
-						}
-						
-						// pr_blockIndexFront is updated inside new_block_index, so we need to
-						// update our fallback value too (since we keep the new index even if we
-						// later fail)
-						originalBlockIndexFront = originalBlockIndexSlotsUsed;
-					}
-					
-					// Insert a new block in the circular linked list
-					auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
-					if (newBlock == nullptr) {
-						pr_blockIndexFront = originalBlockIndexFront;
-						pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
-						this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
-						return false;
-					}
-					
-#ifdef MCDBGQ_TRACKMEM
-					newBlock->owner = this;
-#endif
-					newBlock->ConcurrentQueue::Block::template set_all_empty<explicit_context>();
-					if (this->tailBlock == nullptr) {
-						newBlock->next = newBlock;
-					}
-					else {
-						newBlock->next = this->tailBlock->next;
-						this->tailBlock->next = newBlock;
-					}
-					this->tailBlock = newBlock;
-					firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
-					
-					++pr_blockIndexSlotsUsed;
-					
-					auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
-					entry.base = currentTailIndex;
-					entry.block = this->tailBlock;
-					pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
-				}
-				
-				// Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and
-				// publish the new block index front
-				auto block = firstAllocatedBlock;
-				while (true) {
-					block->ConcurrentQueue::Block::template reset_empty<explicit_context>();
-					if (block == this->tailBlock) {
-						break;
-					}
-					block = block->next;
-				}
-				
-				MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
-					blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
-				}
-			}
-			
-			// Enqueue, one block at a time
-			index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
-			currentTailIndex = startTailIndex;
-			auto endBlock = this->tailBlock;
-			this->tailBlock = startBlock;
-			assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0);
-			if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) {
-				this->tailBlock = firstAllocatedBlock;
-			}
-			while (true) {
-				index_t stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
-				if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
-					stopIndex = newTailIndex;
-				}
-				MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
-					while (currentTailIndex != stopIndex) {
-						new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
-					}
-				}
-				else {
-					MOODYCAMEL_TRY {
-						while (currentTailIndex != stopIndex) {
-							// Must use copy constructor even if move constructor is available
-							// because we may have to revert if there's an exception.
-							// Sorry about the horrible templated next line, but it was the only way
-							// to disable moving *at compile time*, which is important because a type
-							// may only define a (noexcept) move constructor, and so calls to the
-							// cctor will not compile, even if they are in an if branch that will never
-							// be executed
-							new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
-							++currentTailIndex;
-							++itemFirst;
-						}
-					}
-					MOODYCAMEL_CATCH (...) {
-						// Oh dear, an exception's been thrown -- destroy the elements that
-						// were enqueued so far and revert the entire bulk operation (we'll keep
-						// any allocated blocks in our linked list for later, though).
-						auto constructedStopIndex = currentTailIndex;
-						auto lastBlockEnqueued = this->tailBlock;
-						
-						pr_blockIndexFront = originalBlockIndexFront;
-						pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
-						this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
-						
-						if (!details::is_trivially_destructible<T>::value) {
-							auto block = startBlock;
-							if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
-								block = firstAllocatedBlock;
-							}
-							currentTailIndex = startTailIndex;
-							while (true) {
-								stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
-								if (details::circular_less_than<index_t>(constructedStopIndex, stopIndex)) {
-									stopIndex = constructedStopIndex;
-								}
-								while (currentTailIndex != stopIndex) {
-									(*block)[currentTailIndex++]->~T();
-								}
-								if (block == lastBlockEnqueued) {
-									break;
-								}
-								block = block->next;
-							}
-						}
-						MOODYCAMEL_RETHROW;
-					}
-				}
-				
-				if (this->tailBlock == endBlock) {
-					assert(currentTailIndex == newTailIndex);
-					break;
-				}
-				this->tailBlock = this->tailBlock->next;
-			}
-			
-			MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
-				if (firstAllocatedBlock != nullptr)
-					blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
-			}
-			
-			this->tailIndex.store(newTailIndex, std::memory_order_release);
-			return true;
-		}
-		
-		template<typename It>
-		size_t dequeue_bulk(It& itemFirst, size_t max)
-		{
-			auto tail = this->tailIndex.load(std::memory_order_relaxed);
-			auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
-			auto desiredCount = static_cast<size_t>(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit));
-			if (details::circular_less_than<size_t>(0, desiredCount)) {
-				desiredCount = desiredCount < max ? desiredCount : max;
-				std::atomic_thread_fence(std::memory_order_acquire);
-				
-				auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
-				
-				tail = this->tailIndex.load(std::memory_order_acquire);
-				auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));
-				if (details::circular_less_than<size_t>(0, actualCount)) {
-					actualCount = desiredCount < actualCount ? desiredCount : actualCount;
-					if (actualCount < desiredCount) {
-						this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release);
-					}
-					
-					// Get the first index. Note that since there's guaranteed to be at least actualCount elements, this
-					// will never exceed tail.
-					auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);
-					
-					// Determine which block the first element is in
-					auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
-					auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);
-					
-					auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
-					auto firstBlockBaseIndex = firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1);
-					auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) / static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE));
-					auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1);
-					
-					// Iterate the blocks and dequeue
-					auto index = firstIndex;
-					do {
-						auto firstIndexInBlock = index;
-						index_t endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
-						endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
-						auto block = localBlockIndex->entries[indexIndex].block;
-						if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) {
-							while (index != endIndex) {
-								auto& el = *((*block)[index]);
-								*itemFirst++ = std::move(el);
-								el.~T();
-								++index;
-							}
-						}
-						else {
-							MOODYCAMEL_TRY {
-								while (index != endIndex) {
-									auto& el = *((*block)[index]);
-									*itemFirst = std::move(el);
-									++itemFirst;
-									el.~T();
-									++index;
-								}
-							}
-							MOODYCAMEL_CATCH (...) {
-								// It's too late to revert the dequeue, but we can make sure that all
-								// the dequeued objects are properly destroyed and the block index
-								// (and empty count) are properly updated before we propagate the exception
-								do {
-									block = localBlockIndex->entries[indexIndex].block;
-									while (index != endIndex) {
-										(*block)[index++]->~T();
-									}
-									block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
-									indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
-									
-									firstIndexInBlock = index;
-									endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
-									endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
-								} while (index != firstIndex + actualCount);
-								
-								MOODYCAMEL_RETHROW;
-							}
-						}
-						block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
-						indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
-					} while (index != firstIndex + actualCount);
-					
-					return actualCount;
-				}
-				else {
-					// Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent
-					this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
-				}
-			}
-			
-			return 0;
-		}
-		
-	private:
-		struct BlockIndexEntry
-		{
-			index_t base;
-			Block* block;
-		};
-		
-		struct BlockIndexHeader
-		{
-			size_t size;
-			std::atomic<size_t> front;		// Current slot (not next, like pr_blockIndexFront)
-			BlockIndexEntry* entries;
-			void* prev;
-		};
-		
-		
-		bool new_block_index(size_t numberOfFilledSlotsToExpose)
-		{
-			auto prevBlockSizeMask = pr_blockIndexSize - 1;
-			
-			// Create the new block
-			pr_blockIndexSize <<= 1;
-			auto newRawPtr = static_cast<char*>((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize));
-			if (newRawPtr == nullptr) {
-				pr_blockIndexSize >>= 1;		// Reset to allow graceful retry
-				return false;
-			}
-			
-			auto newBlockIndexEntries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(newRawPtr + sizeof(BlockIndexHeader)));
-			
-			// Copy in all the old indices, if any
-			size_t j = 0;
-			if (pr_blockIndexSlotsUsed != 0) {
-				auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask;
-				do {
-					newBlockIndexEntries[j++] = pr_blockIndexEntries[i];
-					i = (i + 1) & prevBlockSizeMask;
-				} while (i != pr_blockIndexFront);
-			}
-			
-			// Update everything
-			auto header = new (newRawPtr) BlockIndexHeader;
-			header->size = pr_blockIndexSize;
-			header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed);
-			header->entries = newBlockIndexEntries;
-			header->prev = pr_blockIndexRaw;		// we link the new block to the old one so we can free it later
-			
-			pr_blockIndexFront = j;
-			pr_blockIndexEntries = newBlockIndexEntries;
-			pr_blockIndexRaw = newRawPtr;
-			blockIndex.store(header, std::memory_order_release);
-			
-			return true;
-		}
-		
-	private:
-		std::atomic<BlockIndexHeader*> blockIndex;
-		
-		// To be used by producer only -- consumer must use the ones in referenced by blockIndex
-		size_t pr_blockIndexSlotsUsed;
-		size_t pr_blockIndexSize;
-		size_t pr_blockIndexFront;		// Next slot (not current)
-		BlockIndexEntry* pr_blockIndexEntries;
-		void* pr_blockIndexRaw;
-		
-#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
-	public:
-		ExplicitProducer* nextExplicitProducer;
-	private:
-#endif
-		
-#ifdef MCDBGQ_TRACKMEM
-		friend struct MemStats;
-#endif
-	};
-	
-	
-	//////////////////////////////////
-	// Implicit queue
-	//////////////////////////////////
-	
-	struct ImplicitProducer : public ProducerBase
-	{			
-		ImplicitProducer(ConcurrentQueue* parent_) :
-			ProducerBase(parent_, false),
-			nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE),
-			blockIndex(nullptr)
-		{
-			new_block_index();
-		}
-		
-		~ImplicitProducer()
-		{
-			// Note that since we're in the destructor we can assume that all enqueue/dequeue operations
-			// completed already; this means that all undequeued elements are placed contiguously across
-			// contiguous blocks, and that only the first and last remaining blocks can be only partially
-			// empty (all other remaining blocks must be completely full).
-			
-#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
-			// Unregister ourselves for thread termination notification
-			if (!this->inactive.load(std::memory_order_relaxed)) {
-				details::ThreadExitNotifier::unsubscribe(&threadExitListener);
-			}
-#endif
-			
-			// Destroy all remaining elements!
-			auto tail = this->tailIndex.load(std::memory_order_relaxed);
-			auto index = this->headIndex.load(std::memory_order_relaxed);
-			Block* block = nullptr;
-			assert(index == tail || details::circular_less_than(index, tail));
-			bool forceFreeLastBlock = index != tail;		// If we enter the loop, then the last (tail) block will not be freed
-			while (index != tail) {
-				if ((index & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 || block == nullptr) {
-					if (block != nullptr) {
-						// Free the old block
-						this->parent->add_block_to_free_list(block);
-					}
-					
-					block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed);
-				}
-				
-				((*block)[index])->~T();
-				++index;
-			}
-			// Even if the queue is empty, there's still one block that's not on the free list
-			// (unless the head index reached the end of it, in which case the tail will be poised
-			// to create a new block).
-			if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0)) {
-				this->parent->add_block_to_free_list(this->tailBlock);
-			}
-			
-			// Destroy block index
-			auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
-			if (localBlockIndex != nullptr) {
-				for (size_t i = 0; i != localBlockIndex->capacity; ++i) {
-					localBlockIndex->index[i]->~BlockIndexEntry();
-				}
-				do {
-					auto prev = localBlockIndex->prev;
-					localBlockIndex->~BlockIndexHeader();
-					(Traits::free)(localBlockIndex);
-					localBlockIndex = prev;
-				} while (localBlockIndex != nullptr);
-			}
-		}
-		
-		template<AllocationMode allocMode, typename U>
-		inline bool enqueue(U&& element)
-		{
-			index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed);
-			index_t newTailIndex = 1 + currentTailIndex;
-			if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
-				// We reached the end of a block, start a new one
-				auto head = this->headIndex.load(std::memory_order_relaxed);
-				assert(!details::circular_less_than<index_t>(currentTailIndex, head));
-				if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
-					return false;
-				}
-#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
-				debug::DebugLock lock(mutex);
-#endif
-				// Find out where we'll be inserting this block in the block index
-				BlockIndexEntry* idxEntry;
-				if (!insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) {
-					return false;
-				}
-				
-				// Get ahold of a new block
-				auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
-				if (newBlock == nullptr) {
-					rewind_block_index_tail();
-					idxEntry->value.store(nullptr, std::memory_order_relaxed);
-					return false;
-				}
-#ifdef MCDBGQ_TRACKMEM
-				newBlock->owner = this;
-#endif
-				newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
-
-				MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
-					// May throw, try to insert now before we publish the fact that we have this new block
-					MOODYCAMEL_TRY {
-						new ((*newBlock)[currentTailIndex]) T(std::forward<U>(element));
-					}
-					MOODYCAMEL_CATCH (...) {
-						rewind_block_index_tail();
-						idxEntry->value.store(nullptr, std::memory_order_relaxed);
-						this->parent->add_block_to_free_list(newBlock);
-						MOODYCAMEL_RETHROW;
-					}
-				}
-				
-				// Insert the new block into the index
-				idxEntry->value.store(newBlock, std::memory_order_relaxed);
-				
-				this->tailBlock = newBlock;
-				
-				MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
-					this->tailIndex.store(newTailIndex, std::memory_order_release);
-					return true;
-				}
-			}
-			
-			// Enqueue
-			new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
-			
-			this->tailIndex.store(newTailIndex, std::memory_order_release);
-			return true;
-		}
-		
-		template<typename U>
-		bool dequeue(U& element)
-		{
-			// See ExplicitProducer::dequeue for rationale and explanation
-			index_t tail = this->tailIndex.load(std::memory_order_relaxed);
-			index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
-			if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) {
-				std::atomic_thread_fence(std::memory_order_acquire);
-				
-				index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
-				tail = this->tailIndex.load(std::memory_order_acquire);
-				if ((details::likely)(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
-					index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);
-					
-					// Determine which block the element is in
-					auto entry = get_block_index_entry_for_index(index);
-					
-					// Dequeue
-					auto block = entry->value.load(std::memory_order_relaxed);
-					auto& el = *((*block)[index]);
-					
-					if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {
-#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
-						// Note: Acquiring the mutex with every dequeue instead of only when a block
-						// is released is very sub-optimal, but it is, after all, purely debug code.
-						debug::DebugLock lock(producer->mutex);
-#endif
-						struct Guard {
-							Block* block;
-							index_t index;
-							BlockIndexEntry* entry;
-							ConcurrentQueue* parent;
-							
-							~Guard()
-							{
-								(*block)[index]->~T();
-								if (block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) {
-									entry->value.store(nullptr, std::memory_order_relaxed);
-									parent->add_block_to_free_list(block);
-								}
-							}
-						} guard = { block, index, entry, this->parent };
-
-						element = std::move(el); // NOLINT
-					}
-					else {
-						element = std::move(el); // NOLINT
-						el.~T(); // NOLINT
-
-						if (block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) {
-							{
-#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
-								debug::DebugLock lock(mutex);
-#endif
-								// Add the block back into the global free pool (and remove from block index)
-								entry->value.store(nullptr, std::memory_order_relaxed);
-							}
-							this->parent->add_block_to_free_list(block);		// releases the above store
-						}
-					}
-					
-					return true;
-				}
-				else {
-					this->dequeueOvercommit.fetch_add(1, std::memory_order_release);
-				}
-			}
-		
-			return false;
-		}
-		
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable: 4706)  // assignment within conditional expression
-#endif
-		template<AllocationMode allocMode, typename It>
-		bool enqueue_bulk(It itemFirst, size_t count)
-		{
-			// First, we need to make sure we have enough room to enqueue all of the elements;
-			// this means pre-allocating blocks and putting them in the block index (but only if
-			// all the allocations succeeded).
-			
-			// Note that the tailBlock we start off with may not be owned by us any more;
-			// this happens if it was filled up exactly to the top (setting tailIndex to
-			// the first index of the next block which is not yet allocated), then dequeued
-			// completely (putting it on the free list) before we enqueue again.
-			
-			index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);
-			auto startBlock = this->tailBlock;
-			Block* firstAllocatedBlock = nullptr;
-			auto endBlock = this->tailBlock;
-			
-			// Figure out how many blocks we'll need to allocate, and do so
-			size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
-			index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
-			if (blockBaseDiff > 0) {
-#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
-				debug::DebugLock lock(mutex);
-#endif
-				do {
-					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
-					currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
-					
-					// Find out where we'll be inserting this block in the block index
-					BlockIndexEntry* idxEntry = nullptr;  // initialization here unnecessary but compiler can't always tell
-					Block* newBlock;
-					bool indexInserted = false;
-					auto head = this->headIndex.load(std::memory_order_relaxed);
-					assert(!details::circular_less_than<index_t>(currentTailIndex, head));
-					bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
-
-					if (full || !(indexInserted = insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>()) == nullptr) {
-						// Index allocation or block allocation failed; revert any other allocations
-						// and index insertions done so far for this operation
-						if (indexInserted) {
-							rewind_block_index_tail();
-							idxEntry->value.store(nullptr, std::memory_order_relaxed);
-						}
-						currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
-						for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) {
-							currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
-							idxEntry = get_block_index_entry_for_index(currentTailIndex);
-							idxEntry->value.store(nullptr, std::memory_order_relaxed);
-							rewind_block_index_tail();
-						}
-						this->parent->add_blocks_to_free_list(firstAllocatedBlock);
-						this->tailBlock = startBlock;
-						
-						return false;
-					}
-					
-#ifdef MCDBGQ_TRACKMEM
-					newBlock->owner = this;
-#endif
-					newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
-					newBlock->next = nullptr;
-					
-					// Insert the new block into the index
-					idxEntry->value.store(newBlock, std::memory_order_relaxed);
-					
-					// Store the chain of blocks so that we can undo if later allocations fail,
-					// and so that we can find the blocks when we do the actual enqueueing
-					if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) {
-						assert(this->tailBlock != nullptr);
-						this->tailBlock->next = newBlock;
-					}
-					this->tailBlock = newBlock;
-					endBlock = newBlock;
-					firstAllocatedBlock = firstAllocatedBlock == nullptr ? newBlock : firstAllocatedBlock;
-				} while (blockBaseDiff > 0);
-			}
-			
-			// Enqueue, one block at a time
-			index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
-			currentTailIndex = startTailIndex;
-			this->tailBlock = startBlock;
-			assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0);
-			if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) {
-				this->tailBlock = firstAllocatedBlock;
-			}
-			while (true) {
-				index_t stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
-				if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
-					stopIndex = newTailIndex;
-				}
-				MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
-					while (currentTailIndex != stopIndex) {
-						new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
-					}
-				}
-				else {
-					MOODYCAMEL_TRY {
-						while (currentTailIndex != stopIndex) {
-							new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
-							++currentTailIndex;
-							++itemFirst;
-						}
-					}
-					MOODYCAMEL_CATCH (...) {
-						auto constructedStopIndex = currentTailIndex;
-						auto lastBlockEnqueued = this->tailBlock;
-						
-						if (!details::is_trivially_destructible<T>::value) {
-							auto block = startBlock;
-							if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
-								block = firstAllocatedBlock;
-							}
-							currentTailIndex = startTailIndex;
-							while (true) {
-								stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
-								if (details::circular_less_than<index_t>(constructedStopIndex, stopIndex)) {
-									stopIndex = constructedStopIndex;
-								}
-								while (currentTailIndex != stopIndex) {
-									(*block)[currentTailIndex++]->~T();
-								}
-								if (block == lastBlockEnqueued) {
-									break;
-								}
-								block = block->next;
-							}
-						}
-						
-						currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
-						for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) {
-							currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
-							auto idxEntry = get_block_index_entry_for_index(currentTailIndex);
-							idxEntry->value.store(nullptr, std::memory_order_relaxed);
-							rewind_block_index_tail();
-						}
-						this->parent->add_blocks_to_free_list(firstAllocatedBlock);
-						this->tailBlock = startBlock;
-						MOODYCAMEL_RETHROW;
-					}
-				}
-				
-				if (this->tailBlock == endBlock) {
-					assert(currentTailIndex == newTailIndex);
-					break;
-				}
-				this->tailBlock = this->tailBlock->next;
-			}
-			this->tailIndex.store(newTailIndex, std::memory_order_release);
-			return true;
-		}
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-		
-		template<typename It>
-		size_t dequeue_bulk(It& itemFirst, size_t max)
-		{
-			auto tail = this->tailIndex.load(std::memory_order_relaxed);
-			auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
-			auto desiredCount = static_cast<size_t>(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit));
-			if (details::circular_less_than<size_t>(0, desiredCount)) {
-				desiredCount = desiredCount < max ? desiredCount : max;
-				std::atomic_thread_fence(std::memory_order_acquire);
-				
-				auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
-				
-				tail = this->tailIndex.load(std::memory_order_acquire);
-				auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));
-				if (details::circular_less_than<size_t>(0, actualCount)) {
-					actualCount = desiredCount < actualCount ? desiredCount : actualCount;
-					if (actualCount < desiredCount) {
-						this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release);
-					}
-					
-					// Get the first index. Note that since there's guaranteed to be at least actualCount elements, this
-					// will never exceed tail.
-					auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);
-					
-					// Iterate the blocks and dequeue
-					auto index = firstIndex;
-					BlockIndexHeader* localBlockIndex;
-					auto indexIndex = get_block_index_index_for_index(index, localBlockIndex);
-					do {
-						auto blockStartIndex = index;
-						index_t endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
-						endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
-						
-						auto entry = localBlockIndex->index[indexIndex];
-						auto block = entry->value.load(std::memory_order_relaxed);
-						if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) {
-							while (index != endIndex) {
-								auto& el = *((*block)[index]);
-								*itemFirst++ = std::move(el);
-								el.~T();
-								++index;
-							}
-						}
-						else {
-							MOODYCAMEL_TRY {
-								while (index != endIndex) {
-									auto& el = *((*block)[index]);
-									*itemFirst = std::move(el);
-									++itemFirst;
-									el.~T();
-									++index;
-								}
-							}
-							MOODYCAMEL_CATCH (...) {
-								do {
-									entry = localBlockIndex->index[indexIndex];
-									block = entry->value.load(std::memory_order_relaxed);
-									while (index != endIndex) {
-										(*block)[index++]->~T();
-									}
-									
-									if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) {
-#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
-										debug::DebugLock lock(mutex);
-#endif
-										entry->value.store(nullptr, std::memory_order_relaxed);
-										this->parent->add_block_to_free_list(block);
-									}
-									indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);
-									
-									blockStartIndex = index;
-									endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
-									endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
-								} while (index != firstIndex + actualCount);
-								
-								MOODYCAMEL_RETHROW;
-							}
-						}
-						if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) {
-							{
-#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
-								debug::DebugLock lock(mutex);
-#endif
-								// Note that the set_many_empty above did a release, meaning that anybody who acquires the block
-								// we're about to free can use it safely since our writes (and reads!) will have happened-before then.
-								entry->value.store(nullptr, std::memory_order_relaxed);
-							}
-							this->parent->add_block_to_free_list(block);		// releases the above store
-						}
-						indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);
-					} while (index != firstIndex + actualCount);
-					
-					return actualCount;
-				}
-				else {
-					this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
-				}
-			}
-			
-			return 0;
-		}
-		
-	private:
-		// The block size must be > 1, so any number with the low bit set is an invalid block base index
-		static const index_t INVALID_BLOCK_BASE = 1;
-		
-		struct BlockIndexEntry
-		{
-			std::atomic<index_t> key;
-			std::atomic<Block*> value;
-		};
-		
-		struct BlockIndexHeader
-		{
-			size_t capacity;
-			std::atomic<size_t> tail;
-			BlockIndexEntry* entries;
-			BlockIndexEntry** index;
-			BlockIndexHeader* prev;
-		};
-		
-		template<AllocationMode allocMode>
-		inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex)
-		{
-			auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);		// We're the only writer thread, relaxed is OK
-			if (localBlockIndex == nullptr) {
-				return false;  // this can happen if new_block_index failed in the constructor
-			}
-			size_t newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1);
-			idxEntry = localBlockIndex->index[newTail];
-			if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE ||
-				idxEntry->value.load(std::memory_order_relaxed) == nullptr) {
-				
-				idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
-				localBlockIndex->tail.store(newTail, std::memory_order_release);
-				return true;
-			}
-			
-			// No room in the old block index, try to allocate another one!
-			MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) {
-				return false;
-			}
-			else if (!new_block_index()) {
-				return false;
-			}
-			else {
-				localBlockIndex = blockIndex.load(std::memory_order_relaxed);
-				newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1);
-				idxEntry = localBlockIndex->index[newTail];
-				assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE);
-				idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
-				localBlockIndex->tail.store(newTail, std::memory_order_release);
-				return true;
-			}
-		}
-		
-		inline void rewind_block_index_tail()
-		{
-			auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
-			localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed);
-		}
-		
-		inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const
-		{
-			BlockIndexHeader* localBlockIndex;
-			auto idx = get_block_index_index_for_index(index, localBlockIndex);
-			return localBlockIndex->index[idx];
-		}
-		
-		inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const
-		{
-#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
-			debug::DebugLock lock(mutex);
-#endif
-			index &= ~static_cast<index_t>(BLOCK_SIZE - 1);
-			localBlockIndex = blockIndex.load(std::memory_order_acquire);
-			auto tail = localBlockIndex->tail.load(std::memory_order_acquire);
-			auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed);
-			assert(tailBase != INVALID_BLOCK_BASE);
-			// Note: Must use division instead of shift because the index may wrap around, causing a negative
-			// offset, whose negativity we want to preserve
-			auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(index - tailBase) / static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE));
-			size_t idx = (tail + offset) & (localBlockIndex->capacity - 1);
-			assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr);
-			return idx;
-		}
-		
-		bool new_block_index()
-		{
-			auto prev = blockIndex.load(std::memory_order_relaxed);
-			size_t prevCapacity = prev == nullptr ? 0 : prev->capacity;
-			auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity;
-			auto raw = static_cast<char*>((Traits::malloc)(
-				sizeof(BlockIndexHeader) +
-				std::alignment_of<BlockIndexEntry>::value - 1 + sizeof(BlockIndexEntry) * entryCount +
-				std::alignment_of<BlockIndexEntry*>::value - 1 + sizeof(BlockIndexEntry*) * nextBlockIndexCapacity));
-			if (raw == nullptr) {
-				return false;
-			}
-			
-			auto header = new (raw) BlockIndexHeader;
-			auto entries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(raw + sizeof(BlockIndexHeader)));
-			auto index = reinterpret_cast<BlockIndexEntry**>(details::align_for<BlockIndexEntry*>(reinterpret_cast<char*>(entries) + sizeof(BlockIndexEntry) * entryCount));
-			if (prev != nullptr) {
-				auto prevTail = prev->tail.load(std::memory_order_relaxed);
-				auto prevPos = prevTail;
-				size_t i = 0;
-				do {
-					prevPos = (prevPos + 1) & (prev->capacity - 1);
-					index[i++] = prev->index[prevPos];
-				} while (prevPos != prevTail);
-				assert(i == prevCapacity);
-			}
-			for (size_t i = 0; i != entryCount; ++i) {
-				new (entries + i) BlockIndexEntry;
-				entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed);
-				index[prevCapacity + i] = entries + i;
-			}
-			header->prev = prev;
-			header->entries = entries;
-			header->index = index;
-			header->capacity = nextBlockIndexCapacity;
-			header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed);
-			
-			blockIndex.store(header, std::memory_order_release);
-			
-			nextBlockIndexCapacity <<= 1;
-			
-			return true;
-		}
-		
-	private:
-		size_t nextBlockIndexCapacity;
-		std::atomic<BlockIndexHeader*> blockIndex;
-
-#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
-	public:
-		details::ThreadExitListener threadExitListener;
-	private:
-#endif
-		
-#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
-	public:
-		ImplicitProducer* nextImplicitProducer;
-	private:
-#endif
-
-#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
-		mutable debug::DebugMutex mutex;
-#endif
-#ifdef MCDBGQ_TRACKMEM
-		friend struct MemStats;
-#endif
-	};
-	
-	
-	//////////////////////////////////
-	// Block pool manipulation
-	//////////////////////////////////
-	
-	void populate_initial_block_list(size_t blockCount)
-	{
-		initialBlockPoolSize = blockCount;
-		if (initialBlockPoolSize == 0) {
-			initialBlockPool = nullptr;
-			return;
-		}
-		
-		initialBlockPool = create_array<Block>(blockCount);
-		if (initialBlockPool == nullptr) {
-			initialBlockPoolSize = 0;
-		}
-		for (size_t i = 0; i < initialBlockPoolSize; ++i) {
-			initialBlockPool[i].dynamicallyAllocated = false;
-		}
-	}
-	
-	inline Block* try_get_block_from_initial_pool()
-	{
-		if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) {
-			return nullptr;
-		}
-		
-		auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed);
-		
-		return index < initialBlockPoolSize ? (initialBlockPool + index) : nullptr;
-	}
-	
-	inline void add_block_to_free_list(Block* block)
-	{
-#ifdef MCDBGQ_TRACKMEM
-		block->owner = nullptr;
-#endif
-		if (!Traits::RECYCLE_ALLOCATED_BLOCKS && block->dynamicallyAllocated) {
-			destroy(block);
-		}
-		else {
-			freeList.add(block);
-		}
-	}
-	
-	inline void add_blocks_to_free_list(Block* block)
-	{
-		while (block != nullptr) {
-			auto next = block->next;
-			add_block_to_free_list(block);
-			block = next;
-		}
-	}
-	
-	inline Block* try_get_block_from_free_list()
-	{
-		return freeList.try_get();
-	}
-	
-	// Gets a free block from one of the memory pools, or allocates a new one (if applicable)
-	template<AllocationMode canAlloc>
-	Block* requisition_block()
-	{
-		auto block = try_get_block_from_initial_pool();
-		if (block != nullptr) {
-			return block;
-		}
-		
-		block = try_get_block_from_free_list();
-		if (block != nullptr) {
-			return block;
-		}
-		
-		MOODYCAMEL_CONSTEXPR_IF (canAlloc == CanAlloc) {
-			return create<Block>();
-		}
-		else {
-			return nullptr;
-		}
-	}
-	
-
-#ifdef MCDBGQ_TRACKMEM
-	public:
-		struct MemStats {
-			size_t allocatedBlocks;
-			size_t usedBlocks;
-			size_t freeBlocks;
-			size_t ownedBlocksExplicit;
-			size_t ownedBlocksImplicit;
-			size_t implicitProducers;
-			size_t explicitProducers;
-			size_t elementsEnqueued;
-			size_t blockClassBytes;
-			size_t queueClassBytes;
-			size_t implicitBlockIndexBytes;
-			size_t explicitBlockIndexBytes;
-			
-			friend class ConcurrentQueue;
-			
-		private:
-			static MemStats getFor(ConcurrentQueue* q)
-			{
-				MemStats stats = { 0 };
-				
-				stats.elementsEnqueued = q->size_approx();
-			
-				auto block = q->freeList.head_unsafe();
-				while (block != nullptr) {
-					++stats.allocatedBlocks;
-					++stats.freeBlocks;
-					block = block->freeListNext.load(std::memory_order_relaxed);
-				}
-				
-				for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
-					bool implicit = dynamic_cast<ImplicitProducer*>(ptr) != nullptr;
-					stats.implicitProducers += implicit ? 1 : 0;
-					stats.explicitProducers += implicit ? 0 : 1;
-					
-					if (implicit) {
-						auto prod = static_cast<ImplicitProducer*>(ptr);
-						stats.queueClassBytes += sizeof(ImplicitProducer);
-						auto head = prod->headIndex.load(std::memory_order_relaxed);
-						auto tail = prod->tailIndex.load(std::memory_order_relaxed);
-						auto hash = prod->blockIndex.load(std::memory_order_relaxed);
-						if (hash != nullptr) {
-							for (size_t i = 0; i != hash->capacity; ++i) {
-								if (hash->index[i]->key.load(std::memory_order_relaxed) != ImplicitProducer::INVALID_BLOCK_BASE && hash->index[i]->value.load(std::memory_order_relaxed) != nullptr) {
-									++stats.allocatedBlocks;
-									++stats.ownedBlocksImplicit;
-								}
-							}
-							stats.implicitBlockIndexBytes += hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry);
-							for (; hash != nullptr; hash = hash->prev) {
-								stats.implicitBlockIndexBytes += sizeof(typename ImplicitProducer::BlockIndexHeader) + hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry*);
-							}
-						}
-						for (; details::circular_less_than<index_t>(head, tail); head += BLOCK_SIZE) {
-							//auto block = prod->get_block_index_entry_for_index(head);
-							++stats.usedBlocks;
-						}
-					}
-					else {
-						auto prod = static_cast<ExplicitProducer*>(ptr);
-						stats.queueClassBytes += sizeof(ExplicitProducer);
-						auto tailBlock = prod->tailBlock;
-						bool wasNonEmpty = false;
-						if (tailBlock != nullptr) {
-							auto block = tailBlock;
-							do {
-								++stats.allocatedBlocks;
-								if (!block->ConcurrentQueue::Block::template is_empty<explicit_context>() || wasNonEmpty) {
-									++stats.usedBlocks;
-									wasNonEmpty = wasNonEmpty || block != tailBlock;
-								}
-								++stats.ownedBlocksExplicit;
-								block = block->next;
-							} while (block != tailBlock);
-						}
-						auto index = prod->blockIndex.load(std::memory_order_relaxed);
-						while (index != nullptr) {
-							stats.explicitBlockIndexBytes += sizeof(typename ExplicitProducer::BlockIndexHeader) + index->size * sizeof(typename ExplicitProducer::BlockIndexEntry);
-							index = static_cast<typename ExplicitProducer::BlockIndexHeader*>(index->prev);
-						}
-					}
-				}
-				
-				auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed);
-				stats.allocatedBlocks += freeOnInitialPool;
-				stats.freeBlocks += freeOnInitialPool;
-				
-				stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks;
-				stats.queueClassBytes += sizeof(ConcurrentQueue);
-				
-				return stats;
-			}
-		};
-		
-		// For debugging only. Not thread-safe.
-		MemStats getMemStats()
-		{
-			return MemStats::getFor(this);
-		}
-	private:
-		friend struct MemStats;
-#endif
-	
-	
-	//////////////////////////////////
-	// Producer list manipulation
-	//////////////////////////////////	
-	
-	ProducerBase* recycle_or_create_producer(bool isExplicit)
-	{
-#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
-		debug::DebugLock lock(implicitProdMutex);
-#endif
-		// Try to re-use one first
-		for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
-			if (ptr->inactive.load(std::memory_order_relaxed) && ptr->isExplicit == isExplicit) {
-				bool expected = true;
-				if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) {
-					// We caught one! It's been marked as activated, the caller can have it
-					return ptr;
-				}
-			}
-		}
-
-		return add_producer(isExplicit ? static_cast<ProducerBase*>(create<ExplicitProducer>(this)) : create<ImplicitProducer>(this));
-	}
-	
-	ProducerBase* add_producer(ProducerBase* producer)
-	{
-		// Handle failed memory allocation
-		if (producer == nullptr) {
-			return nullptr;
-		}
-		
-		producerCount.fetch_add(1, std::memory_order_relaxed);
-		
-		// Add it to the lock-free list
-		auto prevTail = producerListTail.load(std::memory_order_relaxed);
-		do {
-			producer->next = prevTail;
-		} while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed));
-		
-#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
-		if (producer->isExplicit) {
-			auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed);
-			do {
-				static_cast<ExplicitProducer*>(producer)->nextExplicitProducer = prevTailExplicit;
-			} while (!explicitProducers.compare_exchange_weak(prevTailExplicit, static_cast<ExplicitProducer*>(producer), std::memory_order_release, std::memory_order_relaxed));
-		}
-		else {
-			auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed);
-			do {
-				static_cast<ImplicitProducer*>(producer)->nextImplicitProducer = prevTailImplicit;
-			} while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast<ImplicitProducer*>(producer), std::memory_order_release, std::memory_order_relaxed));
-		}
-#endif
-		
-		return producer;
-	}
-	
-	void reown_producers()
-	{
-		// After another instance is moved-into/swapped-with this one, all the
-		// producers we stole still think their parents are the other queue.
-		// So fix them up!
-		for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) {
-			ptr->parent = this;
-		}
-	}
-	
-	
-	//////////////////////////////////
-	// Implicit producer hash
-	//////////////////////////////////
-	
-	struct ImplicitProducerKVP
-	{
-		std::atomic<details::thread_id_t> key;
-		ImplicitProducer* value;		// No need for atomicity since it's only read by the thread that sets it in the first place
-		
-		ImplicitProducerKVP() : value(nullptr) { }
-		
-		ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT
-		{
-			key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed);
-			value = other.value;
-		}
-		
-		inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT
-		{
-			swap(other);
-			return *this;
-		}
-		
-		inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT
-		{
-			if (this != &other) {
-				details::swap_relaxed(key, other.key);
-				std::swap(value, other.value);
-			}
-		}
-	};
-	
-	template<typename XT, typename XTraits>
-	friend void moodycamel::swap(typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&, typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT;
-	
-	struct ImplicitProducerHash
-	{
-		size_t capacity;
-		ImplicitProducerKVP* entries;
-		ImplicitProducerHash* prev;
-	};
-	
-	inline void populate_initial_implicit_producer_hash()
-	{
-		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) {
-			return;
-		}
-		else {
-			implicitProducerHashCount.store(0, std::memory_order_relaxed);
-			auto hash = &initialImplicitProducerHash;
-			hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
-			hash->entries = &initialImplicitProducerHashEntries[0];
-			for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) {
-				initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
-			}
-			hash->prev = nullptr;
-			implicitProducerHash.store(hash, std::memory_order_relaxed);
-		}
-	}
-	
-	void swap_implicit_producer_hashes(ConcurrentQueue& other)
-	{
-		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) {
-			return;
-		}
-		else {
-			// Swap (assumes our implicit producer hash is initialized)
-			initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries);
-			initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0];
-			other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0];
-			
-			details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount);
-			
-			details::swap_relaxed(implicitProducerHash, other.implicitProducerHash);
-			if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) {
-				implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed);
-			}
-			else {
-				ImplicitProducerHash* hash;
-				for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) {
-					continue;
-				}
-				hash->prev = &initialImplicitProducerHash;
-			}
-			if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) {
-				other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed);
-			}
-			else {
-				ImplicitProducerHash* hash;
-				for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) {
-					continue;
-				}
-				hash->prev = &other.initialImplicitProducerHash;
-			}
-		}
-	}
-	
-	// Only fails (returns nullptr) if memory allocation fails
-	ImplicitProducer* get_or_add_implicit_producer()
-	{
-		// Note that since the data is essentially thread-local (key is thread ID),
-		// there's a reduced need for fences (memory ordering is already consistent
-		// for any individual thread), except for the current table itself.
-		
-		// Start by looking for the thread ID in the current and all previous hash tables.
-		// If it's not found, it must not be in there yet, since this same thread would
-		// have added it previously to one of the tables that we traversed.
-		
-		// Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table
-		
-#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
-		debug::DebugLock lock(implicitProdMutex);
-#endif
-		
-		auto id = details::thread_id();
-		auto hashedId = details::hash_thread_id(id);
-		
-		auto mainHash = implicitProducerHash.load(std::memory_order_acquire);
-		assert(mainHash != nullptr);  // silence clang-tidy and MSVC warnings (hash cannot be null)
-		for (auto hash = mainHash; hash != nullptr; hash = hash->prev) {
-			// Look for the id in this hash
-			auto index = hashedId;
-			while (true) {		// Not an infinite loop because at least one slot is free in the hash table
-				index &= hash->capacity - 1u;
-				
-				auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed);
-				if (probedKey == id) {
-					// Found it! If we had to search several hashes deep, though, we should lazily add it
-					// to the current main hash table to avoid the extended search next time.
-					// Note there's guaranteed to be room in the current hash table since every subsequent
-					// table implicitly reserves space for all previous tables (there's only one
-					// implicitProducerHashCount).
-					auto value = hash->entries[index].value;
-					if (hash != mainHash) {
-						index = hashedId;
-						while (true) {
-							index &= mainHash->capacity - 1u;
-							auto empty = details::invalid_thread_id;
-#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
-							auto reusable = details::invalid_thread_id2;
-							if (mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_seq_cst, std::memory_order_relaxed) ||
-								mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) {
-#else
-							if (mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_seq_cst, std::memory_order_relaxed)) {
-#endif
-								mainHash->entries[index].value = value;
-								break;
-							}
-							++index;
-						}
-					}
-					
-					return value;
-				}
-				if (probedKey == details::invalid_thread_id) {
-					break;		// Not in this hash table
-				}
-				++index;
-			}
-		}
-		
-		// Insert!
-		auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed);
-		while (true) {
-			// NOLINTNEXTLINE(clang-analyzer-core.NullDereference)
-			if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) {
-				// We've acquired the resize lock, try to allocate a bigger hash table.
-				// Note the acquire fence synchronizes with the release fence at the end of this block, and hence when
-				// we reload implicitProducerHash it must be the most recent version (it only gets changed within this
-				// locked block).
-				mainHash = implicitProducerHash.load(std::memory_order_acquire);
-				if (newCount >= (mainHash->capacity >> 1)) {
-					size_t newCapacity = mainHash->capacity << 1;
-					while (newCount >= (newCapacity >> 1)) {
-						newCapacity <<= 1;
-					}
-					auto raw = static_cast<char*>((Traits::malloc)(sizeof(ImplicitProducerHash) + std::alignment_of<ImplicitProducerKVP>::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity));
-					if (raw == nullptr) {
-						// Allocation failed
-						implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
-						implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
-						return nullptr;
-					}
-					
-					auto newHash = new (raw) ImplicitProducerHash;
-					newHash->capacity = static_cast<size_t>(newCapacity);
-					newHash->entries = reinterpret_cast<ImplicitProducerKVP*>(details::align_for<ImplicitProducerKVP>(raw + sizeof(ImplicitProducerHash)));
-					for (size_t i = 0; i != newCapacity; ++i) {
-						new (newHash->entries + i) ImplicitProducerKVP;
-						newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
-					}
-					newHash->prev = mainHash;
-					implicitProducerHash.store(newHash, std::memory_order_release);
-					implicitProducerHashResizeInProgress.clear(std::memory_order_release);
-					mainHash = newHash;
-				}
-				else {
-					implicitProducerHashResizeInProgress.clear(std::memory_order_release);
-				}
-			}
-			
-			// If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table
-			// to finish being allocated by another thread (and if we just finished allocating above, the condition will
-			// always be true)
-			if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) {
-				auto producer = static_cast<ImplicitProducer*>(recycle_or_create_producer(false));
-				if (producer == nullptr) {
-					implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
-					return nullptr;
-				}
-				
-#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
-				producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback;
-				producer->threadExitListener.userData = producer;
-				details::ThreadExitNotifier::subscribe(&producer->threadExitListener);
-#endif
-				
-				auto index = hashedId;
-				while (true) {
-					index &= mainHash->capacity - 1u;
-					auto empty = details::invalid_thread_id;
-#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
-					auto reusable = details::invalid_thread_id2;
-					if (mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) {
-						implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);  // already counted as a used slot
-						mainHash->entries[index].value = producer;
-						break;
-					}
-#endif
-					if (mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_seq_cst, std::memory_order_relaxed)) {
-						mainHash->entries[index].value = producer;
-						break;
-					}
-					++index;
-				}
-				return producer;
-			}
-			
-			// Hmm, the old hash is quite full and somebody else is busy allocating a new one.
-			// We need to wait for the allocating thread to finish (if it succeeds, we add, if not,
-			// we try to allocate ourselves).
-			mainHash = implicitProducerHash.load(std::memory_order_acquire);
-		}
-	}
-	
-#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
-	void implicit_producer_thread_exited(ImplicitProducer* producer)
-	{
-		// Remove from hash
-#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
-		debug::DebugLock lock(implicitProdMutex);
-#endif
-		auto hash = implicitProducerHash.load(std::memory_order_acquire);
-		assert(hash != nullptr);		// The thread exit listener is only registered if we were added to a hash in the first place
-		auto id = details::thread_id();
-		auto hashedId = details::hash_thread_id(id);
-		details::thread_id_t probedKey;
-		
-		// We need to traverse all the hashes just in case other threads aren't on the current one yet and are
-		// trying to add an entry thinking there's a free slot (because they reused a producer)
-		for (; hash != nullptr; hash = hash->prev) {
-			auto index = hashedId;
-			do {
-				index &= hash->capacity - 1u;
-				probedKey = id;
-				if (hash->entries[index].key.compare_exchange_strong(probedKey, details::invalid_thread_id2, std::memory_order_seq_cst, std::memory_order_relaxed)) {
-					break;
-				}
-				++index;
-			} while (probedKey != details::invalid_thread_id);		// Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place
-		}
-		
-		// Mark the queue as being recyclable
-		producer->inactive.store(true, std::memory_order_release);
-	}
-	
-	static void implicit_producer_thread_exited_callback(void* userData)
-	{
-		auto producer = static_cast<ImplicitProducer*>(userData);
-		auto queue = producer->parent;
-		queue->implicit_producer_thread_exited(producer);
-	}
-#endif
-	
-	//////////////////////////////////
-	// Utility functions
-	//////////////////////////////////
-
-	template<typename TAlign>
-	static inline void* aligned_malloc(size_t size)
-	{
-		MOODYCAMEL_CONSTEXPR_IF (std::alignment_of<TAlign>::value <= std::alignment_of<details::max_align_t>::value)
-			return (Traits::malloc)(size);
-		else {
-			size_t alignment = std::alignment_of<TAlign>::value;
-			void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*));
-			if (!raw)
-				return nullptr;
-			char* ptr = details::align_for<TAlign>(reinterpret_cast<char*>(raw) + sizeof(void*));
-			*(reinterpret_cast<void**>(ptr) - 1) = raw;
-			return ptr;
-		}
-	}
-
-	template<typename TAlign>
-	static inline void aligned_free(void* ptr)
-	{
-		MOODYCAMEL_CONSTEXPR_IF (std::alignment_of<TAlign>::value <= std::alignment_of<details::max_align_t>::value)
-			return (Traits::free)(ptr);
-		else
-			(Traits::free)(ptr ? *(reinterpret_cast<void**>(ptr) - 1) : nullptr);
-	}
-
-	template<typename U>
-	static inline U* create_array(size_t count)
-	{
-		assert(count > 0);
-		U* p = static_cast<U*>(aligned_malloc<U>(sizeof(U) * count));
-		if (p == nullptr)
-			return nullptr;
-
-		for (size_t i = 0; i != count; ++i)
-			new (p + i) U();
-		return p;
-	}
-
-	template<typename U>
-	static inline void destroy_array(U* p, size_t count)
-	{
-		if (p != nullptr) {
-			assert(count > 0);
-			for (size_t i = count; i != 0; )
-				(p + --i)->~U();
-		}
-		aligned_free<U>(p);
-	}
-
-	template<typename U>
-	static inline U* create()
-	{
-		void* p = aligned_malloc<U>(sizeof(U));
-		return p != nullptr ? new (p) U : nullptr;
-	}
-
-	template<typename U, typename A1>
-	static inline U* create(A1&& a1)
-	{
-		void* p = aligned_malloc<U>(sizeof(U));
-		return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr;
-	}
-
-	template<typename U>
-	static inline void destroy(U* p)
-	{
-		if (p != nullptr)
-			p->~U();
-		aligned_free<U>(p);
-	}
-
-private:
-	std::atomic<ProducerBase*> producerListTail;
-	std::atomic<std::uint32_t> producerCount;
-	
-	std::atomic<size_t> initialBlockPoolIndex;
-	Block* initialBlockPool;
-	size_t initialBlockPoolSize;
-	
-#ifndef MCDBGQ_USEDEBUGFREELIST
-	FreeList<Block> freeList;
-#else
-	debug::DebugFreeList<Block> freeList;
-#endif
-	
-	std::atomic<ImplicitProducerHash*> implicitProducerHash;
-	std::atomic<size_t> implicitProducerHashCount;		// Number of slots logically used
-	ImplicitProducerHash initialImplicitProducerHash;
-	std::array<ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE> initialImplicitProducerHashEntries;
-	std::atomic_flag implicitProducerHashResizeInProgress;
-	
-	std::atomic<std::uint32_t> nextExplicitConsumerId;
-	std::atomic<std::uint32_t> globalExplicitConsumerOffset;
-	
-#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
-	debug::DebugMutex implicitProdMutex;
-#endif
-	
-#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
-	std::atomic<ExplicitProducer*> explicitProducers;
-	std::atomic<ImplicitProducer*> implicitProducers;
-#endif
-};
-
-
-template<typename T, typename Traits>
-ProducerToken::ProducerToken(ConcurrentQueue<T, Traits>& queue)
-	: producer(queue.recycle_or_create_producer(true))
-{
-	if (producer != nullptr) {
-		producer->token = this;
-	}
-}
-
-template<typename T, typename Traits>
-ProducerToken::ProducerToken(BlockingConcurrentQueue<T, Traits>& queue)
-	: producer(reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->recycle_or_create_producer(true))
-{
-	if (producer != nullptr) {
-		producer->token = this;
-	}
-}
-
-template<typename T, typename Traits>
-ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits>& queue)
-	: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
-{
-	initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
-	lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
-}
-
-template<typename T, typename Traits>
-ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits>& queue)
-	: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
-{
-	initialOffset = reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
-	lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
-}
-
-template<typename T, typename Traits>
-inline void swap(ConcurrentQueue<T, Traits>& a, ConcurrentQueue<T, Traits>& b) MOODYCAMEL_NOEXCEPT
-{
-	a.swap(b);
-}
-
-inline void swap(ProducerToken& a, ProducerToken& b) MOODYCAMEL_NOEXCEPT
-{
-	a.swap(b);
-}
-
-inline void swap(ConsumerToken& a, ConsumerToken& b) MOODYCAMEL_NOEXCEPT
-{
-	a.swap(b);
-}
-
-template<typename T, typename Traits>
-inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a, typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT
-{
-	a.swap(b);
-}
-
-}
-
-#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
-#pragma warning(pop)
-#endif
-
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
-#pragma GCC diagnostic pop
-#endif
diff --git a/src/moodycamel/lightweightsemaphore.h b/src/moodycamel/lightweightsemaphore.h
deleted file mode 100644
index a0414751..00000000
--- a/src/moodycamel/lightweightsemaphore.h
+++ /dev/null
@@ -1,427 +0,0 @@
-// Provides an efficient implementation of a semaphore (LightweightSemaphore).
-// This is an extension of Jeff Preshing's sempahore implementation (licensed 
-// under the terms of its separate zlib license) that has been adapted and
-// extended by Cameron Desrochers.
-
-#pragma once
-
-#include <cstddef> // For std::size_t
-#include <atomic>
-#include <type_traits> // For std::make_signed<T>
-
-#if defined(_WIN32)
-// Avoid including windows.h in a header; we only need a handful of
-// items, so we'll redeclare them here (this is relatively safe since
-// the API generally has to remain stable between Windows versions).
-// I know this is an ugly hack but it still beats polluting the global
-// namespace with thousands of generic names or adding a .cpp for nothing.
-extern "C" {
-	struct _SECURITY_ATTRIBUTES;
-	__declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName);
-	__declspec(dllimport) int __stdcall CloseHandle(void* hObject);
-	__declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds);
-	__declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount);
-}
-#elif defined(__MACH__)
-#include <mach/mach.h>
-#elif defined(__MVS__)
-#include <zos-semaphore.h>
-#elif defined(__unix__)
-#include <semaphore.h>
-
-#if defined(__GLIBC_PREREQ) && defined(_GNU_SOURCE)
-#if __GLIBC_PREREQ(2,30)
-#define MOODYCAMEL_LIGHTWEIGHTSEMAPHORE_MONOTONIC
-#endif
-#endif
-#endif
-
-namespace moodycamel
-{
-namespace details
-{
-
-// Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's
-// portable + lightweight semaphore implementations, originally from
-// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
-// LICENSE:
-// Copyright (c) 2015 Jeff Preshing
-//
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-//
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-//
-// 1. The origin of this software must not be misrepresented; you must not
-//	claim that you wrote the original software. If you use this software
-//	in a product, an acknowledgement in the product documentation would be
-//	appreciated but is not required.
-// 2. Altered source versions must be plainly marked as such, and must not be
-//	misrepresented as being the original software.
-// 3. This notice may not be removed or altered from any source distribution.
-#if defined(_WIN32)
-class Semaphore
-{
-private:
-	void* m_hSema;
-	
-	Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
-	Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
-
-public:
-	Semaphore(int initialCount = 0)
-	{
-		assert(initialCount >= 0);
-		const long maxLong = 0x7fffffff;
-		m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
-		assert(m_hSema);
-	}
-
-	~Semaphore()
-	{
-		CloseHandle(m_hSema);
-	}
-
-	bool wait()
-	{
-		const unsigned long infinite = 0xffffffff;
-		return WaitForSingleObject(m_hSema, infinite) == 0;
-	}
-	
-	bool try_wait()
-	{
-		return WaitForSingleObject(m_hSema, 0) == 0;
-	}
-	
-	bool timed_wait(std::uint64_t usecs)
-	{
-		return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == 0;
-	}
-
-	void signal(int count = 1)
-	{
-		while (!ReleaseSemaphore(m_hSema, count, nullptr));
-	}
-};
-#elif defined(__MACH__)
-//---------------------------------------------------------
-// Semaphore (Apple iOS and OSX)
-// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
-//---------------------------------------------------------
-class Semaphore
-{
-private:
-	semaphore_t m_sema;
-
-	Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
-	Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
-
-public:
-	Semaphore(int initialCount = 0)
-	{
-		assert(initialCount >= 0);
-		kern_return_t rc = semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
-		assert(rc == KERN_SUCCESS);
-		(void)rc;
-	}
-
-	~Semaphore()
-	{
-		semaphore_destroy(mach_task_self(), m_sema);
-	}
-
-	bool wait()
-	{
-		return semaphore_wait(m_sema) == KERN_SUCCESS;
-	}
-	
-	bool try_wait()
-	{
-		return timed_wait(0);
-	}
-	
-	bool timed_wait(std::uint64_t timeout_usecs)
-	{
-		mach_timespec_t ts;
-		ts.tv_sec = static_cast<unsigned int>(timeout_usecs / 1000000);
-		ts.tv_nsec = static_cast<int>((timeout_usecs % 1000000) * 1000);
-
-		// added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html
-		kern_return_t rc = semaphore_timedwait(m_sema, ts);
-		return rc == KERN_SUCCESS;
-	}
-
-	void signal()
-	{
-		while (semaphore_signal(m_sema) != KERN_SUCCESS);
-	}
-
-	void signal(int count)
-	{
-		while (count-- > 0)
-		{
-			while (semaphore_signal(m_sema) != KERN_SUCCESS);
-		}
-	}
-};
-#elif defined(__unix__) || defined(__MVS__)
-//---------------------------------------------------------
-// Semaphore (POSIX, Linux, zOS)
-//---------------------------------------------------------
-class Semaphore
-{
-private:
-	sem_t m_sema;
-
-	Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
-	Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
-
-public:
-	Semaphore(int initialCount = 0)
-	{
-		assert(initialCount >= 0);
-		int rc = sem_init(&m_sema, 0, static_cast<unsigned int>(initialCount));
-		assert(rc == 0);
-		(void)rc;
-	}
-
-	~Semaphore()
-	{
-		sem_destroy(&m_sema);
-	}
-
-	bool wait()
-	{
-		// http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
-		int rc;
-		do {
-			rc = sem_wait(&m_sema);
-		} while (rc == -1 && errno == EINTR);
-		return rc == 0;
-	}
-
-	bool try_wait()
-	{
-		int rc;
-		do {
-			rc = sem_trywait(&m_sema);
-		} while (rc == -1 && errno == EINTR);
-		return rc == 0;
-	}
-
-	bool timed_wait(std::uint64_t usecs)
-	{
-		struct timespec ts;
-		const int usecs_in_1_sec = 1000000;
-		const int nsecs_in_1_sec = 1000000000;
-#ifdef MOODYCAMEL_LIGHTWEIGHTSEMAPHORE_MONOTONIC
-		clock_gettime(CLOCK_MONOTONIC, &ts);
-#else
-		clock_gettime(CLOCK_REALTIME, &ts);
-#endif
-		ts.tv_sec += (time_t)(usecs / usecs_in_1_sec);
-		ts.tv_nsec += (long)(usecs % usecs_in_1_sec) * 1000;
-		// sem_timedwait bombs if you have more than 1e9 in tv_nsec
-		// so we have to clean things up before passing it in
-		if (ts.tv_nsec >= nsecs_in_1_sec) {
-			ts.tv_nsec -= nsecs_in_1_sec;
-			++ts.tv_sec;
-		}
-
-		int rc;
-		do {
-#ifdef MOODYCAMEL_LIGHTWEIGHTSEMAPHORE_MONOTONIC
-			rc = sem_clockwait(&m_sema, CLOCK_MONOTONIC, &ts);
-#else
-			rc = sem_timedwait(&m_sema, &ts);
-#endif
-		} while (rc == -1 && errno == EINTR);
-		return rc == 0;
-	}
-
-	void signal()
-	{
-		while (sem_post(&m_sema) == -1);
-	}
-
-	void signal(int count)
-	{
-		while (count-- > 0)
-		{
-			while (sem_post(&m_sema) == -1);
-		}
-	}
-};
-#else
-#error Unsupported platform! (No semaphore wrapper available)
-#endif
-
-}	// end namespace details
-
-
-//---------------------------------------------------------
-// LightweightSemaphore
-//---------------------------------------------------------
-class LightweightSemaphore
-{
-public:
-	typedef std::make_signed<std::size_t>::type ssize_t;
-
-private:
-	std::atomic<ssize_t> m_count;
-	details::Semaphore m_sema;
-	int m_maxSpins;
-
-	bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1)
-	{
-		ssize_t oldCount;
-		int spin = m_maxSpins;
-		while (--spin >= 0)
-		{
-			oldCount = m_count.load(std::memory_order_relaxed);
-			if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
-				return true;
-			std::atomic_signal_fence(std::memory_order_acquire);	 // Prevent the compiler from collapsing the loop.
-		}
-		oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
-		if (oldCount > 0)
-			return true;
-		if (timeout_usecs < 0)
-		{
-			if (m_sema.wait())
-				return true;
-		}
-		if (timeout_usecs > 0 && m_sema.timed_wait((std::uint64_t)timeout_usecs))
-			return true;
-		// At this point, we've timed out waiting for the semaphore, but the
-		// count is still decremented indicating we may still be waiting on
-		// it. So we have to re-adjust the count, but only if the semaphore
-		// wasn't signaled enough times for us too since then. If it was, we
-		// need to release the semaphore too.
-		while (true)
-		{
-			oldCount = m_count.load(std::memory_order_acquire);
-			if (oldCount >= 0 && m_sema.try_wait())
-				return true;
-			if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed))
-				return false;
-		}
-	}
-
-	ssize_t waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_usecs = -1)
-	{
-		assert(max > 0);
-		ssize_t oldCount;
-		int spin = m_maxSpins;
-		while (--spin >= 0)
-		{
-			oldCount = m_count.load(std::memory_order_relaxed);
-			if (oldCount > 0)
-			{
-				ssize_t newCount = oldCount > max ? oldCount - max : 0;
-				if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed))
-					return oldCount - newCount;
-			}
-			std::atomic_signal_fence(std::memory_order_acquire);
-		}
-		oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
-		if (oldCount <= 0)
-		{
-			if ((timeout_usecs == 0) || (timeout_usecs < 0 && !m_sema.wait()) || (timeout_usecs > 0 && !m_sema.timed_wait((std::uint64_t)timeout_usecs)))
-			{
-				while (true)
-				{
-					oldCount = m_count.load(std::memory_order_acquire);
-					if (oldCount >= 0 && m_sema.try_wait())
-						break;
-					if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed))
-						return 0;
-				}
-			}
-		}
-		if (max > 1)
-			return 1 + tryWaitMany(max - 1);
-		return 1;
-	}
-
-public:
-	LightweightSemaphore(ssize_t initialCount = 0, int maxSpins = 10000) : m_count(initialCount), m_maxSpins(maxSpins)
-	{
-		assert(initialCount >= 0);
-		assert(maxSpins >= 0);
-	}
-
-	bool tryWait()
-	{
-		ssize_t oldCount = m_count.load(std::memory_order_relaxed);
-		while (oldCount > 0)
-		{
-			if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
-				return true;
-		}
-		return false;
-	}
-
-	bool wait()
-	{
-		return tryWait() || waitWithPartialSpinning();
-	}
-
-	bool wait(std::int64_t timeout_usecs)
-	{
-		return tryWait() || waitWithPartialSpinning(timeout_usecs);
-	}
-
-	// Acquires between 0 and (greedily) max, inclusive
-	ssize_t tryWaitMany(ssize_t max)
-	{
-		assert(max >= 0);
-		ssize_t oldCount = m_count.load(std::memory_order_relaxed);
-		while (oldCount > 0)
-		{
-			ssize_t newCount = oldCount > max ? oldCount - max : 0;
-			if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed))
-				return oldCount - newCount;
-		}
-		return 0;
-	}
-
-	// Acquires at least one, and (greedily) at most max
-	ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs)
-	{
-		assert(max >= 0);
-		ssize_t result = tryWaitMany(max);
-		if (result == 0 && max > 0)
-			result = waitManyWithPartialSpinning(max, timeout_usecs);
-		return result;
-	}
-	
-	ssize_t waitMany(ssize_t max)
-	{
-		ssize_t result = waitMany(max, -1);
-		assert(result > 0);
-		return result;
-	}
-
-	void signal(ssize_t count = 1)
-	{
-		assert(count >= 0);
-		ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release);
-		ssize_t toRelease = -oldCount < count ? -oldCount : count;
-		if (toRelease > 0)
-		{
-			m_sema.signal((int)toRelease);
-		}
-	}
-	
-	std::size_t availableApprox() const
-	{
-		ssize_t count = m_count.load(std::memory_order_relaxed);
-		return count > 0 ? static_cast<std::size_t>(count) : 0;
-	}
-};
-
-}   // end namespace moodycamel
diff --git a/src/thread_pool.hpp b/src/thread_pool.hpp
deleted file mode 100644
index 1094e1d0..00000000
--- a/src/thread_pool.hpp
+++ /dev/null
@@ -1,303 +0,0 @@
-#pragma once
-
-#include "moodycamel/blockingconcurrentqueue.h"
-#include "syslog.hpp"
-
-#include <atomic>
-#include <csignal>
-#include <cstring>
-#include <future>
-#include <memory>
-#include <mutex>
-#include <stdexcept>
-#include <string>
-#include <thread>
-#include <vector>
-
-
-struct ThreadPoolTraits : public moodycamel::ConcurrentQueueDefaultTraits
-{
-  static const int MAX_SEMA_SPINS = 1;
-};
-
-
-class ThreadPool
-{
-private:
-  using Func  = std::function<void(void)>;
-  using Queue = moodycamel::BlockingConcurrentQueue<Func,ThreadPoolTraits>;
-
-public:
-  explicit
-  ThreadPool(std::size_t const thread_count_ = std::thread::hardware_concurrency(),
-             std::size_t const queue_depth_  = 1,
-             std::string const name_         = {})
-    : _queue(queue_depth_,thread_count_,thread_count_),
-      _name(get_thread_name(name_))
-  {
-    syslog_debug("threadpool: spawning %zu threads of queue depth %zu named '%s'",
-                 thread_count_,
-                 queue_depth_,
-                 _name.c_str());
-
-    sigset_t oldset;
-    sigset_t newset;
-
-    sigfillset(&newset);
-    pthread_sigmask(SIG_BLOCK,&newset,&oldset);
-
-    _threads.reserve(thread_count_);
-    for(std::size_t i = 0; i < thread_count_; ++i)
-      {
-        int rv;
-        pthread_t t;
-
-        rv = pthread_create(&t,NULL,ThreadPool::start_routine,this);
-        if(rv != 0)
-          {
-            syslog_warning("threadpool: error spawning thread - %d (%s)",
-                           rv,
-                           strerror(rv));
-            continue;
-          }
-
-        if(!_name.empty())
-          pthread_setname_np(t,_name.c_str());
-
-        _threads.push_back(t);
-      }
-
-    pthread_sigmask(SIG_SETMASK,&oldset,NULL);
-
-    if(_threads.empty())
-      throw std::runtime_error("threadpool: failed to spawn any threads");
-  }
-
-  ~ThreadPool()
-  {
-    syslog_debug("threadpool: destroying %zu threads named '%s'",
-                 _threads.size(),
-                 _name.c_str());
-
-    for(auto t : _threads)
-      pthread_cancel(t);
-
-    Func f;
-    while(_queue.try_dequeue(f))
-      continue;
-
-    for(auto t : _threads)
-      pthread_join(t,NULL);
-  }
-
-private:
-  static
-  std::string
-  get_thread_name(std::string const name_)
-  {
-    if(!name_.empty())
-      return name_;
-
-    char name[16];
-    pthread_getname_np(pthread_self(),name,sizeof(name));
-
-    return name;
-  }
-
-  static
-  void*
-  start_routine(void *arg_)
-  {
-    ThreadPool *btp = static_cast<ThreadPool*>(arg_);
-    ThreadPool::Func func;
-    ThreadPool::Queue &q = btp->_queue;
-    moodycamel::ConsumerToken ctok(btp->_queue);
-
-    while(true)
-      {
-        q.wait_dequeue(ctok,func);
-
-        func();
-      }
-
-    return NULL;
-  }
-
-public:
-  int
-  add_thread(std::string const name_ = {})
-  {
-    int rv;
-    pthread_t t;
-    sigset_t oldset;
-    sigset_t newset;
-    std::string name;
-
-    name = (name_.empty() ? _name : name_);
-
-    sigfillset(&newset);
-    pthread_sigmask(SIG_BLOCK,&newset,&oldset);
-    rv = pthread_create(&t,NULL,ThreadPool::start_routine,this);
-    pthread_sigmask(SIG_SETMASK,&oldset,NULL);
-
-    if(rv != 0)
-      {
-        syslog_warning("threadpool: error spawning thread - %d (%s)",
-                       rv,
-                       strerror(rv));
-        return -rv;
-      }
-
-    if(!name.empty())
-      pthread_setname_np(t,name.c_str());
-
-    {
-      std::lock_guard<std::mutex> lg(_threads_mutex);
-      _threads.push_back(t);
-    }
-
-    syslog_debug("threadpool: 1 thread added to pool '%s' named '%s'",
-                 _name.c_str(),
-                 name.c_str());
-
-    return 0;
-  }
-
-  int
-  remove_thread(void)
-  {
-    {
-      std::lock_guard<std::mutex> lg(_threads_mutex);
-      if(_threads.size() <= 1)
-        return -EINVAL;
-    }
-
-    std::promise<pthread_t> promise;
-    auto func = [&]()
-    {
-      pthread_t t;
-
-      t = pthread_self();
-      promise.set_value(t);
-
-      {
-        std::lock_guard<std::mutex> lg(_threads_mutex);
-
-        for(auto i = _threads.begin(); i != _threads.end(); ++i)
-          {
-            if(*i != t)
-              continue;
-
-            _threads.erase(i);
-            break;
-          }
-      }
-
-      char name[16];
-      pthread_getname_np(t,name,sizeof(name));
-      syslog_debug("threadpool: 1 thread removed from pool '%s' named '%s'",
-                   _name.c_str(),
-                   name);
-
-      pthread_exit(NULL);
-    };
-
-    enqueue_work(func);
-    pthread_join(promise.get_future().get(),NULL);
-
-    return 0;
-  }
-
-  int
-  set_threads(std::size_t const count_)
-  {
-    int diff;
-    {
-      std::lock_guard<std::mutex> lg(_threads_mutex);
-
-      diff = ((int)count_ - (int)_threads.size());
-    }
-
-    for(auto i = diff; i > 0; --i)
-      add_thread();
-    for(auto i = diff; i < 0; ++i)
-      remove_thread();
-
-    return diff;
-  }
-
-public:
-  template<typename FuncType>
-  void
-  enqueue_work(moodycamel::ProducerToken  &ptok_,
-               FuncType                  &&f_)
-  {
-    timespec ts = {0,10};
-    while(true)
-      {
-        if(_queue.try_enqueue(ptok_,f_))
-          return;
-        ::nanosleep(&ts,NULL);
-        ts.tv_nsec += 10;
-      }
-  }
-
-  template<typename FuncType>
-  void
-  enqueue_work(FuncType &&f_)
-  {
-    timespec ts = {0,10};
-    while(true)
-      {
-        if(_queue.try_enqueue(f_))
-          return;
-        ::nanosleep(&ts,NULL);
-        ts.tv_nsec += 10;
-      }
-  }
-
-  template<typename FuncType>
-  [[nodiscard]]
-  std::future<typename std::result_of<FuncType()>::type>
-  enqueue_task(FuncType&& f_)
-  {
-    using TaskReturnType = typename std::result_of<FuncType()>::type;
-    using Promise        = std::promise<TaskReturnType>;
-
-    auto promise = std::make_shared<Promise>();
-    auto future  = promise->get_future();
-    auto work    = [=]()
-    {
-      auto rv = f_();
-      promise->set_value(rv);
-    };
-
-    timespec ts = {0,10};
-    while(true)
-      {
-        if(_queue.try_enqueue(work))
-          break;
-        ::nanosleep(&ts,NULL);
-        ts.tv_nsec += 10;
-      }
-
-    return future;
-  }
-
-public:
-  std::vector<pthread_t>
-  threads() const
-  {
-    std::lock_guard<std::mutex> lg(_threads_mutex);
-
-    return _threads;
-  }
-
-private:
-  Queue _queue;
-
-private:
-  std::string const      _name;
-  std::vector<pthread_t> _threads;
-  mutable std::mutex     _threads_mutex;
-};