git.sesse.net Git - casparcg/commitdiff
2.0.0.2: Updated tbb version.
author    ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
          Wed, 25 May 2011 15:31:31 +0000 (15:31 +0000)
committer ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
          Wed, 25 May 2011 15:31:31 +0000 (15:31 +0000)
118 files changed:
tbb/CHANGES [new file with mode: 0644]
tbb/COPYING [new file with mode: 0644]
tbb/README [new file with mode: 0644]
tbb/bin/ia32/vc10/irml/irml.dll [new file with mode: 0644]
tbb/bin/ia32/vc10/irml/irml.pdb [new file with mode: 0644]
tbb/bin/ia32/vc10/irml/irml_debug.dll [new file with mode: 0644]
tbb/bin/ia32/vc10/irml/irml_debug.pdb [new file with mode: 0644]
tbb/bin/ia32/vc10/irml_c/irml.dll [new file with mode: 0644]
tbb/bin/ia32/vc10/irml_c/irml.pdb [new file with mode: 0644]
tbb/bin/ia32/vc10/irml_c/irml_debug.dll [new file with mode: 0644]
tbb/bin/ia32/vc10/irml_c/irml_debug.pdb [new file with mode: 0644]
tbb/bin/ia32/vc10/tbb.dll [new file with mode: 0644]
tbb/bin/ia32/vc10/tbb.pdb [new file with mode: 0644]
tbb/bin/ia32/vc10/tbb_debug.dll [new file with mode: 0644]
tbb/bin/ia32/vc10/tbb_debug.pdb [new file with mode: 0644]
tbb/bin/ia32/vc10/tbb_preview.dll [new file with mode: 0644]
tbb/bin/ia32/vc10/tbb_preview.pdb [new file with mode: 0644]
tbb/bin/ia32/vc10/tbb_preview_debug.dll [new file with mode: 0644]
tbb/bin/ia32/vc10/tbb_preview_debug.pdb [new file with mode: 0644]
tbb/bin/ia32/vc10/tbbmalloc.dll [new file with mode: 0644]
tbb/bin/ia32/vc10/tbbmalloc.pdb [new file with mode: 0644]
tbb/bin/ia32/vc10/tbbmalloc_debug.dll [new file with mode: 0644]
tbb/bin/ia32/vc10/tbbmalloc_debug.pdb [new file with mode: 0644]
tbb/bin/ia32/vc10/tbbmalloc_proxy.dll [new file with mode: 0644]
tbb/bin/ia32/vc10/tbbmalloc_proxy.pdb [new file with mode: 0644]
tbb/bin/ia32/vc10/tbbmalloc_proxy_debug.dll [new file with mode: 0644]
tbb/bin/ia32/vc10/tbbmalloc_proxy_debug.pdb [new file with mode: 0644]
tbb/bin/ia32/vc10/tbbvars.bat [new file with mode: 0644]
tbb/bin/ia32/vc10/tbbvars.csh [new file with mode: 0644]
tbb/bin/ia32/vc10/tbbvars.sh [new file with mode: 0644]
tbb/bin/tbbvars.bat [new file with mode: 0644]
tbb/include/index.html [new file with mode: 0644]
tbb/include/tbb/_aggregator_internal.h [new file with mode: 0644]
tbb/include/tbb/_concurrent_queue_internal.h [new file with mode: 0644]
tbb/include/tbb/_concurrent_unordered_internal.h [new file with mode: 0644]
tbb/include/tbb/_item_buffer.h [new file with mode: 0644]
tbb/include/tbb/_tbb_windef.h [new file with mode: 0644]
tbb/include/tbb/aligned_space.h [new file with mode: 0644]
tbb/include/tbb/atomic.h [new file with mode: 0644]
tbb/include/tbb/blocked_range.h [new file with mode: 0644]
tbb/include/tbb/blocked_range2d.h [new file with mode: 0644]
tbb/include/tbb/blocked_range3d.h [new file with mode: 0644]
tbb/include/tbb/cache_aligned_allocator.h [new file with mode: 0644]
tbb/include/tbb/combinable.h [new file with mode: 0644]
tbb/include/tbb/compat/condition_variable [new file with mode: 0644]
tbb/include/tbb/compat/ppl.h [new file with mode: 0644]
tbb/include/tbb/compat/thread [new file with mode: 0644]
tbb/include/tbb/compat/tuple [new file with mode: 0644]
tbb/include/tbb/concurrent_hash_map.h [new file with mode: 0644]
tbb/include/tbb/concurrent_priority_queue.h [new file with mode: 0644]
tbb/include/tbb/concurrent_queue.h [new file with mode: 0644]
tbb/include/tbb/concurrent_unordered_map.h [new file with mode: 0644]
tbb/include/tbb/concurrent_vector.h [new file with mode: 0644]
tbb/include/tbb/critical_section.h [new file with mode: 0644]
tbb/include/tbb/enumerable_thread_specific.h [new file with mode: 0644]
tbb/include/tbb/graph.h [new file with mode: 0644]
tbb/include/tbb/index.html [new file with mode: 0644]
tbb/include/tbb/machine/gcc_generic.h [new file with mode: 0644]
tbb/include/tbb/machine/ibm_aix51.h [new file with mode: 0644]
tbb/include/tbb/machine/linux_common.h [new file with mode: 0644]
tbb/include/tbb/machine/linux_ia32.h [new file with mode: 0644]
tbb/include/tbb/machine/linux_ia64.h [new file with mode: 0644]
tbb/include/tbb/machine/linux_intel64.h [new file with mode: 0644]
tbb/include/tbb/machine/mac_ppc.h [new file with mode: 0644]
tbb/include/tbb/machine/macos_common.h [new file with mode: 0644]
tbb/include/tbb/machine/sunos_sparc.h [new file with mode: 0644]
tbb/include/tbb/machine/windows_api.h [new file with mode: 0644]
tbb/include/tbb/machine/windows_ia32.h [new file with mode: 0644]
tbb/include/tbb/machine/windows_intel64.h [new file with mode: 0644]
tbb/include/tbb/machine/xbox360_ppc.h [new file with mode: 0644]
tbb/include/tbb/mutex.h [new file with mode: 0644]
tbb/include/tbb/null_mutex.h [new file with mode: 0644]
tbb/include/tbb/null_rw_mutex.h [new file with mode: 0644]
tbb/include/tbb/parallel_do.h [new file with mode: 0644]
tbb/include/tbb/parallel_for.h [new file with mode: 0644]
tbb/include/tbb/parallel_for_each.h [new file with mode: 0644]
tbb/include/tbb/parallel_invoke.h [new file with mode: 0644]
tbb/include/tbb/parallel_reduce.h [new file with mode: 0644]
tbb/include/tbb/parallel_scan.h [new file with mode: 0644]
tbb/include/tbb/parallel_sort.h [new file with mode: 0644]
tbb/include/tbb/parallel_while.h [new file with mode: 0644]
tbb/include/tbb/partitioner.h [new file with mode: 0644]
tbb/include/tbb/pipeline.h [new file with mode: 0644]
tbb/include/tbb/queuing_mutex.h [new file with mode: 0644]
tbb/include/tbb/queuing_rw_mutex.h [new file with mode: 0644]
tbb/include/tbb/reader_writer_lock.h [new file with mode: 0644]
tbb/include/tbb/recursive_mutex.h [new file with mode: 0644]
tbb/include/tbb/scalable_allocator.h [new file with mode: 0644]
tbb/include/tbb/spin_mutex.h [new file with mode: 0644]
tbb/include/tbb/spin_rw_mutex.h [new file with mode: 0644]
tbb/include/tbb/task.h [new file with mode: 0644]
tbb/include/tbb/task_group.h [new file with mode: 0644]
tbb/include/tbb/task_scheduler_init.h [new file with mode: 0644]
tbb/include/tbb/task_scheduler_observer.h [new file with mode: 0644]
tbb/include/tbb/tbb.h [new file with mode: 0644]
tbb/include/tbb/tbb_allocator.h [new file with mode: 0644]
tbb/include/tbb/tbb_config.h [new file with mode: 0644]
tbb/include/tbb/tbb_exception.h [new file with mode: 0644]
tbb/include/tbb/tbb_machine.h [new file with mode: 0644]
tbb/include/tbb/tbb_profiling.h [new file with mode: 0644]
tbb/include/tbb/tbb_stddef.h [new file with mode: 0644]
tbb/include/tbb/tbb_thread.h [new file with mode: 0644]
tbb/include/tbb/tbbmalloc_proxy.h [new file with mode: 0644]
tbb/include/tbb/tick_count.h [new file with mode: 0644]
tbb/lib/ia32/vc10/irml/irml.lib [new file with mode: 0644]
tbb/lib/ia32/vc10/irml/irml_debug.lib [new file with mode: 0644]
tbb/lib/ia32/vc10/irml_c/irml.lib [new file with mode: 0644]
tbb/lib/ia32/vc10/irml_c/irml_debug.lib [new file with mode: 0644]
tbb/lib/ia32/vc10/tbb.def [new file with mode: 0644]
tbb/lib/ia32/vc10/tbb.lib [new file with mode: 0644]
tbb/lib/ia32/vc10/tbb_debug.lib [new file with mode: 0644]
tbb/lib/ia32/vc10/tbb_preview.lib [new file with mode: 0644]
tbb/lib/ia32/vc10/tbb_preview_debug.lib [new file with mode: 0644]
tbb/lib/ia32/vc10/tbbmalloc.def [new file with mode: 0644]
tbb/lib/ia32/vc10/tbbmalloc.lib [new file with mode: 0644]
tbb/lib/ia32/vc10/tbbmalloc_debug.lib [new file with mode: 0644]
tbb/lib/ia32/vc10/tbbmalloc_proxy.lib [new file with mode: 0644]
tbb/lib/ia32/vc10/tbbmalloc_proxy_debug.lib [new file with mode: 0644]

diff --git a/tbb/CHANGES b/tbb/CHANGES
new file mode 100644 (file)
index 0000000..414740f
--- /dev/null
@@ -0,0 +1,1074 @@
+TBB 3.0 Update 7 commercial-aligned release
+
+Changes (w.r.t. TBB 3.0 Update 6 commercial-aligned release):
+
+- Added implementation of the platform isolation layer based on 
+  GCC atomic built-ins; it is supposed to work on any platform 
+  where GCC has these built-ins. 
+
+Community Preview Features:
+
+- Graph's dining_philosophers example added
+- A number of improvements to graph and concurrent_priority_queue
+
+
+------------------------------------------------------------------------
+TBB 3.0 Update 6 commercial-aligned release
+
+Changes (w.r.t. TBB 3.0 Update 5 commercial-aligned release):
+
+- Added Community Preview feature: task and task group priority, and
+    Fractal example demonstrating it.
+- parallel_pipeline optimized for data items of small and large sizes.
+- Graph's join_node is now parametrized with a tuple of up to 10 types.
+- Improved performance of concurrent_priority_queue.
+
+Open-source contributions integrated:
+
+- Initial NetBSD support by Aleksej Saushev.
+
+Bugs fixed:
+
+- Failure to locate Cilk runtime library to enable Cilk/TBB interop.
+- Data race that could result in concurrent_unordered_map structure
+    corruption after call to clear() method.
+- Crash caused by invoking Cilk/TBB interop after one of the libraries
+    is unloaded.
+- Stack corruption caused by PIC version of 64-bit CAS compiled by Intel
+    compiler on Linux.
+- Inconsistency of exception propagation mode possible when application
+    built with Microsoft* Visual Studio* 2008 or earlier uses TBB built
+    with Microsoft* Visual Studio* 2010.
+- Affinitizing master thread to a subset of available CPUs after TBB
+    scheduler was initialized tied all worker threads to the same CPUs.
+- Method is_stolen_task() always returned 'false' for affinitized tasks.
+- write_once_node and overwrite_node did not immediately send buffered
+    items to successors
+
+------------------------------------------------------------------------
+TBB 3.0 Update 5 commercial-aligned release
+
+Changes (w.r.t. TBB 3.0 Update 4 commercial-aligned release):
+
+- Added Community Preview feature: graph.
+- Added automatic propagation of master thread FPU settings to
+    TBB worker threads.
+- Added a public function to perform a sequentially consistent full 
+    memory fence: tbb::atomic_fence() in tbb/atomic.h.
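
A minimal sketch of calling the new fence named in the entry above (tbb::atomic_fence() in tbb/atomic.h); the payload/ready variables and function names are illustrative, not part of TBB:

    #include "tbb/atomic.h"

    int payload = 0;            // ordinary shared data (illustrative)
    tbb::atomic<bool> ready;    // publication flag; zero at namespace scope

    void publish() {
        payload = 42;
        tbb::atomic_fence();    // sequentially consistent full memory fence
        ready = true;
    }

    bool consume(int& out) {
        if (!ready) return false;
        tbb::atomic_fence();
        out = payload;
        return true;
    }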
+
+Bugs fixed:
+
+- Data race that could result in scheduler data structures corruption
+    when using fire-and-forget tasks.
+- Potential referencing of destroyed concurrent_hash_map element after
+    using erase(accessor&A) method with A acquired as const_accessor.
+- Fixed a correctness bug in the convex hull example.
+
+Open-source contributions integrated:
+
+- Patch for calls to internal::atomic_do_once() by Andrey Semashev.
+
+------------------------------------------------------------------------
+TBB 3.0 Update 4 commercial-aligned release
+
+Changes (w.r.t. TBB 3.0 Update 3 commercial-aligned release):
+
+- Added Community Preview feature: concurrent_priority_queue.
+- Fixed library loading to avoid possibility for remote code execution,
+    see http://www.microsoft.com/technet/security/advisory/2269637.mspx.
+- Added support of more than 64 cores for appropriate Microsoft* 
+    Windows* versions. For more details, see 
+    http://msdn.microsoft.com/en-us/library/dd405503.aspx.
+- Default number of worker threads is adjusted in accordance with 
+    process affinity mask.
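
For the Community Preview concurrent_priority_queue in the first entry above, a sketch of the basic push/try_pop usage (depending on the package, preview features may require linking against the tbb_preview library); the element type and function names are illustrative:

    #include "tbb/concurrent_priority_queue.h"

    tbb::concurrent_priority_queue<int> queue;  // largest element first (std::less)

    void producer() {
        queue.push(7);               // thread-safe concurrent push
    }

    bool consumer(int& out) {
        return queue.try_pop(out);   // non-blocking; returns false if empty
    }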
+
+Bugs fixed:
+
+- Calls of scalable_* functions from inside the allocator library 
+    caused issues if the functions were overridden by another module.
+- A crash occurred if methods run() and wait() were called concurrently
+    for an empty tbb::task_group (1736).
+- The tachyon example exhibited build problems associated with 
+    bug 554339 on Microsoft* Visual Studio* 2010. Project files were
+    modified as a partial workaround to overcome the problem. See 
+    http://connect.microsoft.com/VisualStudio/feedback/details/554339.
+
+------------------------------------------------------------------------
+TBB 3.0 Update 3 commercial-aligned release
+
+Changes (w.r.t. TBB 3.0 Update 2 commercial-aligned release):
+
+- cache_aligned_allocator class reworked to use scalable_aligned_malloc.
+- Improved performance of count() and equal_range() methods
+    in concurrent_unordered_map.
+- Improved implementation of 64-bit atomic loads and stores on 32-bit
+    platforms, including compilation with VC 7.1.
+- Added implementation of atomic operations on top of OSAtomic API
+    provided by Mac OS* X.
+- Removed gratuitous try/catch blocks surrounding thread function calls
+  in tbb_thread.
+- Xcode* projects were added for sudoku and game_of_life examples.
+- Xcode* projects were updated to work without TBB framework.
+
+Bugs fixed:
+
+- Fixed a data race in task scheduler destruction that on rare occasion
+    could result in memory corruption.
+- Fixed idle spinning in thread bound filters in tbb::pipeline (1670).
+
+Open-source contributions integrated:
+
+- MinGW-64 basic support by brsomoza (partially).
+- Patch for atomic.h by Andrey Semashev.
+- Support for AIX & GCC on PowerPC by Giannis Papadopoulos.
+- Various improvements by Raf Schietekat.
+
+------------------------------------------------------------------------
+TBB 3.0 Update 2 commercial-aligned release
+
+Changes (w.r.t. TBB 3.0 Update 1 commercial-aligned release):
+
+- Destructor of tbb::task_group class throws missing_wait exception
+    if there are tasks running when it is invoked.
+- Cilk-TBB interop layer added to protect TBB TLS in case of
+    "Cilk-TBB-Cilk nesting" usage model.
+- Compilation fix for dependent template names in concurrent_queue.
+- Memory allocator code refactored to ease development and maintenance.
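
The missing_wait entry above concerns destroying a tbb::task_group without calling wait(); a small sketch of the intended usage (the lambdas assume a C++0x-capable compiler, and plain function objects work as well):

    #include "tbb/task_group.h"

    void run_pair() {
        tbb::task_group g;
        g.run([]{ /* work item A */ });
        g.run([]{ /* work item B */ });
        g.wait();   // must precede destruction of g; otherwise the destructor
                    // reports the missing_wait condition described above
    }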
+
+Bugs fixed:
+
+- Improved interoperability with other Intel software tools on Linux in
+    case of dynamic replacement of memory allocator (1700)
+- Fixed install issues that prevented installation on
+    Mac OS* X 10.6.4 (1711).
+
+------------------------------------------------------------------------
+TBB 3.0 Update 1 commercial-aligned release
+
+Changes (w.r.t. TBB 3.0 commercial-aligned release):
+
+- Decreased memory fragmentation by allocations bigger than 8K.
+- Lazily allocate worker threads, to avoid creating unnecessary stacks.
+
+Bugs fixed:
+
+- TBB allocator used much more memory than malloc (1703) - see above.
+- Deadlocks happened in some specific initialization scenarios
+    of the TBB allocator (1701, 1704).
+- Regression in enumerable_thread_specific: excessive requirements
+    for object constructors.
+- A bug in construction of parallel_pipeline filters when body instance
+    was a temporary object.
+- Incorrect usage of memory fences on PowerPC and XBOX360 platforms.
+- A subtle issue in task group context binding that could result
+    in the cancellation signal being missed by nested task groups.
+- Incorrect construction of concurrent_unordered_map if specified
+    number of buckets is not power of two.
+- Broken count() and equal_range() of concurrent_unordered_map.
+- Return type of postfix form of operator++ for hash map's iterators.
+
+------------------------------------------------------------------------
+TBB 3.0 commercial-aligned release
+
+Changes (w.r.t. TBB 2.2 Update 3 commercial-aligned release):
+
+- All open-source-release changes down to TBB 2.2 U3 below
+    were incorporated into this release.
+
+------------------------------------------------------------------------
+20100406 open-source release
+
+Changes (w.r.t. 20100310 open-source release):
+
+- Added support for Microsoft* Visual Studio* 2010, including binaries.
+- Added a PDF file with recommended Design Patterns for TBB.
+- Added parallel_pipeline function and companion classes and functions
+    that provide a strongly typed lambda-friendly pipeline interface.
+- Reworked enumerable_thread_specific to use a custom implementation of
+    hash map that is more efficient for ETS usage models.
+- Added example for class task_group; see examples/task_group/sudoku.
+- Removed two examples, as they were long outdated and superseded:
+    pipeline/text_filter (use pipeline/square);
+    parallel_while/parallel_preorder (use parallel_do/parallel_preorder).
+- PDF documentation updated.
+- Other fixes and changes in code, tests, and examples.
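
A sketch of the lambda-friendly parallel_pipeline interface mentioned above, with a hypothetical two-stage pipeline (an in-order serial source feeding a parallel sink); the item count and stage bodies are illustrative:

    #include "tbb/pipeline.h"

    void run_two_stage_pipeline() {
        int next = 0;
        tbb::parallel_pipeline(
            /* max live tokens = */ 4,
            tbb::make_filter<void, int>(
                tbb::filter::serial_in_order,
                [&](tbb::flow_control& fc) -> int {
                    if (next == 1000) { fc.stop(); return 0; }
                    return next++;                 // produce items 0..999
                })
            &
            tbb::make_filter<int, void>(
                tbb::filter::parallel,
                [](int item) { /* process item */ })
        );
    }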
+
+Bugs fixed:
+
+- Eliminated build errors with MinGW32.
+- Fixed post-build step and other issues in VS projects for examples.
+- Fixed discrepancy between scalable_realloc and scalable_msize that
+    caused crashes with malloc replacement on Windows.
+
+------------------------------------------------------------------------
+20100310 open-source release
+
+Changes (w.r.t. TBB 2.2 Update 3 commercial-aligned release):
+
+- Version macros changed in anticipation of a future release.
+- Directory structure aligned with Intel(R) C++ Compiler;
+    now TBB binaries reside in <arch>/<os_key>/[bin|lib]
+    (in TBB 2.x, it was [bin|lib]/<arch>/<os_key>).
+- Visual Studio projects changed for examples: instead of separate set
+    of files for each VS version, now there is single 'msvs' directory
+    that contains workspaces for MS C++ compiler (<example>_cl.sln) and
+    Intel C++ compiler (<example>_icl.sln). Works with VS 2005 and above.
+- The name versioning scheme for backward compatibility was improved;
+    now compatibility-breaking changes are done in a separate namespace.
+- Added concurrent_unordered_map implementation based on a prototype
+    developed in Microsoft for a future version of PPL.
+- Added PPL-compatible writer-preference RW lock (reader_writer_lock).
+- Added TBB_IMPLEMENT_CPP0X macro to control injection of C++0x names
+    implemented in TBB into namespace std.
+- Added almost-C++0x-compatible std::condition_variable, plus a bunch
+    of other C++0x classes required by condition_variable.
+- With TBB_IMPLEMENT_CPP0X, tbb_thread can be also used as std::thread.
+- task.cpp was split into several translation units to structure
+    TBB scheduler sources layout. Static data layout and library
+    initialization logic were also updated.
+- TBB scheduler reworked to prevent master threads from stealing
+    work belonging to other masters.
+- Class task was extended with enqueue() method, and slightly changed
+    semantics of methods spawn() and destroy(). For exact semantics,
+    refer to TBB Reference manual.
+- task_group_context now allows for destruction by non-owner threads.
+- Added TBB_USE_EXCEPTIONS macro to control use of exceptions in TBB
+    headers. It turns off (i.e. sets to 0) automatically if specified
+    compiler options disable exception handling.
+- TBB is enabled to run on top of Microsoft's Concurrency Runtime
+    on Windows* 7 (via our worker dispatcher known as RML).
+- Removed old unused busy-waiting code in concurrent_queue.
+- Described the advanced build & test options in src/index.html.
+- Warning level for GCC raised with -Wextra and a few other options.
+- Multiple fixes and improvements in code, tests, examples, and docs.
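
For the new concurrent_unordered_map listed above (insertion and traversal may proceed concurrently; erasure is not concurrent), a small sketch with illustrative key/value types and function names:

    #include "tbb/concurrent_unordered_map.h"
    #include <string>
    #include <utility>

    tbb::concurrent_unordered_map<std::string, int> table;

    void writer() {
        table.insert(std::make_pair(std::string("answer"), 42));  // safe alongside readers
    }

    bool reader(int& out) {
        tbb::concurrent_unordered_map<std::string, int>::iterator it =
            table.find("answer");     // find() may run concurrently with insert()
        if (it == table.end()) return false;
        out = it->second;
        return true;
    }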
+
+Open-source contributions integrated:
+
+- Xbox support by Roman Lut (Deep Shadows), though further changes are
+    required to make it work; e.g. post-2.1 entry points are missing.
+- "Eventcount" by Dmitry Vyukov evolved into concurrent_monitor,
+    an internal class used in the implementation of concurrent_queue.
+
+------------------------------------------------------------------------
+TBB 2.2 Update 3 commercial-aligned release
+
+Changes (w.r.t. TBB 2.2 Update 2 commercial-aligned release):
+
+- PDF documentation updated.
+
+Bugs fixed:
+
+- concurrent_hash_map compatibility issue exposed on Linux in case
+    two versions of the container were used by different modules.
+- enforce 16-byte stack alignment for consistency with GCC; required
+    to work correctly with 128-bit variables processed by SSE.
+- construct() methods of allocator classes now use global operator new.
+
+------------------------------------------------------------------------
+TBB 2.2 Update 2 commercial-aligned release
+
+Changes (w.r.t. TBB 2.2 Update 1 commercial-aligned release):
+
+- parallel_invoke and parallel_for_each now take function objects
+    by const reference, not by value.
+- Building TBB with /MT is supported, to avoid dependency on particular
+    versions of Visual C++* runtime DLLs. TBB DLLs built with /MT
+    are located in vc_mt directory.
+- Class critical_section introduced.
+- Improvements in exception support: new exception classes introduced,
+    all exceptions are thrown via an out-of-line internal method.
+- Improvements and fixes in the TBB allocator and malloc replacement,
+    including robust memory identification, and more reliable dynamic
+    function substitution on Windows*.
+- Method swap() added to class tbb_thread.
+- Methods rehash() and bucket_count() added to concurrent_hash_map.
+- Added support for Visual Studio* 2010 Beta2. No special binaries
+    provided, but CRT-independent DLLs (vc_mt) should work.
+- Other fixes and improvements in code, tests, examples, and docs.
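
The parallel_for_each entry above (function objects now taken by const reference) in a minimal form; the container and the per-element operation are illustrative:

    #include "tbb/parallel_for_each.h"
    #include <vector>

    void process(int) { /* hypothetical per-element work */ }

    void run(const std::vector<int>& data) {
        // the functor (here a plain function) is passed by const reference
        tbb::parallel_for_each(data.begin(), data.end(), &process);
    }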
+
+Open-source contributions integrated:
+
+- The fix to build 32-bit TBB on Mac OS* X 10.6.
+- GCC-based port for SPARC Solaris by Michailo Matijkiw, with use of
+    earlier work by Raf Schietekat.
+
+Bugs fixed:
+
+- 159 - TBB build for PowerPC* running Mac OS* X.
+- 160 - IBM* Java segfault if used with TBB allocator.
+- crash in concurrent_queue<char> (1616).
+
+------------------------------------------------------------------------
+TBB 2.2 Update 1 commercial-aligned release
+
+Changes (w.r.t. TBB 2.2 commercial-aligned release):
+
+- Incorporates all changes from open-source releases below.
+- Documentation was updated.
+- TBB scheduler auto-initialization now covers all possible use cases.
+- concurrent_queue: made argument types of sizeof used in paddings
+  consistent with those actually used.
+- Memory allocator was improved: supported corner case of user's malloc
+    calling scalable_malloc (non-Windows), corrected processing of
+    memory allocation requests during tbb memory allocator startup
+    (Linux).
+- Windows malloc replacement has got better support for static objects.
+- In pipeline setups that do not allow actual parallelism, execution
+    by a single thread is guaranteed, idle spinning eliminated, and
+    performance improved.
+- RML refactoring and clean-up.
+- New constructor for concurrent_hash_map allows reserving space for
+    a number of items.
+- Operator delete() added to the TBB exception classes.
+- Lambda support was improved in parallel_reduce.
+- gcc 4.3 warnings were fixed for concurrent_queue.
+- Fixed possible initialization deadlock in modules using TBB entities
+    during construction of global static objects.
+- Copy constructor in concurrent_hash_map was fixed.
+- Fixed a couple of rare crashes in the scheduler possible before
+    in very specific use cases.
+- Fixed a rare crash in the TBB allocator running out of memory.
+- New tests were implemented, including test_lambda.cpp that checks
+    support for lambda expressions.
+- A few other small changes in code, tests, and documentation.
+
+------------------------------------------------------------------------
+20090809 open-source release
+
+Changes (w.r.t. TBB 2.2 commercial-aligned release):
+
+- Fixed known exception safety issues in concurrent_vector.
+- Better concurrency of simultaneous grow requests in concurrent_vector.
+- TBB allocator further improves performance of large object allocation.
+- Problem with source of text relocations was fixed on Linux.
+- Fixed bugs related to malloc replacement under Windows.
+- A few other small changes in code and documentation.
+
+------------------------------------------------------------------------
+TBB 2.2 commercial-aligned release
+
+Changes (w.r.t. TBB 2.1 U4 commercial-aligned release):
+
+- Incorporates all changes from open-source releases below.
+- Architecture folders renamed from em64t to intel64 and from itanium
+    to ia64.
+- Major Interface version changed from 3 to 4. Deprecated interfaces
+    might be removed in future releases.
+- Parallel algorithms that use partitioners have switched to use
+    the auto_partitioner by default.
+- Improved memory allocator performance for allocations bigger than 8K.
+- Added new thread-bound filters functionality for pipeline.
+- New implementation of concurrent_hash_map that improves performance
+    significantly.
+- A few other small changes in code and documentation.
+
+------------------------------------------------------------------------
+20090511 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Basic support for MinGW32 development kit.
+- Added tbb::zero_allocator class that initializes memory with zeros.
+    It can be used as an adaptor to any STL-compatible allocator class.
+- Added tbb::parallel_for_each template function as alias to parallel_do.
+- Added more overloads for tbb::parallel_for.
+- Added support for exact exception propagation (can only be used with
+    compilers that support C++0x std::exception_ptr).
+- tbb::atomic template class can be used with enumerations.
+- mutex, recursive_mutex, spin_mutex, spin_rw_mutex classes extended
+    with explicit lock/unlock methods.
+- Fixed size() and grow_to_at_least() methods of tbb::concurrent_vector
+    to provide space allocation guarantees. More methods added for
+    compatibility with std::vector, including some from C++0x.
+- Preview of a lambda-friendly interface for low-level use of tasks.
+- scalable_msize function added to the scalable allocator (Windows only).
+- Rationalized internal auxiliary functions for spin-waiting and backoff.
+- Several tests undergo decent refactoring.
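
One of the new parallel_for overloads mentioned above takes an index range and a functor directly, without an explicit blocked_range or grain size; a sketch with an illustrative body:

    #include "tbb/parallel_for.h"
    #include <cstddef>

    struct ZeroFill {
        float* a;
        void operator()(std::size_t i) const { a[i] = 0.0f; }  // one call per index
    };

    void fill(float* a, std::size_t n) {
        ZeroFill body = { a };
        tbb::parallel_for(std::size_t(0), n, body);   // iterates over [0, n)
    }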
+
+Changes affecting backward compatibility:
+
+- Improvements in concurrent_queue, including limited API changes.
+    The previous version is deprecated; its functionality is accessible
+    via methods of the new tbb::concurrent_bounded_queue class.
+- grow* and push_back methods of concurrent_vector changed to return
+    iterators; old semantics is deprecated.
+
+------------------------------------------------------------------------
+TBB 2.1 Update 4 commercial-aligned release
+
+Changes (w.r.t. TBB 2.1 U3 commercial-aligned release):
+
+- Added tests for aligned memory allocations and malloc replacement.
+- Several improvements for better bundling with Intel(R) C++ Compiler.
+- A few other small changes in code and documentation.
+
+Bugs fixed:
+
+- 150 - request to build TBB examples with debug info in release mode.
+- backward compatibility issue with concurrent_queue on Windows.
+- dependency on VS 2005 SP1 runtime libraries removed.
+- compilation of GUI examples under Xcode* 3.1 (1577).
+- On Windows, TBB allocator classes can be instantiated with const types
+    for compatibility with MS implementation of STL containers (1566).
+
+------------------------------------------------------------------------
+20090313 open-source release
+
+Changes (w.r.t. 20081109 open-source release):
+
+- Includes all changes introduced in TBB 2.1 Update 2 & Update 3
+    commercial-aligned releases (see below for details).
+- Added tbb::parallel_invoke template function. It runs up to 10
+    user-defined functions in parallel and waits for them to complete.
+- Added a special library providing ability to replace the standard
+    memory allocation routines in Microsoft* C/C++ RTL (malloc/free,
+    global new/delete, etc.) with the TBB memory allocator.
+    Usage details are described in include/tbb/tbbmalloc_proxy.h file.
+- Task scheduler switched to use new implementation of its core
+    functionality (deque based task pool, new structure of arena slots).
+- Preview of Microsoft* Visual Studio* 2005 project files for
+    building the library is available in build/vsproject folder.
+- Added tests for aligned memory allocations and malloc replacement.
+- Added parallel_for/game_of_life.net example (for Windows only)
+    showing TBB usage in a .NET application.
+- A number of other fixes and improvements to code, tests, makefiles,
+    examples and documents.
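
The malloc-replacement library above is used by including its header in a single source file and linking the proxy library, as described in include/tbb/tbbmalloc_proxy.h; a sketch of that usage on Windows:

    // In exactly one source file of the application, and link with
    // tbbmalloc_proxy.lib (debug builds: tbbmalloc_proxy_debug.lib).
    #include "tbb/tbbmalloc_proxy.h"

    int main() {
        int* p = new int[1024];   // global new/delete and malloc/free are now
        delete[] p;               // routed to the TBB memory allocator
        return 0;
    }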
+
+Bugs fixed:
+
+- The same list as in TBB 2.1 Update 4 right above.
+
+------------------------------------------------------------------------
+TBB 2.1 Update 3 commercial-aligned release
+
+Changes (w.r.t. TBB 2.1 U2 commercial-aligned release):
+
+- Added support for aligned allocations to the TBB memory allocator.
+- Added a special library to use with LD_PRELOAD on Linux* in order to
+    replace the standard memory allocation routines in C/C++ with the
+    TBB memory allocator.
+- Added null_mutex and null_rw_mutex: no-op classes interface-compliant
+    to other TBB mutexes.
+- Improved performance of parallel_sort, to close most of the serial gap
+    with std::sort, and beat it on 2 and more cores.
+- A few other small changes.
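
The parallel_sort entry above in its simplest form (ascending order via std::less); the container is illustrative:

    #include "tbb/parallel_sort.h"
    #include <vector>

    void sort_values(std::vector<float>& v) {
        tbb::parallel_sort(v.begin(), v.end());  // random-access iterators required
    }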
+
+Bugs fixed:
+
+- the problem where parallel_for hung after an exception was thrown
+    if affinity_partitioner was used (1556).
+- get rid of VS warnings about mbstowcs deprecation (1560),
+    as well as some other warnings.
+- operator== for concurrent_vector::iterator fixed to work correctly
+    with different vector instances.
+
+------------------------------------------------------------------------
+TBB 2.1 Update 2 commercial-aligned release
+
+Changes (w.r.t. TBB 2.1 U1 commercial-aligned release):
+
+- Incorporates all open-source-release changes down to TBB 2.1 U1,
+    except for:
+    - 20081019 addition of enumerable_thread_specific;
+- Warning level for Microsoft* Visual C++* compiler raised to /W4 /Wp64;
+    warnings found on this level were cleaned or suppressed.
+- Added TBB_runtime_interface_version API function.
+- Added new example: pipeline/square.
+- Added exception handling and cancellation support
+    for parallel_do and pipeline.
+- Added copy constructor and [begin,end) constructor to concurrent_queue.
+- Added some support for beta version of Intel(R) Parallel Amplifier.
+- Added scripts to set environment for cross-compilation of 32-bit
+    applications on 64-bit Linux with Intel(R) C++ Compiler.
+- Fixed semantics of concurrent_vector::clear() to not deallocate
+    internal arrays. Fixed compact() to perform such deallocation later.
+- Fixed the issue with atomic<T*> when T is incomplete type.
+- Improved support for PowerPC* Macintosh*, including the fix
+    for a bug in masked compare-and-swap reported by a customer.
+- As usual, a number of other improvements everywhere.
+
+------------------------------------------------------------------------
+20081109 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Added new serial out of order filter for tbb::pipeline.
+- Fixed the issue with atomic<T*>::operator= reported at the forum.
+- Fixed the issue with using tbb::task::self() in task destructor
+    reported at the forum.
+- A number of other improvements to code, tests, makefiles, examples
+    and documents.
+
+Open-source contributions integrated:
+- Changes in the memory allocator were partially integrated.
+
+------------------------------------------------------------------------
+20081019 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Introduced enumerable_thread_specific<T>.  This new class provides a
+    wrapper around native thread local storage as well as iterators and
+    ranges for accessing the thread local copies (1533).
+- Improved support for Intel(R) Threading Analysis Tools
+    on Intel(R) 64 architecture.
+- Dependency from Microsoft* CRT was integrated to the libraries using
+    manifests, to avoid issues if called from code that uses different
+    version of Visual C++* runtime than the library.
+- Introduced new defines TBB_USE_ASSERT, TBB_USE_DEBUG,
+    TBB_USE_PERFORMANCE_WARNINGS, TBB_USE_THREADING_TOOLS.
+- A number of other improvements to code, tests, makefiles, examples
+    and documents.
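
A sketch of the enumerable_thread_specific pattern described above: each thread accumulates into its own copy obtained via local(), and the copies are then visited through the container's iterators. The summation body, exemplar value, and names are illustrative:

    #include "tbb/enumerable_thread_specific.h"
    #include "tbb/parallel_for.h"
    #include "tbb/blocked_range.h"
    #include <cstddef>

    typedef tbb::enumerable_thread_specific<long> Sums;
    Sums partial_sums(0);                    // each thread's copy starts at 0

    struct Accumulate {
        const int* data;
        void operator()(const tbb::blocked_range<std::size_t>& r) const {
            long& local = partial_sums.local();      // this thread's copy
            for (std::size_t i = r.begin(); i != r.end(); ++i)
                local += data[i];
        }
    };

    long total(const int* data, std::size_t n) {
        Accumulate body = { data };
        tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n), body);
        long sum = 0;
        for (Sums::iterator it = partial_sums.begin(); it != partial_sums.end(); ++it)
            sum += *it;                              // combine per-thread copies
        return sum;
    }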
+
+Open-source contributions integrated:
+
+- linker optimization: /incremental:no .
+
+------------------------------------------------------------------------
+20080925 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Same fix for a memory leak in the memory allocator as in TBB 2.1 U1.
+- Improved support for lambda functions.
+- Fixed more concurrent_queue issues reported at the forum.
+- A number of other improvements to code, tests, makefiles, examples
+    and documents.
+
+------------------------------------------------------------------------
+TBB 2.1 Update 1 commercial-aligned release
+
+Changes (w.r.t. TBB 2.1 commercial-aligned release):
+
+- Fixed small memory leak in the memory allocator.
+- Incorporates all open-source-release changes since TBB 2.1, except for:
+    - 20080825 changes for parallel_do;
+
+------------------------------------------------------------------------
+20080825 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Added exception handling and cancellation support for parallel_do.
+- Added default HashCompare template argument for concurrent_hash_map.
+- Fixed concurrent_queue.clear() issues due to incorrect assumption
+    about clear() being a private method.
+- Added the possibility to use TBB in applications that change
+    default calling conventions (Windows* only).
+- Many improvements to code, tests, examples, makefiles and documents.
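
With the default HashCompare argument noted above, a concurrent_hash_map can be declared with just the key and mapped types for common keys; a sketch of accessor-based insertion (names are illustrative):

    #include "tbb/concurrent_hash_map.h"

    typedef tbb::concurrent_hash_map<int, int> Table;  // default HashCompare

    void bump(Table& table, int key) {
        Table::accessor a;            // holds a write lock on the element
        if (table.insert(a, key))     // true if the element was newly created
            a->second = 0;            // initialize the fresh slot
        ++a->second;                  // safe: the accessor is still held
    }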
+
+Bugs fixed:
+
+- 120, 130 - missing memset declaration in concurrent_hash_map.h
+
+------------------------------------------------------------------------
+20080724 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Inline assembly for atomic operations improved for gcc 4.3
+- A few more improvements to the code.
+
+------------------------------------------------------------------------
+20080709 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- operator=() was added to the tbb_thread class according to
+    the current working draft for std::thread.
+- Recognizing SPARC* in makefiles for Linux* and Sun Solaris*.
+
+Bugs fixed:
+
+- 127 - concurrent_hash_map::range fixed to split correctly.
+
+Open-source contributions integrated:
+
+- fix_set_midpoint.diff by jyasskin
+- SPARC* support in makefiles by Raf Schietekat
+
+------------------------------------------------------------------------
+20080622 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Fixed a hang that rarely happened on Linux
+    during deinitialization of the TBB scheduler.
+- Improved support for Intel(R) Thread Checker.
+- A few more improvements to the code.
+
+------------------------------------------------------------------------
+TBB 2.1 commercial-aligned release
+
+Changes (w.r.t. TBB 2.0 U3 commercial-aligned release):
+
+- All open-source-release changes down to, and including, TBB 2.0 below,
+    were incorporated into this release.
+
+------------------------------------------------------------------------
+20080605 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Explicit control of exported symbols by version scripts added on Linux.
+- Interfaces polished for exception handling & algorithm cancellation.
+- Cache behavior improvements in the scalable allocator.
+- Improvements in text_filter, polygon_overlay, and other examples.
+- A lot of other stability improvements in code, tests, and makefiles.
+- First release where binary packages include headers/docs/examples, so
+    binary packages are now self-sufficient for using TBB.
+
+Open-source contributions integrated:
+
+- atomics patch (partially).
+- tick_count warning patch.
+
+Bugs fixed:
+
+- 118 - fix for boost compatibility.
+- 123 - fix for tbb_machine.h.
+
+------------------------------------------------------------------------
+20080512 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Fixed a problem with backward binary compatibility
+    of debug Linux builds.
+- Sun* Studio* support added.
+- soname support added on Linux via linker script. To restore backward
+    binary compatibility, *.so -> *.so.2 softlinks should be created.
+- concurrent_hash_map improvements - added a few new forms of insert()
+    method and fixed precondition and guarantees of erase() methods.
+    Added runtime warning reporting about bad hash function used for
+    the container. Various improvements for performance and concurrency.
+- Cancellation mechanism reworked so that it does not hurt scalability.
+- Algorithm parallel_do reworked. Requirement for Body::argument_type
+    definition removed, and work item argument type can be arbitrarily
+    cv-qualified.
+- polygon_overlay example added.
+- A few more improvements to code, tests, examples and Makefiles.
+
+Open-source contributions integrated:
+
+- Soname support patch for Bugzilla #112.
+
+Bugs fixed:
+
+- 112 - fix for soname support.
+
+------------------------------------------------------------------------
+TBB 2.0 U3 commercial-aligned release (package 017, April 20, 2008)
+
+Corresponds to commercial 019 (for Linux*, 020; for Mac OS* X, 018)
+packages.
+
+Changes (w.r.t. TBB 2.0 U2 commercial-aligned release):
+
+- Does not contain open-source-release changes below; this release is
+    only a minor update of TBB 2.0 U2.
+- Removed spin-waiting in pipeline and concurrent_queue.
+- A few more small bug fixes from open-source releases below.
+
+------------------------------------------------------------------------
+20080408 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- count_strings example reworked: new word generator implemented, hash
+    function replaced, and tbb_allocator is used with std::string class.
+- Static methods of spin_rw_mutex were replaced by normal member
+    functions, and the class name was versioned.
+- tacheon example was renamed to tachyon.
+- Improved support for Intel(R) Thread Checker.
+- A few more minor improvements.
+
+Open-source contributions integrated:
+
+- Two sets of Sun patches for IA Solaris support.
+
+------------------------------------------------------------------------
+20080402 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Exception handling and cancellation support for tasks and algorithms
+    fully enabled.
+- Exception safety guarantees defined and fixed for all concurrent
+    containers.
+- User-defined memory allocator support added to all concurrent
+    containers.
+- Performance improvement of concurrent_hash_map, spin_rw_mutex.
+- Critical fix for a rare race condition during scheduler
+    initialization/de-initialization.
+- New methods added for concurrent containers to be closer to STL,
+    as well as automatic filters removal from pipeline
+    and __TBB_AtomicAND function.
+- The volatile keyword dropped from where it is not really needed.
+- A few more minor improvements.
+
+------------------------------------------------------------------------
+20080319 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Support for gcc version 4.3 was added.
+- tbb_thread class, near compatible with std::thread expected in C++0x,
+    was added.
+
+Bugs fixed:
+
+- 116 - fix for compilation issues with gcc version 4.2.1.
+- 120 - fix for compilation issues with gcc version 4.3.
+
+------------------------------------------------------------------------
+20080311 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- An enumerator added for pipeline filter types (serial vs. parallel).
+- New task_scheduler_observer class introduced, to observe when
+    threads start and finish interacting with the TBB task scheduler.
+- task_scheduler_init reverted to not use internal versioned class;
+    binary compatibility guaranteed with stable releases only.
+- Various improvements to code, tests, examples and Makefiles.
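
A sketch of the new task_scheduler_observer hooks from the entry above; the logging body is illustrative:

    #include "tbb/task_scheduler_observer.h"
    #include <cstdio>

    class ThreadLogger : public tbb::task_scheduler_observer {
    public:
        ThreadLogger() { observe(true); }       // start receiving callbacks
        /*override*/ void on_scheduler_entry(bool is_worker) {
            std::printf("thread entered the scheduler (worker=%d)\n", int(is_worker));
        }
        /*override*/ void on_scheduler_exit(bool is_worker) {
            std::printf("thread left the scheduler (worker=%d)\n", int(is_worker));
        }
    };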
+
+------------------------------------------------------------------------
+20080304 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Task-to-thread affinity support, previously kept under a macro,
+    now fully legalized.
+- Work-in-progress on cache_aligned_allocator improvements.
+- Pipeline now really supports a parallel input stage; it is no longer
+    serialized.
+- Various improvements to code, tests, examples and Makefiles.
+
+Bugs fixed:
+
+- 119 - fix for scalable_malloc sometimes failing to return a big block.
+- TR575 - fixed a deadlock occurring on Windows in startup/shutdown
+    under some conditions.
+
+------------------------------------------------------------------------
+20080226 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Introduced tbb_allocator to select between standard allocator and
+    tbb::scalable_allocator when available.
+- Removed spin-waiting in pipeline and concurrent_queue.
+- Improved performance of concurrent_hash_map by using tbb_allocator.
+- Improved support for Intel(R) Thread Checker.
+- Various improvements to code, tests, examples and Makefiles.
+
+------------------------------------------------------------------------
+TBB 2.0 U2 commercial-aligned release (package 017, February 14, 2008)
+
+Corresponds to commercial 017 (for Linux*, 018; for Mac OS* X, 016)
+packages.
+
+Changes (w.r.t. TBB 2.0 U1 commercial-aligned release):
+
+- Does not contain open-source-release changes below; this release is
+    only a minor update of TBB 2.0 U1.
+- Add support for Microsoft* Visual Studio* 2008, including binary
+    libraries and VS2008 projects for examples.
+- Use SwitchToThread() not Sleep() to yield threads on Windows*.
+- Enhancements to Doxygen-readable comments in source code.
+- A few more small bug fixes from open-source releases below.
+
+Bugs fixed:
+
+- TR569 - Memory leak in concurrent_queue.
+
+------------------------------------------------------------------------
+20080207 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Improvements and minor fixes in VS2008 projects for examples.
+- Improvements in code for gating worker threads that wait for work,
+  previously consolidated under #if IMPROVED_GATING, now legalized.
+- Cosmetic changes in code, examples, tests.
+
+Bugs fixed:
+
+- 113 - Iterators and ranges should be convertible to their const
+    counterparts.
+- TR569 - Memory leak in concurrent_queue.
+
+------------------------------------------------------------------------
+20080122 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Updated examples/parallel_for/seismic to improve the visuals and to
+    use the affinity_partitioner (20071127 and forward) for better
+    performance.
+- Minor improvements to unittests and performance tests.
+
+------------------------------------------------------------------------
+20080115 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Cleanup, simplifications and enhancements to the Makefiles for
+    building the libraries (see build/index.html for high-level
+    changes) and the examples.
+- Use SwitchToThread() not Sleep() to yield threads on Windows*.
+- Engineering work-in-progress on exception safety/support.
+- Engineering work-in-progress on affinity_partitioner for
+    parallel_reduce.
+- Engineering work-in-progress on improved gating for worker threads
+    (idle workers now block in the OS instead of spinning).
+- Enhancements to Doxygen-readable comments in source code.
+
+Bugs fixed:
+
+- 102 - Support for parallel build with gmake -j
+- 114 - /Wp64 build warning on Windows*.
+
+------------------------------------------------------------------------
+20071218 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Full support for Microsoft* Visual Studio* 2008 in open-source.
+    Binaries for vc9/ will be available in future stable releases.
+- New recursive_mutex class.
+- Full support for 32-bit PowerMac including export files for builds.
+- Improvements to parallel_do.
+
+------------------------------------------------------------------------
+20071206 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Support for Microsoft* Visual Studio* 2008 in building libraries
+    from source as well as in vc9/ projects for examples.
+- Small fixes to the affinity_partitioner first introduced in 20071127.
+- Small fixes to the thread-stack size hook first introduced in 20071127.
+- Engineering work in progress on concurrent_vector.
+- Engineering work in progress on exception behavior.
+- Unittest improvements.
+
+------------------------------------------------------------------------
+20071127 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- Task-to-thread affinity support (affinity partitioner) first appears.
+- More work on concurrent_vector.
+- New parallel_do algorithm (function-style version of parallel while)
+    and parallel_do/parallel_preorder example.
+- New task_scheduler_init() hooks for getting default_num_threads() and
+    for setting thread stack size.
+- Support for weak memory consistency models in the code base.
+- Futex usage in the task scheduler (Linux).
+- Started adding 32-bit PowerMac support.
+- Intel(R) 9.1 compilers are now the base supported Intel(R) compiler
+    version.
+- TBB libraries added to link line automatically on Microsoft Windows*
+    systems via #pragma comment linker directives.
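
The task_scheduler_init hooks above in a small sketch: querying default_num_threads() and passing an explicit per-thread stack size as the second constructor argument (the numeric values are illustrative):

    #include "tbb/task_scheduler_init.h"

    int main() {
        int threads = tbb::task_scheduler_init::default_num_threads();
        // second argument is the new thread-stack-size hook
        tbb::task_scheduler_init init(threads, 8 * 1024 * 1024);
        /* ... run parallel algorithms while init is alive ... */
        return 0;
    }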
+
+Open-source contributions integrated:
+
+- FreeBSD platform support patches.
+- AIX weak memory model patch.
+
+Bugs fixed:
+
+- 108 - Removed broken affinity.h reference.
+- 101 - Does not build on Debian Lenny (replaced arch with uname -m).
+
+------------------------------------------------------------------------
+20071030 open-source release
+
+Changes (w.r.t. previous open-source release):
+
+- More work on concurrent_vector.
+- Better support for building with -Wall -Werror (or not) as desired.
+- A few fixes to eliminate extraneous warnings.
+- Begin introduction of versioning hooks so that the internal/API
+    version is tracked via TBB_INTERFACE_VERSION.  The newest binary
+    libraries should always work with previously-compiled code
+    whenever possible.
+- Engineering work in progress on using futex inside the mutexes (Linux).
+- Engineering work in progress on exception behavior.
+- Engineering work in progress on a new parallel_do algorithm.
+- Unittest improvements.
+
+------------------------------------------------------------------------
+20070927 open-source release
+
+Changes (w.r.t. TBB 2.0 U1 commercial-aligned release):
+
+- Minor update to TBB 2.0 U1 below.
+- Begin introduction of new concurrent_vector interfaces not released
+    with TBB 2.0 U1.
+
+------------------------------------------------------------------------
+TBB 2.0 U1 commercial-aligned release (package 014, October 1, 2007)
+
+Corresponds to commercial 014 (for Linux*, 016) packages.
+
+Changes (w.r.t. TBB 2.0 commercial-aligned release):
+
+- All open-source-release changes down to, and including, TBB 2.0 below,
+    were incorporated into this release.
+- Made a number of changes to the officially supported OS list:
+    Added Linux* OSs:
+       Asianux* 3, Debian* 4.0, Fedora Core* 6, Fedora* 7,
+       Turbo Linux* 11, Ubuntu* 7.04;
+    Dropped Linux* OSs:
+       Asianux* 2, Fedora Core* 4, Haansoft* Linux 2006 Server,
+       Mandriva/Mandrake* 10.1, Miracle Linux* 4.0,
+       Red Flag* DC Server 5.0;
+    Only Mac OS* X 10.4.9 (and forward) and Xcode* tool suite 2.4.1 (and
+       forward) are now supported.
+- Commercial installers on Linux* fixed to recommend the correct
+    binaries to use in more cases, with less unnecessary warnings.
+- Changes to eliminate spurious build warnings.
+
+Open-source contributions integrated:
+
+- Two small header guard macro patches; it also fixed bug #94.
+- New blocked_range3d class.
+
+Bugs fixed:
+
+- 93 - Removed misleading comments in task.h.
+- 94 - See above.
+
+------------------------------------------------------------------------
+20070815 open-source release
+
+Changes:
+
+- Changes to eliminate spurious build warnings.
+- Engineering work in progress on concurrent_vector allocator behavior.
+- Added hooks to use the Intel(R) compiler code coverage tools.
+
+Open-source contributions integrated:
+
+- Mac OS* X build warning patch.
+
+Bugs fixed:
+
+- 88 - Fixed TBB compilation errors if both VS2005 and Windows SDK are
+    installed.
+
+------------------------------------------------------------------------
+20070719 open-source release
+
+Changes:
+
+- Minor update to TBB 2.0 commercial-aligned release below.
+- Changes to eliminate spurious build warnings.
+
+------------------------------------------------------------------------
+TBB 2.0 commercial-aligned release (package 010, July 19, 2007)
+
+Corresponds to commercial 010 (for Linux*, 012) packages.
+
+- TBB open-source debut release.
+
+------------------------------------------------------------------------
+TBB 1.1 commercial release (April 10, 2007)
+
+Changes (w.r.t. TBB 1.0 commercial release):
+
+- auto_partitioner which offered an automatic alternative to specifying
+    a grain size parameter to estimate the best granularity for tasks.
+- The release was added to the Intel(R) C++ Compiler 10.0 Pro.
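
The auto_partitioner above is passed as an extra argument to the loop templates so no grain size has to be chosen by hand; a sketch with an illustrative loop body:

    #include "tbb/parallel_for.h"
    #include "tbb/blocked_range.h"
    #include "tbb/partitioner.h"
    #include <cstddef>

    struct Scale {
        double* a;
        void operator()(const tbb::blocked_range<std::size_t>& r) const {
            for (std::size_t i = r.begin(); i != r.end(); ++i)
                a[i] *= 2.0;
        }
    };

    void scale(double* a, std::size_t n) {
        Scale body = { a };
        // no grain size: auto_partitioner picks chunk sizes at run time
        tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n), body,
                          tbb::auto_partitioner());
    }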
+
+------------------------------------------------------------------------
+TBB 1.0 Update 2 commercial release
+
+Changes (w.r.t. TBB 1.0 Update 1 commercial release):
+
+- Mac OS* X 64-bit support added.
+- Source packages for commercial releases introduced.
+
+------------------------------------------------------------------------
+TBB 1.0 Update 1 commercial-aligned release
+
+Changes (w.r.t. TBB 1.0 commercial release):
+
+- Fix for critical package issue on Mac OS* X.
+
+------------------------------------------------------------------------
+TBB 1.0 commercial release (August 29, 2006)
+
+Changes (w.r.t. TBB 1.0 beta commercial release):
+
+- New namespace (and compatibility headers for old namespace).
+    Namespaces are tbb and tbb::internal and all classes are in the
+    underscore_style not the WindowsStyle.
+- New class: scalable_allocator (and cache_aligned_allocator using that
+    if it exists).
+- Added parallel_for/tacheon example.
+- Removed C-style casts from headers for better C++ compliance.
+- Bug fixes.
+- Documentation improvements.
+- Improved performance of the concurrent_hash_map class.
+- Upgraded parallel_sort() to support STL-style random-access iterators
+    instead of just pointers.
+- The Windows vs7_1 directories renamed to vs7.1 in examples.
+- New class: spin version of reader-writer lock.
+- Added push_back() interface to concurrent_vector().
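
The new scalable_allocator above is an STL-style allocator and can be plugged into standard containers; a small sketch:

    #include "tbb/scalable_allocator.h"
    #include <vector>

    // element storage comes from the TBB scalable memory allocator
    typedef std::vector<int, tbb::scalable_allocator<int> > IntVector;

    void grow(IntVector& v) {
        v.push_back(42);
    }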
+
+------------------------------------------------------------------------
+TBB 1.0 beta commercial release
+
+Initial release.
+
+Features / APIs:
+
+- Concurrent containers: ConcurrentHashTable, ConcurrentVector,
+    ConcurrentQueue.
+- Parallel algorithms: ParallelFor, ParallelReduce, ParallelScan,
+    ParallelWhile, Pipeline, ParallelSort.
+- Support: AlignedSpace, BlockedRange (i.e., 1D), BlockedRange2D
+- Task scheduler with multi-master support.
+- Atomics: read, write, fetch-and-store, fetch-and-add, compare-and-swap.
+- Locks: spin, reader-writer, queuing, OS-wrapper.
+- Memory allocation: STL-style memory allocator that avoids false
+    sharing.
+- Timers.
+
+Tools Support:
+- Thread Checker 3.0.
+- Thread Profiler 3.0.
+
+Documentation:
+- First Use Documents: README.txt, INSTALL.txt, Release_Notes.txt,
+    Doc_Index.html, Getting_Started.pdf, Tutorial.pdf, Reference.pdf.
+- Class hierarchy HTML pages (Doxygen).
+- Tree of index.html pages for navigating the installed package, esp.
+    for the examples.
+
+Examples:
+- One for each of these TBB features: ConcurrentHashTable, ParallelFor,
+    ParallelReduce, ParallelWhile, Pipeline, Task.
+- Live copies of examples from Getting_Started.pdf.
+- TestAll example that exercises every class and header in the package
+    (i.e., a "liveness test").
+- Compilers: see Release_Notes.txt.
+- APIs: OpenMP, WinThreads, Pthreads.
+
+Packaging:
+- Package for Windows installs IA-32 and EM64T bits.
+- Package for Linux installs IA-32, EM64T and IPF bits.
+- Package for Mac OS* X installs IA-32 bits.
+- All packages support Intel(R) software setup assistant (ISSA) and
+    install-time FLEXlm license checking.
+- ISSA support allows license file to be specified directly in case of
+    no Internet connection or problems with IRC or serial #s.
+- Linux installer allows root or non-root, RPM or non-RPM installs.
+- FLEXlm license servers (for those who need floating/counted licenses)
+    are provided separately on Intel(R) Premier.
+
+------------------------------------------------------------------------
+* Other names and brands may be claimed as the property of others.
diff --git a/tbb/COPYING b/tbb/COPYING
new file mode 100644 (file)
index 0000000..5af6ed8
--- /dev/null
@@ -0,0 +1,353 @@
+                   GNU GENERAL PUBLIC LICENSE
+                      Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                           Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                   GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                           NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                    END OF TERMS AND CONDITIONS
+
+           How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
+----------------     END OF Gnu General Public License     ----------------
+
+The source code of Threading Building Blocks is distributed under version 2
+of the GNU General Public License, with the so-called "runtime exception,"
+as follows (or see any header or implementation file):
+
+   As a special exception, you may use this file as part of a free software
+   library without restriction.  Specifically, if other files instantiate
+   templates or use macros or inline functions from this file, or you compile
+   this file and link it with other files to produce an executable, this
+   file does not by itself cause the resulting executable to be covered by
+   the GNU General Public License.  This exception does not however
+   invalidate any other reasons why the executable file might be covered by
+   the GNU General Public License.
diff --git a/tbb/README b/tbb/README
new file mode 100644 (file)
index 0000000..67ab8ad
--- /dev/null
@@ -0,0 +1,11 @@
+Threading Building Blocks - README
+
+See index.html for directions and documentation.
+
+If source is present (./Makefile and src/ directories),
+type 'gmake' in this directory to build and test.
+
+See examples/index.html for runnable examples and directions.
+
+See http://threadingbuildingblocks.org for full documentation
+and software information.
diff --git a/tbb/bin/ia32/vc10/irml/irml.dll b/tbb/bin/ia32/vc10/irml/irml.dll
new file mode 100644 (file)
index 0000000..2bbcfbd
Binary files /dev/null and b/tbb/bin/ia32/vc10/irml/irml.dll differ
diff --git a/tbb/bin/ia32/vc10/irml/irml.pdb b/tbb/bin/ia32/vc10/irml/irml.pdb
new file mode 100644 (file)
index 0000000..462f8a9
Binary files /dev/null and b/tbb/bin/ia32/vc10/irml/irml.pdb differ
diff --git a/tbb/bin/ia32/vc10/irml/irml_debug.dll b/tbb/bin/ia32/vc10/irml/irml_debug.dll
new file mode 100644 (file)
index 0000000..c07c6ad
Binary files /dev/null and b/tbb/bin/ia32/vc10/irml/irml_debug.dll differ
diff --git a/tbb/bin/ia32/vc10/irml/irml_debug.pdb b/tbb/bin/ia32/vc10/irml/irml_debug.pdb
new file mode 100644 (file)
index 0000000..361fea8
Binary files /dev/null and b/tbb/bin/ia32/vc10/irml/irml_debug.pdb differ
diff --git a/tbb/bin/ia32/vc10/irml_c/irml.dll b/tbb/bin/ia32/vc10/irml_c/irml.dll
new file mode 100644 (file)
index 0000000..20e67ac
Binary files /dev/null and b/tbb/bin/ia32/vc10/irml_c/irml.dll differ
diff --git a/tbb/bin/ia32/vc10/irml_c/irml.pdb b/tbb/bin/ia32/vc10/irml_c/irml.pdb
new file mode 100644 (file)
index 0000000..f4a40b9
Binary files /dev/null and b/tbb/bin/ia32/vc10/irml_c/irml.pdb differ
diff --git a/tbb/bin/ia32/vc10/irml_c/irml_debug.dll b/tbb/bin/ia32/vc10/irml_c/irml_debug.dll
new file mode 100644 (file)
index 0000000..f1f0797
Binary files /dev/null and b/tbb/bin/ia32/vc10/irml_c/irml_debug.dll differ
diff --git a/tbb/bin/ia32/vc10/irml_c/irml_debug.pdb b/tbb/bin/ia32/vc10/irml_c/irml_debug.pdb
new file mode 100644 (file)
index 0000000..57fa7b0
Binary files /dev/null and b/tbb/bin/ia32/vc10/irml_c/irml_debug.pdb differ
diff --git a/tbb/bin/ia32/vc10/tbb.dll b/tbb/bin/ia32/vc10/tbb.dll
new file mode 100644 (file)
index 0000000..5f4f57a
Binary files /dev/null and b/tbb/bin/ia32/vc10/tbb.dll differ
diff --git a/tbb/bin/ia32/vc10/tbb.pdb b/tbb/bin/ia32/vc10/tbb.pdb
new file mode 100644 (file)
index 0000000..10581cf
Binary files /dev/null and b/tbb/bin/ia32/vc10/tbb.pdb differ
diff --git a/tbb/bin/ia32/vc10/tbb_debug.dll b/tbb/bin/ia32/vc10/tbb_debug.dll
new file mode 100644 (file)
index 0000000..f71394d
Binary files /dev/null and b/tbb/bin/ia32/vc10/tbb_debug.dll differ
diff --git a/tbb/bin/ia32/vc10/tbb_debug.pdb b/tbb/bin/ia32/vc10/tbb_debug.pdb
new file mode 100644 (file)
index 0000000..e3f4cc3
Binary files /dev/null and b/tbb/bin/ia32/vc10/tbb_debug.pdb differ
diff --git a/tbb/bin/ia32/vc10/tbb_preview.dll b/tbb/bin/ia32/vc10/tbb_preview.dll
new file mode 100644 (file)
index 0000000..0cf5262
Binary files /dev/null and b/tbb/bin/ia32/vc10/tbb_preview.dll differ
diff --git a/tbb/bin/ia32/vc10/tbb_preview.pdb b/tbb/bin/ia32/vc10/tbb_preview.pdb
new file mode 100644 (file)
index 0000000..75215bf
Binary files /dev/null and b/tbb/bin/ia32/vc10/tbb_preview.pdb differ
diff --git a/tbb/bin/ia32/vc10/tbb_preview_debug.dll b/tbb/bin/ia32/vc10/tbb_preview_debug.dll
new file mode 100644 (file)
index 0000000..878e3f0
Binary files /dev/null and b/tbb/bin/ia32/vc10/tbb_preview_debug.dll differ
diff --git a/tbb/bin/ia32/vc10/tbb_preview_debug.pdb b/tbb/bin/ia32/vc10/tbb_preview_debug.pdb
new file mode 100644 (file)
index 0000000..f593918
Binary files /dev/null and b/tbb/bin/ia32/vc10/tbb_preview_debug.pdb differ
diff --git a/tbb/bin/ia32/vc10/tbbmalloc.dll b/tbb/bin/ia32/vc10/tbbmalloc.dll
new file mode 100644 (file)
index 0000000..60e7c40
Binary files /dev/null and b/tbb/bin/ia32/vc10/tbbmalloc.dll differ
diff --git a/tbb/bin/ia32/vc10/tbbmalloc.pdb b/tbb/bin/ia32/vc10/tbbmalloc.pdb
new file mode 100644 (file)
index 0000000..0c13283
Binary files /dev/null and b/tbb/bin/ia32/vc10/tbbmalloc.pdb differ
diff --git a/tbb/bin/ia32/vc10/tbbmalloc_debug.dll b/tbb/bin/ia32/vc10/tbbmalloc_debug.dll
new file mode 100644 (file)
index 0000000..bc94d62
Binary files /dev/null and b/tbb/bin/ia32/vc10/tbbmalloc_debug.dll differ
diff --git a/tbb/bin/ia32/vc10/tbbmalloc_debug.pdb b/tbb/bin/ia32/vc10/tbbmalloc_debug.pdb
new file mode 100644 (file)
index 0000000..310d827
Binary files /dev/null and b/tbb/bin/ia32/vc10/tbbmalloc_debug.pdb differ
diff --git a/tbb/bin/ia32/vc10/tbbmalloc_proxy.dll b/tbb/bin/ia32/vc10/tbbmalloc_proxy.dll
new file mode 100644 (file)
index 0000000..eae894f
Binary files /dev/null and b/tbb/bin/ia32/vc10/tbbmalloc_proxy.dll differ
diff --git a/tbb/bin/ia32/vc10/tbbmalloc_proxy.pdb b/tbb/bin/ia32/vc10/tbbmalloc_proxy.pdb
new file mode 100644 (file)
index 0000000..cb072eb
Binary files /dev/null and b/tbb/bin/ia32/vc10/tbbmalloc_proxy.pdb differ
diff --git a/tbb/bin/ia32/vc10/tbbmalloc_proxy_debug.dll b/tbb/bin/ia32/vc10/tbbmalloc_proxy_debug.dll
new file mode 100644 (file)
index 0000000..439bdfe
Binary files /dev/null and b/tbb/bin/ia32/vc10/tbbmalloc_proxy_debug.dll differ
diff --git a/tbb/bin/ia32/vc10/tbbmalloc_proxy_debug.pdb b/tbb/bin/ia32/vc10/tbbmalloc_proxy_debug.pdb
new file mode 100644 (file)
index 0000000..8150fc5
Binary files /dev/null and b/tbb/bin/ia32/vc10/tbbmalloc_proxy_debug.pdb differ
diff --git a/tbb/bin/ia32/vc10/tbbvars.bat b/tbb/bin/ia32/vc10/tbbvars.bat
new file mode 100644 (file)
index 0000000..2d5e7cf
--- /dev/null
@@ -0,0 +1,33 @@
+@echo off
+REM
+REM Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+REM
+REM This file is part of Threading Building Blocks.
+REM
+REM Threading Building Blocks is free software; you can redistribute it
+REM and/or modify it under the terms of the GNU General Public License
+REM version 2 as published by the Free Software Foundation.
+REM
+REM Threading Building Blocks is distributed in the hope that it will be
+REM useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+REM of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+REM GNU General Public License for more details.
+REM
+REM You should have received a copy of the GNU General Public License
+REM along with Threading Building Blocks; if not, write to the Free Software
+REM Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+REM
+REM As a special exception, you may use this file as part of a free software
+REM library without restriction.  Specifically, if other files instantiate
+REM templates or use macros or inline functions from this file, or you compile
+REM this file and link it with other files to produce an executable, this
+REM file does not by itself cause the resulting executable to be covered by
+REM the GNU General Public License.  This exception does not however
+REM invalidate any other reasons why the executable file might be covered by
+REM the GNU General Public License.
+REM
+SET TBB30_INSTALL_DIR=SUBSTITUTE_INSTALL_DIR_HERE
+SET TBB_ARCH_PLATFORM=ia32\vc10
+SET PATH=%TBB30_INSTALL_DIR%\bin\%TBB_ARCH_PLATFORM%;%PATH%
+SET LIB=%TBB30_INSTALL_DIR%\lib\%TBB_ARCH_PLATFORM%;%LIB%
+SET INCLUDE=%TBB30_INSTALL_DIR%\include;%INCLUDE%
diff --git a/tbb/bin/ia32/vc10/tbbvars.csh b/tbb/bin/ia32/vc10/tbbvars.csh
new file mode 100644 (file)
index 0000000..3534ab3
--- /dev/null
@@ -0,0 +1,45 @@
+#!/bin/csh
+#
+# Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+#
+# This file is part of Threading Building Blocks.
+#
+# Threading Building Blocks is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License
+# version 2 as published by the Free Software Foundation.
+#
+# Threading Building Blocks is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Threading Building Blocks; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#
+# As a special exception, you may use this file as part of a free software
+# library without restriction.  Specifically, if other files instantiate
+# templates or use macros or inline functions from this file, or you compile
+# this file and link it with other files to produce an executable, this
+# file does not by itself cause the resulting executable to be covered by
+# the GNU General Public License.  This exception does not however
+# invalidate any other reasons why the executable file might be covered by
+# the GNU General Public License.
+
+setenv TBB30_INSTALL_DIR "SUBSTITUTE_INSTALL_DIR_HERE"
+setenv TBB_ARCH_PLATFORM "ia32\vc10"
+if (! $?PATH) then
+    setenv PATH "${TBB30_INSTALL_DIR}\bin\${TBB_ARCH_PLATFORM}"
+else
+    setenv PATH "${TBB30_INSTALL_DIR}\bin\${TBB_ARCH_PLATFORM};$PATH"
+endif
+if (! $?LIB) then
+    setenv LIB "${TBB30_INSTALL_DIR}\lib\${TBB_ARCH_PLATFORM}"
+else
+    setenv LIB "${TBB30_INSTALL_DIR}\lib\${TBB_ARCH_PLATFORM};$LIB"
+endif
+if (! $?INCLUDE) then
+    setenv INCLUDE "${TBB30_INSTALL_DIR}\include"
+else
+    setenv INCLUDE "${TBB30_INSTALL_DIR}\include;$INCLUDE"
+endif
diff --git a/tbb/bin/ia32/vc10/tbbvars.sh b/tbb/bin/ia32/vc10/tbbvars.sh
new file mode 100644 (file)
index 0000000..124ffcb
--- /dev/null
@@ -0,0 +1,45 @@
+#!/bin/sh
+#
+# Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+#
+# This file is part of Threading Building Blocks.
+#
+# Threading Building Blocks is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License
+# version 2 as published by the Free Software Foundation.
+#
+# Threading Building Blocks is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Threading Building Blocks; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#
+# As a special exception, you may use this file as part of a free software
+# library without restriction.  Specifically, if other files instantiate
+# templates or use macros or inline functions from this file, or you compile
+# this file and link it with other files to produce an executable, this
+# file does not by itself cause the resulting executable to be covered by
+# the GNU General Public License.  This exception does not however
+# invalidate any other reasons why the executable file might be covered by
+# the GNU General Public License.
+
+TBB30_INSTALL_DIR="SUBSTITUTE_SH_INSTALL_DIR_HERE"; export TBB30_INSTALL_DIR
+TBB_ARCH_PLATFORM="ia32/vc10"
+if [ -z "${PATH}" ]; then
+    PATH="${TBB30_INSTALL_DIR}/bin/${TBB_ARCH_PLATFORM}"; export PATH
+else
+    PATH="${TBB30_INSTALL_DIR}/bin/${TBB_ARCH_PLATFORM};$PATH"; export PATH
+fi
+if [ -z "${LIB}" ]; then
+    LIB="${TBB30_INSTALL_DIR}/lib/${TBB_ARCH_PLATFORM}"; export LIB
+else
+    LIB="${TBB30_INSTALL_DIR}/lib/${TBB_ARCH_PLATFORM};$LIB"; export LIB
+fi
+if [ -z "${INCLUDE}" ]; then
+    INCLUDE="${TBB30_INSTALL_DIR}/include"; export INCLUDE
+else
+    INCLUDE="${TBB30_INSTALL_DIR}/include;$INCLUDE"; export INCLUDE
+fi
diff --git a/tbb/bin/tbbvars.bat b/tbb/bin/tbbvars.bat
new file mode 100644 (file)
index 0000000..f30ec86
--- /dev/null
@@ -0,0 +1,75 @@
+@echo off
+REM
+REM Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+REM
+REM This file is part of Threading Building Blocks.
+REM
+REM Threading Building Blocks is free software; you can redistribute it
+REM and/or modify it under the terms of the GNU General Public License
+REM version 2 as published by the Free Software Foundation.
+REM
+REM Threading Building Blocks is distributed in the hope that it will be
+REM useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+REM of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+REM GNU General Public License for more details.
+REM
+REM You should have received a copy of the GNU General Public License
+REM along with Threading Building Blocks; if not, write to the Free Software
+REM Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+REM
+REM As a special exception, you may use this file as part of a free software
+REM library without restriction.  Specifically, if other files instantiate
+REM templates or use macros or inline functions from this file, or you compile
+REM this file and link it with other files to produce an executable, this
+REM file does not by itself cause the resulting executable to be covered by
+REM the GNU General Public License.  This exception does not however
+REM invalidate any other reasons why the executable file might be covered by
+REM the GNU General Public License.
+REM
+
+set SCRIPT_NAME=%~nx0
+if (%1) == () goto Syntax
+
+SET TBB30_BIN_DIR=%~d0%~p0
+
+SET TBB30_INSTALL_DIR=%TBB30_BIN_DIR%..
+
+:ParseArgs
+:: Parse the incoming arguments
+if /i "%1"==""        goto SetEnv
+if /i "%1"=="ia32"         (set TBB_TARGET_ARCH=ia32)    & shift & goto ParseArgs
+if /i "%1"=="intel64"      (set TBB_TARGET_ARCH=intel64) & shift & goto ParseArgs
+if /i "%1"=="vs2005"       (set TBB_TARGET_VS=vc8)       & shift & goto ParseArgs
+if /i "%1"=="vs2008"       (set TBB_TARGET_VS=vc9)       & shift & goto ParseArgs
+if /i "%1"=="vs2010"       (set TBB_TARGET_VS=vc10)      & shift & goto ParseArgs
+if /i "%1"=="all"          (set TBB_TARGET_VS=vc_mt)     & shift & goto ParseArgs
+
+:SetEnv
+
+if  ("%TBB_TARGET_VS%") == ("") set TBB_TARGET_VS=vc_mt
+
+SET TBB_ARCH_PLATFORM=%TBB_TARGET_ARCH%\%TBB_TARGET_VS%
+if exist "%TBB30_BIN_DIR%\%TBB_ARCH_PLATFORM%\tbb.dll" SET PATH=%TBB30_BIN_DIR%\%TBB_ARCH_PLATFORM%;%PATH%
+if exist "%TBB30_INSTALL_DIR%\..\redist\%TBB_TARGET_ARCH%\tbb\%TBB_TARGET_VS%\tbb.dll" SET PATH=%TBB30_INSTALL_DIR%\..\redist\%TBB_TARGET_ARCH%\tbb\%TBB_TARGET_VS%;%PATH%
+SET LIB=%TBB30_INSTALL_DIR%\lib\%TBB_ARCH_PLATFORM%;%LIB%
+SET INCLUDE=%TBB30_INSTALL_DIR%\include;%INCLUDE%
+IF ("%ICPP_COMPILER11%") NEQ ("") SET TBB_CXX=icl.exe
+IF ("%ICPP_COMPILER12%") NEQ ("") SET TBB_CXX=icl.exe
+goto End
+
+:Syntax
+echo Syntax:
+echo  %SCRIPT_NAME% ^<arch^> ^<vs^>
+echo    ^<arch^> must be one of the following
+echo        ia32         : Set up for IA-32  architecture
+echo        intel64      : Set up for Intel(R) 64  architecture
+echo    ^<vs^> should be one of the following
+echo        vs2005      : Set to use with Microsoft Visual Studio 2005 runtime DLLs
+echo        vs2008      : Set to use with Microsoft Visual Studio 2008 runtime DLLs
+echo        vs2010      : Set to use with Microsoft Visual Studio 2010 runtime DLLs
+echo        all         : Set to use TBB statically linked with Microsoft Visual C++ runtime
+echo    if ^<vs^> is not set, TBB statically linked with the Microsoft Visual C++ runtime will be used.
+exit /B 1
+
+:End
+exit /B 0
\ No newline at end of file
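[Editorial note, not part of the commit] The tbbvars scripts above prepend the TBB bin, lib and include directories added by this commit to the PATH, LIB and INCLUDE environment variables, so a compiler started from that shell can find the headers and import libraries without extra options. The sketch below is a hypothetical smoke test (the file name smoke_test.cpp and the printed output are invented); it only assumes that tbb/atomic.h, which the internal headers later in this diff include themselves, becomes reachable once one of the scripts has been run, for example via "cl smoke_test.cpp" in a Visual Studio command prompt.

    // smoke_test.cpp -- hypothetical name, not part of this commit.
    // Build from a shell where one of the tbbvars scripts above has been run;
    // this particular check should not require linking against tbb.dll.
    #include <cstdio>
    #include "tbb/atomic.h"

    int main() {
        tbb::atomic<int> counter;
        counter = 0;                      // tbb::atomic is not zero-initialized by default
        counter.fetch_and_increment();    // atomic read-modify-write from tbb/atomic.h
        counter += 2;
        std::printf("counter = %d\n", int(counter));  // prints 3 if the headers were found
        return 0;
    }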
diff --git a/tbb/include/index.html b/tbb/include/index.html
new file mode 100644 (file)
index 0000000..98353f0
--- /dev/null
@@ -0,0 +1,24 @@
+<HTML>
+<BODY>
+
+<H2>Overview</H2>
+Include files for Threading Building Blocks.
+
+<H2>Directories</H2>
+<DL>
+<DT><A HREF="tbb/index.html">tbb</A>
+<DD>Include files for Threading Building Blocks classes and functions.
+</DL>
+
+<HR>
+<A HREF="../index.html">Up to parent directory</A>
+<p></p>
+Copyright &copy; 2005-2011 Intel Corporation.  All Rights Reserved.
+<p></p>
+Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are 
+registered trademarks or trademarks of Intel Corporation or its 
+subsidiaries in the United States and other countries. 
+<p></p>
+* Other names and brands may be claimed as the property of others.
+</BODY>
+</HTML>
diff --git a/tbb/include/tbb/_aggregator_internal.h b/tbb/include/tbb/_aggregator_internal.h
new file mode 100644 (file)
index 0000000..36d1f46
--- /dev/null
@@ -0,0 +1,162 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_aggregator_internal_H
+#define __TBB_aggregator_internal_H
+
+#include "atomic.h"
+#include "tbb_profiling.h"
+
+namespace tbb {
+namespace interface6 {
+namespace internal {
+
+using namespace tbb::internal;
+
+//! aggregated_operation base class
+template <typename Derived>
+class aggregated_operation {
+ public:
+    uintptr_t status;
+    Derived *next;
+    aggregated_operation() : status(0), next(NULL) {}
+};
+
+//! Aggregator base class
+/** An aggregator for collecting operations coming from multiple sources and executing
+    them serially on a single thread.  operation_type must be derived from
+    aggregated_operation. The parameter handler_type is a functor that will be passed the
+    list of operations and is expected to handle each operation appropriately, setting the
+    status of each operation to non-zero.*/
+ template < typename handler_type, typename operation_type >
+class aggregator {
+ public:
+    aggregator() : handler_busy(false) { pending_operations = NULL; }
+    explicit aggregator(handler_type h) : handler_busy(false), handle_operations(h) {
+        pending_operations = NULL; 
+    }
+
+    void initialize_handler(handler_type h) { handle_operations = h; }
+
+    //! Place operation in list
+    /** Place operation in list and either handle list or wait for operation to
+        complete.  */
+    void execute(operation_type *op) {
+        operation_type *res;
+
+        // ITT note: &(op->status) tag is used to cover accesses to this op node. This
+        // thread has created the operation, and now releases it so that the handler
+        // thread may handle the associated operation w/o triggering a race condition;
+        // thus this tag will be acquired just before the operation is handled in the
+        // handle_operations functor.
+        call_itt_notify(releasing, &(op->status));
+        // insert the operation in the queue
+        do {
+            // ITT may flag the following line as a race; it is a false positive:
+            // This is an atomic read; we don't provide itt_hide_load_word for atomics
+            op->next = res = pending_operations; // NOT A RACE 
+        } while (pending_operations.compare_and_swap(op, res) != res);
+        if (!res) { // first in the list; handle the operations
+            // ITT note: &pending_operations tag covers access to the handler_busy flag,
+            // which this waiting handler thread will try to set before entering
+            // handle_operations.
+            call_itt_notify(acquired, &pending_operations);
+            start_handle_operations();
+            __TBB_ASSERT(op->status, NULL);
+        }
+        else { // not first; wait for op to be ready
+            call_itt_notify(prepare, &(op->status));
+            spin_wait_while_eq(op->status, uintptr_t(0));
+            itt_load_word_with_acquire(op->status);
+        }
+    }
+
+ private:
+    //! An atomically updated list (aka mailbox) of pending operations
+    atomic<operation_type *> pending_operations;
+    //! Controls thread access to handle_operations
+    uintptr_t handler_busy;
+    handler_type handle_operations;
+
+    //! Trigger the handling of operations when the handler is free
+    void start_handle_operations() {
+        operation_type *op_list;
+
+        // ITT note: &handler_busy tag covers access to pending_operations as it is passed
+        // between active and waiting handlers.  Below, the waiting handler waits until
+        // the active handler releases, and the waiting handler acquires &handler_busy as
+        // it becomes the active_handler. The release point is at the end of this
+        // function, when all operations in pending_operations have been handled by the
+        // owner of this aggregator.
+        call_itt_notify(prepare, &handler_busy);
+        // get the handler_busy:
+        // only one thread can possibly spin here at a time
+        spin_wait_until_eq(handler_busy, uintptr_t(0));
+        call_itt_notify(acquired, &handler_busy);
+        // acquire fence not necessary here due to causality rule and surrounding atomics
+        __TBB_store_with_release(handler_busy, uintptr_t(1));
+
+        // ITT note: &pending_operations tag covers access to the handler_busy flag
+        // itself. Capturing the state of the pending_operations signifies that
+        // handler_busy has been set and a new active handler will now process that list's
+        // operations.
+        call_itt_notify(releasing, &pending_operations);
+        // grab pending_operations
+        op_list = pending_operations.fetch_and_store(NULL);
+
+        // handle all the operations
+        handle_operations(op_list);
+
+        // release the handler
+        itt_store_word_with_release(handler_busy, uintptr_t(0));
+    }
+};
+
+// the most-compatible friend declaration (vs, gcc, icc) is
+//    template<class U, class V> friend class aggregating_functor;
+template<typename aggregating_class, typename operation_list>
+class aggregating_functor {
+    aggregating_class *fi;
+public:
+    aggregating_functor() {}
+    aggregating_functor(aggregating_class *fi_) : fi(fi_) {}
+    void operator()(operation_list* op_list) { fi->handle_operations(op_list); }
+};
+
+} // namespace internal
+} // namespace interface6
+
+namespace internal {
+    using interface6::internal::aggregated_operation;
+    using interface6::internal::aggregator;
+    using interface6::internal::aggregating_functor;
+} // namespace internal
+
+} // namespace tbb
+
+#endif
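[Editorial note, not part of the commit] The aggregator defined above funnels operations submitted by many threads through a single handler that runs them serially. The following is a minimal, single-threaded sketch of that protocol, not taken from TBB itself: the names aggregator_demo.cpp, sum_op and sum_handler and the summing logic are invented for illustration, it assumes the headers added in this commit are on the include path (for example via the tbbvars scripts earlier in this diff), and it includes the internal header directly only because that is the file shown above, not because that is a public TBB API. Two details matter: the handler must read op->next before it publishes op->status, since the submitting thread may destroy the operation as soon as status becomes non-zero, and the handler object is copied into the aggregator, so shared state is best held by pointer.

    // aggregator_demo.cpp -- illustrative only; names are hypothetical.
    #include <cstdio>
    #include "tbb/_aggregator_internal.h"

    // An operation record: derives from aggregated_operation so it carries
    // the status word and the intrusive next pointer used by the aggregator.
    struct sum_op : tbb::internal::aggregated_operation<sum_op> {
        int value;
    };

    // Handler functor: receives the list of pending operations and must mark
    // every one of them handled (status != 0) before returning.
    struct sum_handler {
        long* total;   // held by pointer: the aggregator stores a copy of the functor
        void operator()(sum_op* list) {
            while (list) {
                sum_op* next = list->next;                   // read next BEFORE publishing status
                *total += list->value;
                __TBB_store_with_release(list->status, 1u);  // releases the thread spinning in execute()
                list = next;
            }
        }
    };

    int main() {
        long total = 0;
        sum_handler h = { &total };
        tbb::internal::aggregator<sum_handler, sum_op> agg(h);
        for (int i = 1; i <= 4; ++i) {
            sum_op op;          // stack-allocated; safe because execute() waits until status != 0
            op.value = i;
            agg.execute(&op);   // with a single thread, each call becomes the active handler itself
        }
        std::printf("total = %ld\n", total);  // 1+2+3+4 = 10
        return 0;
    }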
diff --git a/tbb/include/tbb/_concurrent_queue_internal.h b/tbb/include/tbb/_concurrent_queue_internal.h
new file mode 100644 (file)
index 0000000..72754e1
--- /dev/null
@@ -0,0 +1,1019 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_concurrent_queue_internal_H
+#define __TBB_concurrent_queue_internal_H
+
+#include "tbb_stddef.h"
+#include "tbb_machine.h"
+#include "atomic.h"
+#include "spin_mutex.h"
+#include "cache_aligned_allocator.h"
+#include "tbb_exception.h"
+#include "tbb_profiling.h"
+#include <new>
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
+    #pragma warning (push)
+    #pragma warning (disable: 4530)
+#endif
+
+#include <iterator>
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    #pragma warning (pop)
+#endif
+
+namespace tbb {
+
+#if !__TBB_TEMPLATE_FRIENDS_BROKEN
+
+// forward declaration
+namespace strict_ppl {
+template<typename T, typename A> class concurrent_queue;
+}
+
+template<typename T, typename A> class concurrent_bounded_queue;
+
+namespace deprecated {
+template<typename T, typename A> class concurrent_queue;
+}
+#endif
+
+//! For internal use only.
+namespace strict_ppl {
+
+//! @cond INTERNAL
+namespace internal {
+
+using namespace tbb::internal;
+
+typedef size_t ticket;
+
+template<typename T> class micro_queue ;
+template<typename T> class micro_queue_pop_finalizer ;
+template<typename T> class concurrent_queue_base_v3;
+
+//! parts of concurrent_queue_rep that do not have references to micro_queue
+/**
+ * For internal use only.
+ */
+struct concurrent_queue_rep_base : no_copy {
+    template<typename T> friend class micro_queue;
+    template<typename T> friend class concurrent_queue_base_v3;
+
+protected:
+    //! Approximately n_queue/golden ratio
+    static const size_t phi = 3;
+
+public:
+    // must be power of 2
+    static const size_t n_queue = 8;
+
+    //! Prefix on a page
+    struct page {
+        page* next;
+        uintptr_t mask; 
+    };
+
+    atomic<ticket> head_counter;
+    char pad1[NFS_MaxLineSize-sizeof(atomic<ticket>)];
+    atomic<ticket> tail_counter;
+    char pad2[NFS_MaxLineSize-sizeof(atomic<ticket>)];
+
+    //! Always a power of 2
+    size_t items_per_page;
+
+    //! Size of an item
+    size_t item_size;
+
+    //! number of invalid entries in the queue
+    atomic<size_t> n_invalid_entries;
+
+    char pad3[NFS_MaxLineSize-sizeof(size_t)-sizeof(size_t)-sizeof(atomic<size_t>)];
+} ;
+
+inline bool is_valid_page(const concurrent_queue_rep_base::page* p) {
+    return uintptr_t(p)>1;
+}
+
+//! Abstract class to define interface for page allocation/deallocation
+/**
+ * For internal use only.
+ */
+class concurrent_queue_page_allocator
+{
+    template<typename T> friend class micro_queue ;
+    template<typename T> friend class micro_queue_pop_finalizer ;
+protected:
+    virtual ~concurrent_queue_page_allocator() {}
+private:
+    virtual concurrent_queue_rep_base::page* allocate_page() = 0;
+    virtual void deallocate_page( concurrent_queue_rep_base::page* p ) = 0;
+} ;
+
+#if _MSC_VER && !defined(__INTEL_COMPILER)
+// unary minus operator applied to unsigned type, result still unsigned
+#pragma warning( push )
+#pragma warning( disable: 4146 )
+#endif
+
+//! A queue using simple locking.
+/** For efficiency, this class has no constructor.
+    The caller is expected to zero-initialize it. */
+template<typename T>
+class micro_queue : no_copy {
+    typedef concurrent_queue_rep_base::page page;
+
+    //! Class used to ensure exception-safety of method "pop" 
+    class destroyer: no_copy {
+        T& my_value;
+    public:
+        destroyer( T& value ) : my_value(value) {}
+        ~destroyer() {my_value.~T();}          
+    };
+
+    void copy_item( page& dst, size_t index, const void* src ) {
+        new( &get_ref(dst,index) ) T(*static_cast<const T*>(src)); 
+    }
+
+    void copy_item( page& dst, size_t dindex, const page& src, size_t sindex ) {
+        new( &get_ref(dst,dindex) ) T( get_ref(const_cast<page&>(src),sindex) );
+    }
+
+    void assign_and_destroy_item( void* dst, page& src, size_t index ) {
+        T& from = get_ref(src,index);
+        destroyer d(from);
+        *static_cast<T*>(dst) = from;
+    }
+
+    void spin_wait_until_my_turn( atomic<ticket>& counter, ticket k, concurrent_queue_rep_base& rb ) const ;
+
+public:
+    friend class micro_queue_pop_finalizer<T>;
+
+    struct padded_page: page {
+        //! Not defined anywhere - exists to quiet warnings.
+        padded_page(); 
+        //! Not defined anywhere - exists to quiet warnings.
+        void operator=( const padded_page& );
+        //! Must be last field.
+        T last;
+    };
+
+    static T& get_ref( page& p, size_t index ) {
+        return (&static_cast<padded_page*>(static_cast<void*>(&p))->last)[index];
+    }
+
+    atomic<page*> head_page;
+    atomic<ticket> head_counter;
+
+    atomic<page*> tail_page;
+    atomic<ticket> tail_counter;
+
+    spin_mutex page_mutex;
+    
+    void push( const void* item, ticket k, concurrent_queue_base_v3<T>& base ) ;
+
+    bool pop( void* dst, ticket k, concurrent_queue_base_v3<T>& base ) ;
+
+    micro_queue& assign( const micro_queue& src, concurrent_queue_base_v3<T>& base ) ;
+
+    page* make_copy( concurrent_queue_base_v3<T>& base, const page* src_page, size_t begin_in_page, size_t end_in_page, ticket& g_index ) ;
+
+    void invalidate_page_and_rethrow( ticket k ) ;
+};
+
+template<typename T>
+void micro_queue<T>::spin_wait_until_my_turn( atomic<ticket>& counter, ticket k, concurrent_queue_rep_base& rb ) const {
+    atomic_backoff backoff;
+    do {
+        backoff.pause();
+        if( counter&1 ) {
+            ++rb.n_invalid_entries;
+            throw_exception( eid_bad_last_alloc );
+        }
+    } while( counter!=k ) ;
+}
+
+template<typename T>
+void micro_queue<T>::push( const void* item, ticket k, concurrent_queue_base_v3<T>& base ) {
+    k &= -concurrent_queue_rep_base::n_queue;
+    page* p = NULL;
+    size_t index = k/concurrent_queue_rep_base::n_queue & (base.my_rep->items_per_page-1);
+    if( !index ) {
+        __TBB_TRY {
+            concurrent_queue_page_allocator& pa = base;
+            p = pa.allocate_page();
+        } __TBB_CATCH (...) {
+            ++base.my_rep->n_invalid_entries;
+            invalidate_page_and_rethrow( k );
+        }
+        p->mask = 0;
+        p->next = NULL;
+    }
+
+    if( tail_counter!=k ) spin_wait_until_my_turn( tail_counter, k, *base.my_rep );
+    call_itt_notify(acquired, &tail_counter);
+        
+    if( p ) {
+        spin_mutex::scoped_lock lock( page_mutex );
+        page* q = tail_page;
+        if( is_valid_page(q) )
+            q->next = p;
+        else
+            head_page = p; 
+        tail_page = p;
+    } else {
+        p = tail_page;
+    }
+    __TBB_TRY {
+        copy_item( *p, index, item );
+        // If no exception was thrown, mark item as present.
+        itt_hide_store_word(p->mask,  p->mask | uintptr_t(1)<<index);
+        call_itt_notify(releasing, &tail_counter);
+        tail_counter += concurrent_queue_rep_base::n_queue; 
+    } __TBB_CATCH (...) {
+        ++base.my_rep->n_invalid_entries;
+        call_itt_notify(releasing, &tail_counter);
+        tail_counter += concurrent_queue_rep_base::n_queue; 
+        __TBB_RETHROW();
+    }
+}
+
+template<typename T>
+bool micro_queue<T>::pop( void* dst, ticket k, concurrent_queue_base_v3<T>& base ) {
+    k &= -concurrent_queue_rep_base::n_queue;
+    if( head_counter!=k ) spin_wait_until_eq( head_counter, k );
+    call_itt_notify(acquired, &head_counter);
+    if( tail_counter==k ) spin_wait_while_eq( tail_counter, k );
+    call_itt_notify(acquired, &tail_counter);
+    page& p = *head_page;
+    __TBB_ASSERT( &p, NULL );
+    size_t index = k/concurrent_queue_rep_base::n_queue & (base.my_rep->items_per_page-1);
+    bool success = false; 
+    {
+        micro_queue_pop_finalizer<T> finalizer( *this, base, k+concurrent_queue_rep_base::n_queue, index==base.my_rep->items_per_page-1 ? &p : NULL ); 
+        if( p.mask & uintptr_t(1)<<index ) {
+            success = true;
+            assign_and_destroy_item( dst, p, index );
+        } else {
+            --base.my_rep->n_invalid_entries;
+        }
+    }
+    return success;
+}
+
+template<typename T>
+micro_queue<T>& micro_queue<T>::assign( const micro_queue<T>& src, concurrent_queue_base_v3<T>& base ) {
+    head_counter = src.head_counter;
+    tail_counter = src.tail_counter;
+    page_mutex   = src.page_mutex;
+
+    const page* srcp = src.head_page;
+    if( is_valid_page(srcp) ) {
+        ticket g_index = head_counter;
+        __TBB_TRY {
+            size_t n_items  = (tail_counter-head_counter)/concurrent_queue_rep_base::n_queue;
+            size_t index = head_counter/concurrent_queue_rep_base::n_queue & (base.my_rep->items_per_page-1);
+            size_t end_in_first_page = (index+n_items<base.my_rep->items_per_page)?(index+n_items):base.my_rep->items_per_page;
+
+            head_page = make_copy( base, srcp, index, end_in_first_page, g_index );
+            page* cur_page = head_page;
+
+            if( srcp != src.tail_page ) {
+                for( srcp = srcp->next; srcp!=src.tail_page; srcp=srcp->next ) {
+                    cur_page->next = make_copy( base, srcp, 0, base.my_rep->items_per_page, g_index );
+                    cur_page = cur_page->next;
+                }
+
+                __TBB_ASSERT( srcp==src.tail_page, NULL );
+                size_t last_index = tail_counter/concurrent_queue_rep_base::n_queue & (base.my_rep->items_per_page-1);
+                if( last_index==0 ) last_index = base.my_rep->items_per_page;
+
+                cur_page->next = make_copy( base, srcp, 0, last_index, g_index );
+                cur_page = cur_page->next;
+            }
+            tail_page = cur_page;
+        } __TBB_CATCH (...) {
+            invalidate_page_and_rethrow( g_index );
+        }
+    } else {
+        head_page = tail_page = NULL;
+    }
+    return *this;
+}
+
+template<typename T>
+void micro_queue<T>::invalidate_page_and_rethrow( ticket k ) {
+    // Append an invalid page at address 1 so that no more pushes are allowed.
+    page* invalid_page = (page*)uintptr_t(1);
+    {
+        spin_mutex::scoped_lock lock( page_mutex );
+        itt_store_word_with_release(tail_counter, k+concurrent_queue_rep_base::n_queue+1);
+        page* q = tail_page;
+        if( is_valid_page(q) )
+            q->next = invalid_page;
+        else
+            head_page = invalid_page;
+        tail_page = invalid_page;
+    }
+    __TBB_RETHROW();
+}
+
+template<typename T>
+concurrent_queue_rep_base::page* micro_queue<T>::make_copy( concurrent_queue_base_v3<T>& base, const concurrent_queue_rep_base::page* src_page, size_t begin_in_page, size_t end_in_page, ticket& g_index ) {
+    concurrent_queue_page_allocator& pa = base;
+    page* new_page = pa.allocate_page();
+    new_page->next = NULL;
+    new_page->mask = src_page->mask;
+    for( ; begin_in_page!=end_in_page; ++begin_in_page, ++g_index )
+        if( new_page->mask & uintptr_t(1)<<begin_in_page )
+            copy_item( *new_page, begin_in_page, *src_page, begin_in_page );
+    return new_page;
+}
+
+template<typename T>
+class micro_queue_pop_finalizer: no_copy {
+    typedef concurrent_queue_rep_base::page page;
+    ticket my_ticket;
+    micro_queue<T>& my_queue;
+    page* my_page; 
+    concurrent_queue_page_allocator& allocator;
+public:
+    micro_queue_pop_finalizer( micro_queue<T>& queue, concurrent_queue_base_v3<T>& b, ticket k, page* p ) :
+        my_ticket(k), my_queue(queue), my_page(p), allocator(b)
+    {}
+    ~micro_queue_pop_finalizer() ;
+};
+
+template<typename T>
+micro_queue_pop_finalizer<T>::~micro_queue_pop_finalizer() {
+    page* p = my_page;
+    if( is_valid_page(p) ) {
+        spin_mutex::scoped_lock lock( my_queue.page_mutex );
+        page* q = p->next;
+        my_queue.head_page = q;
+        if( !is_valid_page(q) ) {
+            my_queue.tail_page = NULL;
+        }
+    }
+    itt_store_word_with_release(my_queue.head_counter, my_ticket);
+    if( is_valid_page(p) ) {
+        allocator.deallocate_page( p );
+    }
+}
+
+#if _MSC_VER && !defined(__INTEL_COMPILER)
+#pragma warning( pop )
+#endif // warning 4146 is back
+
+template<typename T> class concurrent_queue_iterator_rep ;
+template<typename T> class concurrent_queue_iterator_base_v3;
+
+//! representation of concurrent_queue_base
+/**
+ * the class inherits from concurrent_queue_rep_base and defines an array of micro_queue<T>'s
+ */
+template<typename T>
+struct concurrent_queue_rep : public concurrent_queue_rep_base {
+    micro_queue<T> array[n_queue];
+
+    //! Map ticket to an array index
+    static size_t index( ticket k ) {
+        return k*phi%n_queue;
+    }
+
+    micro_queue<T>& choose( ticket k ) {
+        // The formula here approximates LRU in a cache-oblivious way.
+        return array[index(k)];
+    }
+};
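+// [Editorial illustration, not part of this commit] With the constants defined in
+// concurrent_queue_rep_base above (n_queue==8, phi==3), index(k) = k*3 % 8 sends
+// tickets 0,1,2,3,4,5,6,7 to micro-queues 0,3,6,1,4,7,2,5: threads that grab
+// consecutive tickets are spread across different micro_queue sublists, while a
+// given ticket always maps back to the same micro_queue for the matching pop.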
+
+//! base class of concurrent_queue
+/**
+ * The class implements the interface defined by concurrent_queue_page_allocator
+ * and has a pointer to an instance of concurrent_queue_rep.
+ */
+template<typename T>
+class concurrent_queue_base_v3: public concurrent_queue_page_allocator {
+    //! Internal representation
+    concurrent_queue_rep<T>* my_rep;
+
+    friend struct concurrent_queue_rep<T>;
+    friend class micro_queue<T>;
+    friend class concurrent_queue_iterator_rep<T>;
+    friend class concurrent_queue_iterator_base_v3<T>;
+
+protected:
+    typedef typename concurrent_queue_rep<T>::page page;
+
+private:
+    typedef typename micro_queue<T>::padded_page padded_page;
+
+    /* override */ virtual page *allocate_page() {
+        concurrent_queue_rep<T>& r = *my_rep;
+        size_t n = sizeof(padded_page) + (r.items_per_page-1)*sizeof(T);
+        return reinterpret_cast<page*>(allocate_block ( n ));
+    }
+
+    /* override */ virtual void deallocate_page( concurrent_queue_rep_base::page *p ) {
+        concurrent_queue_rep<T>& r = *my_rep;
+        size_t n = sizeof(padded_page) + (r.items_per_page-1)*sizeof(T);
+        deallocate_block( reinterpret_cast<void*>(p), n );
+    }
+
+    //! custom allocator
+    virtual void *allocate_block( size_t n ) = 0;
+
+    //! custom de-allocator
+    virtual void deallocate_block( void *p, size_t n ) = 0;
+
+protected:
+    concurrent_queue_base_v3();
+
+    /* override */ virtual ~concurrent_queue_base_v3() {
+#if TBB_USE_ASSERT
+        size_t nq = my_rep->n_queue;
+        for( size_t i=0; i<nq; i++ )
+            __TBB_ASSERT( my_rep->array[i].tail_page==NULL, "pages were not freed properly" );
+#endif /* TBB_USE_ASSERT */
+        cache_aligned_allocator<concurrent_queue_rep<T> >().deallocate(my_rep,1);
+    }
+
+    //! Enqueue item at tail of queue
+    void internal_push( const void* src ) {
+        concurrent_queue_rep<T>& r = *my_rep;
+        ticket k = r.tail_counter++;
+        r.choose(k).push( src, k, *this );
+    }
+
+    //! Attempt to dequeue item from queue.
+    /** NULL if there was no item to dequeue. */
+    bool internal_try_pop( void* dst ) ;
+
+    //! Get size of queue; result may be invalid if queue is modified concurrently
+    size_t internal_size() const ;
+
+    //! check if the queue is empty; thread safe
+    bool internal_empty() const ;
+
+    //! free any remaining pages
+    /* note that the name may be misleading, but it remains so due to a historical accident. */
+    void internal_finish_clear() ;
+
+    //! Obsolete
+    void internal_throw_exception() const {
+        throw_exception( eid_bad_alloc );
+    }
+
+    //! copy internal representation
+    void assign( const concurrent_queue_base_v3& src ) ;
+};
+
+template<typename T>
+concurrent_queue_base_v3<T>::concurrent_queue_base_v3() {
+    const size_t item_size = sizeof(T);
+    my_rep = cache_aligned_allocator<concurrent_queue_rep<T> >().allocate(1);
+    __TBB_ASSERT( (size_t)my_rep % NFS_GetLineSize()==0, "alignment error" );
+    __TBB_ASSERT( (size_t)&my_rep->head_counter % NFS_GetLineSize()==0, "alignment error" );
+    __TBB_ASSERT( (size_t)&my_rep->tail_counter % NFS_GetLineSize()==0, "alignment error" );
+    __TBB_ASSERT( (size_t)&my_rep->array % NFS_GetLineSize()==0, "alignment error" );
+    memset(my_rep,0,sizeof(concurrent_queue_rep<T>));
+    my_rep->item_size = item_size;
+    my_rep->items_per_page = item_size<=8 ? 32 :
+                             item_size<=16 ? 16 : 
+                             item_size<=32 ? 8 :
+                             item_size<=64 ? 4 :
+                             item_size<=128 ? 2 :
+                             1;
+}
+
+template<typename T>
+bool concurrent_queue_base_v3<T>::internal_try_pop( void* dst ) {
+    concurrent_queue_rep<T>& r = *my_rep;
+    ticket k;
+    do {
+        k = r.head_counter;
+        for(;;) {
+            if( r.tail_counter<=k ) {
+                // Queue is empty 
+                return false;
+            }
+            // Queue had item with ticket k when we looked.  Attempt to get that item.
+            ticket tk=k;
+#if defined(_MSC_VER) && defined(_Wp64)
+    #pragma warning (push)
+    #pragma warning (disable: 4267)
+#endif
+            k = r.head_counter.compare_and_swap( tk+1, tk );
+#if defined(_MSC_VER) && defined(_Wp64)
+    #pragma warning (pop)
+#endif
+            if( k==tk )
+                break;
+            // Another thread snatched the item, retry.
+        }
+    } while( !r.choose( k ).pop( dst, k, *this ) );
+    return true;
+}
+
+template<typename T>
+size_t concurrent_queue_base_v3<T>::internal_size() const {
+    concurrent_queue_rep<T>& r = *my_rep;
+    __TBB_ASSERT( sizeof(ptrdiff_t)<=sizeof(size_t), NULL );
+    ticket hc = r.head_counter;
+    size_t nie = r.n_invalid_entries;
+    ticket tc = r.tail_counter;
+    __TBB_ASSERT( hc!=tc || !nie, NULL );
+    ptrdiff_t sz = tc-hc-nie;
+    return sz<0 ? 0 :  size_t(sz);
+}
+
+template<typename T>
+bool concurrent_queue_base_v3<T>::internal_empty() const {
+    concurrent_queue_rep<T>& r = *my_rep;
+    ticket tc = r.tail_counter;
+    ticket hc = r.head_counter;
+    // if tc!=r.tail_counter, the queue was not empty at some point between the two reads.
+    return tc==r.tail_counter && tc==hc+r.n_invalid_entries ;
+}
+
+template<typename T>
+void concurrent_queue_base_v3<T>::internal_finish_clear() {
+    concurrent_queue_rep<T>& r = *my_rep;
+    size_t nq = r.n_queue;
+    for( size_t i=0; i<nq; ++i ) {
+        page* tp = r.array[i].tail_page;
+        if( is_valid_page(tp) ) {
+            __TBB_ASSERT( r.array[i].head_page==tp, "at most one page should remain" );
+            deallocate_page( tp );
+            r.array[i].tail_page = NULL;
+        } else 
+            __TBB_ASSERT( !is_valid_page(r.array[i].head_page), "head page pointer corrupt?" );
+    }
+}
+
+template<typename T>
+void concurrent_queue_base_v3<T>::assign( const concurrent_queue_base_v3& src ) {
+    concurrent_queue_rep<T>& r = *my_rep;
+    r.items_per_page = src.my_rep->items_per_page;
+
+    // copy concurrent_queue_rep.
+    r.head_counter = src.my_rep->head_counter;
+    r.tail_counter = src.my_rep->tail_counter;
+    r.n_invalid_entries = src.my_rep->n_invalid_entries;
+
+    // copy micro_queues
+    for( size_t i = 0; i<r.n_queue; ++i )
+        r.array[i].assign( src.my_rep->array[i], *this);
+
+    __TBB_ASSERT( r.head_counter==src.my_rep->head_counter && r.tail_counter==src.my_rep->tail_counter, 
+            "the source concurrent queue should not be concurrently modified." );
+}
+
+template<typename Container, typename Value> class concurrent_queue_iterator;
+
+template<typename T>
+class concurrent_queue_iterator_rep: no_assign {
+    typedef typename micro_queue<T>::padded_page padded_page;
+public:
+    ticket head_counter;
+    const concurrent_queue_base_v3<T>& my_queue;
+    typename concurrent_queue_base_v3<T>::page* array[concurrent_queue_rep<T>::n_queue];
+    concurrent_queue_iterator_rep( const concurrent_queue_base_v3<T>& queue ) :
+        head_counter(queue.my_rep->head_counter),
+        my_queue(queue)
+    {
+        for( size_t k=0; k<concurrent_queue_rep<T>::n_queue; ++k )
+            array[k] = queue.my_rep->array[k].head_page;
+    }
+
+    //! Set item to point to kth element.  Return true if at end of queue or item is marked valid; false otherwise.
+    bool get_item( T*& item, size_t k ) ;
+};
+
+template<typename T>
+bool concurrent_queue_iterator_rep<T>::get_item( T*& item, size_t k ) {
+    if( k==my_queue.my_rep->tail_counter ) {
+        item = NULL;
+        return true;
+    } else {
+        typename concurrent_queue_base_v3<T>::page* p = array[concurrent_queue_rep<T>::index(k)];
+        __TBB_ASSERT(p,NULL);
+        size_t i = k/concurrent_queue_rep<T>::n_queue & (my_queue.my_rep->items_per_page-1);
+        item = &micro_queue<T>::get_ref(*p,i);
+        return (p->mask & uintptr_t(1)<<i)!=0;
+    }
+}
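+
+// Worked example for get_item(): with items_per_page = 16, ticket k is stored
+// in micro-queue index(k) at slot i = (k/n_queue) & 15 of that micro-queue's
+// current page, and bit i of page::mask tells whether the slot holds a valid
+// item.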
+
+//! Constness-independent portion of concurrent_queue_iterator.
+/** @ingroup containers */
+template<typename Value>
+class concurrent_queue_iterator_base_v3 : no_assign {
+    //! Represents concurrent_queue over which we are iterating.
+    /** NULL if one past last element in queue. */
+    concurrent_queue_iterator_rep<Value>* my_rep;
+
+    template<typename C, typename T, typename U>
+    friend bool operator==( const concurrent_queue_iterator<C,T>& i, const concurrent_queue_iterator<C,U>& j );
+
+    template<typename C, typename T, typename U>
+    friend bool operator!=( const concurrent_queue_iterator<C,T>& i, const concurrent_queue_iterator<C,U>& j );
+protected:
+    //! Pointer to current item
+    Value* my_item;
+
+    //! Default constructor
+    concurrent_queue_iterator_base_v3() : my_rep(NULL), my_item(NULL) {
+#if __TBB_GCC_OPTIMIZER_ORDERING_BROKEN
+        __asm__ __volatile__("": : :"memory");
+#endif
+    }
+
+    //! Copy constructor
+    concurrent_queue_iterator_base_v3( const concurrent_queue_iterator_base_v3& i )
+    : no_assign(), my_rep(NULL), my_item(NULL) {
+        assign(i);
+    }
+
+    //! Construct iterator pointing to head of queue.
+    concurrent_queue_iterator_base_v3( const concurrent_queue_base_v3<Value>& queue ) ;
+
+    //! Assignment
+    void assign( const concurrent_queue_iterator_base_v3<Value>& other ) ;
+
+    //! Advance iterator one step towards tail of queue.
+    void advance() ;
+
+    //! Destructor
+    ~concurrent_queue_iterator_base_v3() {
+        cache_aligned_allocator<concurrent_queue_iterator_rep<Value> >().deallocate(my_rep, 1);
+        my_rep = NULL;
+    }
+};
+
+template<typename Value>
+concurrent_queue_iterator_base_v3<Value>::concurrent_queue_iterator_base_v3( const concurrent_queue_base_v3<Value>& queue ) {
+    my_rep = cache_aligned_allocator<concurrent_queue_iterator_rep<Value> >().allocate(1);
+    new( my_rep ) concurrent_queue_iterator_rep<Value>(queue);
+    size_t k = my_rep->head_counter;
+    if( !my_rep->get_item(my_item, k) ) advance();
+}
+
+template<typename Value>
+void concurrent_queue_iterator_base_v3<Value>::assign( const concurrent_queue_iterator_base_v3<Value>& other ) {
+    if( my_rep!=other.my_rep ) {
+        if( my_rep ) {
+            cache_aligned_allocator<concurrent_queue_iterator_rep<Value> >().deallocate(my_rep, 1);
+            my_rep = NULL;
+        }
+        if( other.my_rep ) {
+            my_rep = cache_aligned_allocator<concurrent_queue_iterator_rep<Value> >().allocate(1);
+            new( my_rep ) concurrent_queue_iterator_rep<Value>( *other.my_rep );
+        }
+    }
+    my_item = other.my_item;
+}
+
+template<typename Value>
+void concurrent_queue_iterator_base_v3<Value>::advance() {
+    __TBB_ASSERT( my_item, "attempt to increment iterator past end of queue" );  
+    size_t k = my_rep->head_counter;
+    const concurrent_queue_base_v3<Value>& queue = my_rep->my_queue;
+#if TBB_USE_ASSERT
+    Value* tmp;
+    my_rep->get_item(tmp,k);
+    __TBB_ASSERT( my_item==tmp, NULL );
+#endif /* TBB_USE_ASSERT */
+    size_t i = k/concurrent_queue_rep<Value>::n_queue & (queue.my_rep->items_per_page-1);
+    if( i==queue.my_rep->items_per_page-1 ) {
+        typename concurrent_queue_base_v3<Value>::page*& root = my_rep->array[concurrent_queue_rep<Value>::index(k)];
+        root = root->next;
+    }
+    // advance k
+    my_rep->head_counter = ++k;
+    if( !my_rep->get_item(my_item, k) ) advance();
+}
+
+//! Similar to C++0x std::remove_cv
+/** "tbb_" prefix added to avoid overload confusion with C++0x implementations. */
+template<typename T> struct tbb_remove_cv {typedef T type;};
+template<typename T> struct tbb_remove_cv<const T> {typedef T type;};
+template<typename T> struct tbb_remove_cv<volatile T> {typedef T type;};
+template<typename T> struct tbb_remove_cv<const volatile T> {typedef T type;};
+
+//! Meets requirements of a forward iterator for STL.
+/** Value is either the T or const T type of the container.
+    @ingroup containers */
+template<typename Container, typename Value>
+class concurrent_queue_iterator: public concurrent_queue_iterator_base_v3<typename tbb_remove_cv<Value>::type>,
+        public std::iterator<std::forward_iterator_tag,Value> {
+#if !__TBB_TEMPLATE_FRIENDS_BROKEN
+    template<typename T, class A>
+    friend class ::tbb::strict_ppl::concurrent_queue;
+#else
+public: // workaround for MSVC
+#endif 
+    //! Construct iterator pointing to head of queue.
+    concurrent_queue_iterator( const concurrent_queue_base_v3<Value>& queue ) :
+        concurrent_queue_iterator_base_v3<typename tbb_remove_cv<Value>::type>(queue)
+    {
+    }
+
+public:
+    concurrent_queue_iterator() {}
+
+    concurrent_queue_iterator( const concurrent_queue_iterator<Container,typename Container::value_type>& other ) :
+        concurrent_queue_iterator_base_v3<typename tbb_remove_cv<Value>::type>(other)
+    {}
+
+    //! Iterator assignment
+    concurrent_queue_iterator& operator=( const concurrent_queue_iterator& other ) {
+        this->assign(other);
+        return *this;
+    }
+
+    //! Reference to current item 
+    Value& operator*() const {
+        return *static_cast<Value*>(this->my_item);
+    }
+
+    Value* operator->() const {return &operator*();}
+
+    //! Advance to next item in queue
+    concurrent_queue_iterator& operator++() {
+        this->advance();
+        return *this;
+    }
+
+    //! Post increment
+    Value* operator++(int) {
+        Value* result = &operator*();
+        operator++();
+        return result;
+    }
+}; // concurrent_queue_iterator
+
+
+template<typename C, typename T, typename U>
+bool operator==( const concurrent_queue_iterator<C,T>& i, const concurrent_queue_iterator<C,U>& j ) {
+    return i.my_item==j.my_item;
+}
+
+template<typename C, typename T, typename U>
+bool operator!=( const concurrent_queue_iterator<C,T>& i, const concurrent_queue_iterator<C,U>& j ) {
+    return i.my_item!=j.my_item;
+}
+
+} // namespace internal
+
+//! @endcond
+
+} // namespace strict_ppl
+
+//! @cond INTERNAL
+namespace internal {
+
+class concurrent_queue_rep;
+class concurrent_queue_iterator_rep;
+class concurrent_queue_iterator_base_v3;
+template<typename Container, typename Value> class concurrent_queue_iterator;
+
+//! For internal use only.
+/** Type-independent portion of concurrent_queue.
+    @ingroup containers */
+class concurrent_queue_base_v3: no_copy {
+    //! Internal representation
+    concurrent_queue_rep* my_rep;
+
+    friend class concurrent_queue_rep;
+    friend struct micro_queue;
+    friend class micro_queue_pop_finalizer;
+    friend class concurrent_queue_iterator_rep;
+    friend class concurrent_queue_iterator_base_v3;
+protected:
+    //! Prefix on a page
+    struct page {
+        page* next;
+        uintptr_t mask; 
+    };
+
+    //! Capacity of the queue
+    ptrdiff_t my_capacity;
+   
+    //! Always a power of 2
+    size_t items_per_page;
+
+    //! Size of an item
+    size_t item_size;
+
+#if __TBB_GCC_3_3_PROTECTED_BROKEN
+public:
+#endif
+    template<typename T>
+    struct padded_page: page {
+        //! Not defined anywhere - exists to quiet warnings.
+        padded_page(); 
+        //! Not defined anywhere - exists to quiet warnings.
+        void operator=( const padded_page& );
+        //! Must be last field.
+        T last;
+    };
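+
+    // Note on the layout above: __TBB_offsetof(padded_page<T>,last) is used by
+    // the iterator below as the offset of a page's element storage, which is
+    // why 'last' must remain the last field of padded_page.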
+
+private:
+    virtual void copy_item( page& dst, size_t index, const void* src ) = 0;
+    virtual void assign_and_destroy_item( void* dst, page& src, size_t index ) = 0;
+protected:
+    __TBB_EXPORTED_METHOD concurrent_queue_base_v3( size_t item_size );
+    virtual __TBB_EXPORTED_METHOD ~concurrent_queue_base_v3();
+
+    //! Enqueue item at tail of queue
+    void __TBB_EXPORTED_METHOD internal_push( const void* src );
+
+    //! Dequeue item from head of queue
+    void __TBB_EXPORTED_METHOD internal_pop( void* dst );
+
+    //! Attempt to enqueue item onto queue.
+    bool __TBB_EXPORTED_METHOD internal_push_if_not_full( const void* src );
+
+    //! Attempt to dequeue item from queue.
+    /** Returns false if there was no item to dequeue. */
+    bool __TBB_EXPORTED_METHOD internal_pop_if_present( void* dst );
+
+    //! Get size of queue
+    ptrdiff_t __TBB_EXPORTED_METHOD internal_size() const;
+
+    //! Check if the queue is empty
+    bool __TBB_EXPORTED_METHOD internal_empty() const;
+
+    //! Set the queue capacity
+    void __TBB_EXPORTED_METHOD internal_set_capacity( ptrdiff_t capacity, size_t element_size );
+
+    //! custom allocator
+    virtual page *allocate_page() = 0;
+
+    //! custom de-allocator
+    virtual void deallocate_page( page *p ) = 0;
+
+    //! free any remaining pages
+    /* Note: the name may be misleading, but it is kept for historical reasons. */
+    void __TBB_EXPORTED_METHOD internal_finish_clear() ;
+
+    //! throw an exception
+    void __TBB_EXPORTED_METHOD internal_throw_exception() const;
+
+    //! copy internal representation
+    void __TBB_EXPORTED_METHOD assign( const concurrent_queue_base_v3& src ) ;
+
+private:
+    virtual void copy_page_item( page& dst, size_t dindex, const page& src, size_t sindex ) = 0;
+};
+
+//! Type-independent portion of concurrent_queue_iterator.
+/** @ingroup containers */
+class concurrent_queue_iterator_base_v3 {
+    //! concurrent_queue over which we are iterating.
+    /** NULL if one past last element in queue. */
+    concurrent_queue_iterator_rep* my_rep;
+
+    template<typename C, typename T, typename U>
+    friend bool operator==( const concurrent_queue_iterator<C,T>& i, const concurrent_queue_iterator<C,U>& j );
+
+    template<typename C, typename T, typename U>
+    friend bool operator!=( const concurrent_queue_iterator<C,T>& i, const concurrent_queue_iterator<C,U>& j );
+
+    void initialize( const concurrent_queue_base_v3& queue, size_t offset_of_data );
+protected:
+    //! Pointer to current item
+    void* my_item;
+
+    //! Default constructor
+    concurrent_queue_iterator_base_v3() : my_rep(NULL), my_item(NULL) {}
+
+    //! Copy constructor
+    concurrent_queue_iterator_base_v3( const concurrent_queue_iterator_base_v3& i ) : my_rep(NULL), my_item(NULL) {
+        assign(i);
+    }
+
+    //! Obsolete entry point for constructing iterator pointing to head of queue.
+    /** Does not work correctly for SSE types. */
+    __TBB_EXPORTED_METHOD concurrent_queue_iterator_base_v3( const concurrent_queue_base_v3& queue );
+
+    //! Construct iterator pointing to head of queue.
+    __TBB_EXPORTED_METHOD concurrent_queue_iterator_base_v3( const concurrent_queue_base_v3& queue, size_t offset_of_data );
+
+    //! Assignment
+    void __TBB_EXPORTED_METHOD assign( const concurrent_queue_iterator_base_v3& i );
+
+    //! Advance iterator one step towards tail of queue.
+    void __TBB_EXPORTED_METHOD advance();
+
+    //! Destructor
+    __TBB_EXPORTED_METHOD ~concurrent_queue_iterator_base_v3();
+};
+
+typedef concurrent_queue_iterator_base_v3 concurrent_queue_iterator_base;
+
+//! Meets requirements of a forward iterator for STL.
+/** Value is either the T or const T type of the container.
+    @ingroup containers */
+template<typename Container, typename Value>
+class concurrent_queue_iterator: public concurrent_queue_iterator_base,
+        public std::iterator<std::forward_iterator_tag,Value> {
+
+#if !defined(_MSC_VER) || defined(__INTEL_COMPILER)
+    template<typename T, class A>
+    friend class ::tbb::concurrent_bounded_queue;
+
+    template<typename T, class A>
+    friend class ::tbb::deprecated::concurrent_queue;
+#else
+public: // workaround for MSVC
+#endif 
+    //! Construct iterator pointing to head of queue.
+    concurrent_queue_iterator( const concurrent_queue_base_v3& queue ) :
+        concurrent_queue_iterator_base_v3(queue,__TBB_offsetof(concurrent_queue_base_v3::padded_page<Value>,last))
+    {
+    }
+
+public:
+    concurrent_queue_iterator() {}
+
+    /** If Value==Container::value_type, then this routine is the copy constructor. 
+        If Value==const Container::value_type, then this routine is a conversion constructor. */
+    concurrent_queue_iterator( const concurrent_queue_iterator<Container,typename Container::value_type>& other ) :
+        concurrent_queue_iterator_base_v3(other)
+    {}
+
+    //! Iterator assignment
+    concurrent_queue_iterator& operator=( const concurrent_queue_iterator& other ) {
+        assign(other);
+        return *this;
+    }
+
+    //! Reference to current item 
+    Value& operator*() const {
+        return *static_cast<Value*>(my_item);
+    }
+
+    Value* operator->() const {return &operator*();}
+
+    //! Advance to next item in queue
+    concurrent_queue_iterator& operator++() {
+        advance();
+        return *this;
+    }
+
+    //! Post increment
+    Value* operator++(int) {
+        Value* result = &operator*();
+        operator++();
+        return result;
+    }
+}; // concurrent_queue_iterator
+
+
+template<typename C, typename T, typename U>
+bool operator==( const concurrent_queue_iterator<C,T>& i, const concurrent_queue_iterator<C,U>& j ) {
+    return i.my_item==j.my_item;
+}
+
+template<typename C, typename T, typename U>
+bool operator!=( const concurrent_queue_iterator<C,T>& i, const concurrent_queue_iterator<C,U>& j ) {
+    return i.my_item!=j.my_item;
+}
+
+} // namespace internal;
+
+//! @endcond
+
+} // namespace tbb
+
+#endif /* __TBB_concurrent_queue_internal_H */
diff --git a/tbb/include/tbb/_concurrent_unordered_internal.h b/tbb/include/tbb/_concurrent_unordered_internal.h
new file mode 100644 (file)
index 0000000..a3a1add
--- /dev/null
@@ -0,0 +1,1411 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+/* Container implementations in this header are based on PPL implementations 
+   provided by Microsoft. */
+
+#ifndef __TBB_concurrent_unordered_internal_H
+#define __TBB_concurrent_unordered_internal_H
+
+#include "tbb_stddef.h"
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
+    #pragma warning (push)
+    #pragma warning (disable: 4530)
+#endif
+
+#include <iterator>
+#include <utility>      // Need std::pair
+#include <functional>
+#include <string>       // For tbb_hasher
+#include <cstring>      // Need std::memset
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    #pragma warning (pop)
+#endif
+
+#include "atomic.h"
+#include "tbb_exception.h"
+#include "tbb_allocator.h"
+
+namespace tbb {
+namespace interface5 {
+//! @cond INTERNAL
+namespace internal {
+
+template <typename T, typename Allocator>
+class split_ordered_list;
+template <typename Traits>
+class concurrent_unordered_base;
+
+// Forward list iterators (without skipping dummy elements)
+template<class Solist, typename Value>
+class flist_iterator : public std::iterator<std::forward_iterator_tag, Value>
+{
+    template <typename T, typename Allocator>
+    friend class split_ordered_list;
+    template <typename Traits>
+    friend class concurrent_unordered_base;
+    template<class M, typename V>
+    friend class flist_iterator;
+
+    typedef typename Solist::nodeptr_t nodeptr_t;
+public:
+    typedef typename Solist::value_type value_type;
+    typedef typename Solist::difference_type difference_type;
+    typedef typename Solist::pointer pointer;
+    typedef typename Solist::reference reference;
+
+    flist_iterator() : my_node_ptr(0) {}
+    flist_iterator( const flist_iterator<Solist, typename Solist::value_type> &other )
+        : my_node_ptr(other.my_node_ptr) {}
+
+    reference operator*() const { return my_node_ptr->my_element; }
+    pointer operator->() const { return &**this; }
+
+    flist_iterator& operator++() {
+        my_node_ptr = my_node_ptr->my_next;
+        return *this;
+    }
+
+    flist_iterator operator++(int) {
+        flist_iterator tmp = *this;
+        ++*this;
+        return tmp;
+    }
+
+protected:
+    flist_iterator(nodeptr_t pnode) : my_node_ptr(pnode) {}
+    nodeptr_t get_node_ptr() const { return my_node_ptr; }
+
+    nodeptr_t my_node_ptr;
+
+    template<typename M, typename T, typename U>
+    friend bool operator==( const flist_iterator<M,T> &i, const flist_iterator<M,U> &j );
+    template<typename M, typename T, typename U>
+    friend bool operator!=( const flist_iterator<M,T>& i, const flist_iterator<M,U>& j );
+};
+
+template<typename Solist, typename T, typename U>
+bool operator==( const flist_iterator<Solist,T> &i, const flist_iterator<Solist,U> &j ) {
+    return i.my_node_ptr == j.my_node_ptr;
+}
+template<typename Solist, typename T, typename U>
+bool operator!=( const flist_iterator<Solist,T>& i, const flist_iterator<Solist,U>& j ) {
+    return i.my_node_ptr != j.my_node_ptr;
+}
+
+// Split-order list iterators, needed to skip dummy elements
+template<class Solist, typename Value>
+class solist_iterator : public flist_iterator<Solist, Value>
+{
+    typedef flist_iterator<Solist, Value> base_type;
+    typedef typename Solist::nodeptr_t nodeptr_t;
+    using base_type::get_node_ptr;
+    template <typename T, typename Allocator>
+    friend class split_ordered_list;
+    template<class M, typename V>
+    friend class solist_iterator;
+    template<typename M, typename T, typename U>
+    friend bool operator==( const solist_iterator<M,T> &i, const solist_iterator<M,U> &j );
+    template<typename M, typename T, typename U>
+    friend bool operator!=( const solist_iterator<M,T>& i, const solist_iterator<M,U>& j );
+
+    const Solist *my_list_ptr;
+    solist_iterator(nodeptr_t pnode, const Solist *plist) : base_type(pnode), my_list_ptr(plist) {}
+
+public:
+    typedef typename Solist::value_type value_type;
+    typedef typename Solist::difference_type difference_type;
+    typedef typename Solist::pointer pointer;
+    typedef typename Solist::reference reference;
+
+    solist_iterator() {}
+    solist_iterator(const solist_iterator<Solist, typename Solist::value_type> &other )
+        : base_type(other), my_list_ptr(other.my_list_ptr) {}
+
+    reference operator*() const {
+        return this->base_type::operator*();
+    }
+
+    pointer operator->() const {
+        return (&**this);
+    }
+
+    solist_iterator& operator++() {
+        do ++(*(base_type *)this);
+        while (get_node_ptr() != NULL && get_node_ptr()->is_dummy());
+
+        return (*this);
+    }
+
+    solist_iterator operator++(int) {
+        solist_iterator tmp = *this;
+        do ++*this;
+        while (get_node_ptr() != NULL && get_node_ptr()->is_dummy());
+
+        return (tmp);
+    }
+};
+
+template<typename Solist, typename T, typename U>
+bool operator==( const solist_iterator<Solist,T> &i, const solist_iterator<Solist,U> &j ) {
+    return i.my_node_ptr == j.my_node_ptr && i.my_list_ptr == j.my_list_ptr;
+}
+template<typename Solist, typename T, typename U>
+bool operator!=( const solist_iterator<Solist,T>& i, const solist_iterator<Solist,U>& j ) {
+    return i.my_node_ptr != j.my_node_ptr || i.my_list_ptr != j.my_list_ptr;
+}
+
+// Forward type and class definitions
+typedef size_t sokey_t;
+
+// Forward list in which elements are sorted in a split-order
+template <typename T, typename Allocator>
+class split_ordered_list
+{
+public:
+    typedef split_ordered_list<T, Allocator> self_type;
+    typedef typename Allocator::template rebind<T>::other allocator_type;
+    struct node;
+    typedef node *nodeptr_t;
+
+    typedef typename allocator_type::size_type size_type;
+    typedef typename allocator_type::difference_type difference_type;
+    typedef typename allocator_type::pointer pointer;
+    typedef typename allocator_type::const_pointer const_pointer;
+    typedef typename allocator_type::reference reference;
+    typedef typename allocator_type::const_reference const_reference;
+    typedef typename allocator_type::value_type value_type;
+
+    typedef solist_iterator<self_type, const value_type> const_iterator;
+    typedef solist_iterator<self_type, value_type> iterator;
+    typedef flist_iterator<self_type, const value_type> raw_const_iterator;
+    typedef flist_iterator<self_type, value_type> raw_iterator;
+
+    // Node that holds the element in a split-ordered list
+    struct node : tbb::internal::no_assign
+    {
+        // Initialize the node with the given order key
+        void init(sokey_t order_key) {
+            my_order_key = order_key;
+            my_next = NULL;
+        }
+
+        // Return the order key (needed for hashing)
+        sokey_t get_order_key() const { // TODO: remove
+            return my_order_key;
+        }
+
+        // Inserts the new element in the list in an atomic fashion
+        nodeptr_t atomic_set_next(nodeptr_t new_node, nodeptr_t current_node)
+        {
+            // Try to change the next pointer on the current element to a new element, only if it still points to the cached next
+            nodeptr_t exchange_node = (nodeptr_t) __TBB_CompareAndSwapW((void *) &my_next, (uintptr_t)new_node, (uintptr_t)current_node);
+
+            if (exchange_node == current_node) // TODO: why this branch?
+            {
+                // Operation succeeded, return the new node
+                return new_node;
+            }
+            else
+            {
+                // Operation failed, return the "interfering" node
+                return exchange_node;
+            }
+        }
+
+        // Checks if this element in the list is a dummy, order-enforcing node. Dummy nodes are used by buckets
+        // in the hash table to quickly index into the right subsection of the split-ordered list.
+        bool is_dummy() const {
+            return (my_order_key & 0x1) == 0;
+        }
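+
+        // Example: the list head created with order key 0 is a dummy, as are
+        // all bucket dummies (even order keys); user elements are inserted
+        // with odd order keys (see split_order_key_regular used by the hash
+        // table below), so the low bit alone distinguishes the two node kinds.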
+
+
+        nodeptr_t  my_next;      // Next element in the list
+        value_type my_element;   // Element storage
+        sokey_t    my_order_key; // Order key for this element
+    };
+
+    // Allocate a new node with the given order key and value
+    nodeptr_t create_node(sokey_t order_key, const T &value) {
+        nodeptr_t pnode = my_node_allocator.allocate(1);
+
+        __TBB_TRY {
+            new(static_cast<void*>(&pnode->my_element)) T(value);
+            pnode->init(order_key);
+        } __TBB_CATCH(...) {
+            my_node_allocator.deallocate(pnode, 1);
+            __TBB_RETHROW();
+        }
+
+        return (pnode);
+    }
+
+    // Allocate a new node with the given order key; used to allocate dummy nodes
+    nodeptr_t create_node(sokey_t order_key) {
+        nodeptr_t pnode = my_node_allocator.allocate(1);
+
+        __TBB_TRY {
+            new(static_cast<void*>(&pnode->my_element)) T();
+            pnode->init(order_key);
+        } __TBB_CATCH(...) {
+            my_node_allocator.deallocate(pnode, 1);
+            __TBB_RETHROW();
+        }
+
+        return (pnode);
+    }
+
+    split_ordered_list(allocator_type a = allocator_type())
+        : my_node_allocator(a), my_element_count(0)
+    {
+        // Immediately allocate a dummy node with order key of 0. This node
+        // will always be the head of the list.
+        my_head = create_node(0);
+    }
+
+    ~split_ordered_list()
+    {
+        // Clear the list
+        clear();
+
+        // Remove the head element which is not cleared by clear()
+        nodeptr_t pnode = my_head;
+        my_head = NULL;
+
+        __TBB_ASSERT(pnode != NULL && pnode->my_next == NULL, "Invalid head list node");
+
+        destroy_node(pnode);
+    }
+
+    // Common forward list functions
+
+    allocator_type get_allocator() const {
+        return (my_node_allocator);
+    }
+
+    void clear() {
+        nodeptr_t pnext;
+        nodeptr_t pnode = my_head;
+
+        __TBB_ASSERT(my_head != NULL, "Invalid head list node");
+        pnext = pnode->my_next;
+        pnode->my_next = NULL;
+        pnode = pnext;
+
+        while (pnode != NULL)
+        {
+            pnext = pnode->my_next;
+            destroy_node(pnode);
+            pnode = pnext;
+        }
+
+        my_element_count = 0;
+    }
+
+    // Returns the first non-dummy element in the SOL
+    iterator begin() {
+        return first_real_iterator(raw_begin());
+    }
+
+    // Returns the first non-dummy element in the SOL
+    const_iterator begin() const {
+        return first_real_iterator(raw_begin());
+    }
+
+    iterator end() {
+        return (iterator(0, this));
+    }
+
+    const_iterator end() const {
+        return (const_iterator(0, this));
+    }
+
+    const_iterator cbegin() const {
+        return (((const self_type *)this)->begin());
+    }
+
+    const_iterator cend() const {
+        return (((const self_type *)this)->end());
+    }
+
+    // Checks if the number of elements (non-dummy) is 0
+    bool empty() const {
+        return (my_element_count == 0);
+    }
+
+    // Returns the number of non-dummy elements in the list
+    size_type size() const {
+        return my_element_count;
+    }
+
+    // Returns the maximum size of the list, determined by the allocator
+    size_type max_size() const {
+        return my_node_allocator.max_size();
+    }
+
+    // Swaps 'this' list with the passed in one
+    void swap(self_type& other)
+    {
+        if (this == &other)
+        {
+            // Nothing to do
+            return;
+        }
+
+        std::swap(my_element_count, other.my_element_count);
+        std::swap(my_head, other.my_head);
+    }
+
+    // Split-order list functions
+
+    // Returns the first element in the SOL, which is always a dummy
+    raw_iterator raw_begin() {
+        return raw_iterator(my_head);
+    }
+
+    // Returns the first element in the SOL, which is always a dummy
+    raw_const_iterator raw_begin() const {
+        return raw_const_iterator(my_head);
+    }
+
+    raw_iterator raw_end() {
+        return raw_iterator(0);
+    }
+
+    raw_const_iterator raw_end() const {
+        return raw_const_iterator(0);
+    }
+
+    static sokey_t get_order_key(const raw_const_iterator& it) {
+        return it.get_node_ptr()->get_order_key();
+    }
+
+    static sokey_t get_safe_order_key(const raw_const_iterator& it) {
+        if( !it.get_node_ptr() ) return sokey_t(~0U);
+        return it.get_node_ptr()->get_order_key();
+    }
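+
+    // Example: get_safe_order_key(raw_end()) yields sokey_t(~0U), the maximum
+    // order key, so an end iterator can safely be used as an upper bound, e.g.
+    // in const_range_type::set_midpoint() below.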
+
+    // Returns a public iterator version of the internal iterator. The internal iterator
+    // must not point to a dummy node.
+    iterator get_iterator(raw_iterator it) {
+        __TBB_ASSERT(it.get_node_ptr() == NULL || !it.get_node_ptr()->is_dummy(), "Invalid user node (dummy)");
+        return iterator(it.get_node_ptr(), this);
+    }
+
+    // Returns a public iterator version of the internal iterator. The internal iterator
+    // must not point to a dummy node.
+    const_iterator get_iterator(raw_const_iterator it) const {
+        __TBB_ASSERT(it.get_node_ptr() == NULL || !it.get_node_ptr()->is_dummy(), "Invalid user node (dummy)");
+        return const_iterator(it.get_node_ptr(), this);
+    }
+
+    // Returns a non-const version of the raw_iterator
+    raw_iterator get_iterator(raw_const_iterator it) {
+        return raw_iterator(it.get_node_ptr());
+    }
+
+    // Returns a non-const version of the iterator
+    static iterator get_iterator(const_iterator it) {
+        return iterator(it.my_node_ptr, it.my_list_ptr);
+    }
+
+    // Returns a public iterator version of the first non-dummy internal iterator at or after
+    // the passed-in internal iterator.
+    iterator first_real_iterator(raw_iterator it)
+    {
+        // Skip all dummy, internal only iterators
+        while (it != raw_end() && it.get_node_ptr()->is_dummy())
+            ++it;
+
+        return iterator(it.get_node_ptr(), this);
+    }
+
+    // Returns a public iterator version of the first non-dummy internal iterator at or after
+    // the passed-in internal iterator.
+    const_iterator first_real_iterator(raw_const_iterator it) const
+    {
+        // Skip all dummy, internal only iterators
+        while (it != raw_end() && it.get_node_ptr()->is_dummy())
+            ++it;
+
+        return const_iterator(it.get_node_ptr(), this);
+    }
+
+    // Erase an element using the allocator
+    void destroy_node(nodeptr_t pnode) {
+        my_node_allocator.destroy(pnode);
+        my_node_allocator.deallocate(pnode, 1);
+    }
+
+    // Try to insert a new element in the list. If insert fails, return the node that
+    // was inserted instead.
+    nodeptr_t try_insert(nodeptr_t previous, nodeptr_t new_node, nodeptr_t current_node) {
+        new_node->my_next = current_node;
+        return previous->atomic_set_next(new_node, current_node);
+    }
+
+    // Insert a new element between passed in iterators
+    std::pair<iterator, bool> try_insert(raw_iterator it, raw_iterator next, const value_type &value, sokey_t order_key, size_type *new_count)
+    {
+        nodeptr_t pnode = create_node(order_key, value);
+        nodeptr_t inserted_node = try_insert(it.get_node_ptr(), pnode, next.get_node_ptr());
+
+        if (inserted_node == pnode)
+        {
+            // If the insert succeeded, check that the order is correct and increment the element count
+            check_range();
+            *new_count = __TBB_FetchAndAddW((uintptr_t*)&my_element_count, uintptr_t(1));
+            return std::pair<iterator, bool>(iterator(pnode, this), true);
+        }
+        else
+        {
+            // If the insert failed (element already there), then delete the new one
+            destroy_node(pnode);
+            return std::pair<iterator, bool>(end(), false);
+        }
+    }
+
+    // Insert a new dummy element, starting search at a parent dummy element
+    raw_iterator insert_dummy(raw_iterator it, sokey_t order_key)
+    {
+        raw_iterator last = raw_end();
+        raw_iterator where = it;
+
+        __TBB_ASSERT(where != last, "Invalid head node");
+
+        ++where;
+
+        // Create a dummy element up front, even though it may be discarded (due to concurrent insertion)
+        nodeptr_t dummy_node = create_node(order_key);
+
+        for (;;)
+        {
+            __TBB_ASSERT(it != last, "Invalid head list node");
+
+            // If the head iterator is at the end of the list, or past the point where this dummy
+            // node needs to be inserted, then try to insert it.
+            if (where == last || get_order_key(where) > order_key)
+            {
+                __TBB_ASSERT(get_order_key(it) < order_key, "Invalid node order in the list");
+
+                // Try to insert it in the right place
+                nodeptr_t inserted_node = try_insert(it.get_node_ptr(), dummy_node, where.get_node_ptr());
+
+                if (inserted_node == dummy_node)
+                {
+                    // Insertion succeeded, check the list for order violations
+                    check_range();
+                    return raw_iterator(dummy_node);
+                }
+                else
+                {
+                    // Insertion failed: either dummy node was inserted by another thread, or
+                    // a real element was inserted at exactly the same place as dummy node.
+                    // Proceed with the search from the previous location where order key was
+                    // known to be larger (note: this is legal only because there is no safe
+                    // concurrent erase operation supported).
+                    where = it;
+                    ++where;
+                    continue;
+                }
+            }
+            else if (get_order_key(where) == order_key)
+            {
+                // Another dummy node with the same value found, discard the new one.
+                destroy_node(dummy_node);
+                return where;
+            }
+
+            // Move the iterator forward
+            it = where;
+            ++where;
+        }
+
+    }
+
+    // This erase function can handle both real and dummy nodes
+    void erase_node(raw_iterator previous, raw_const_iterator& where)
+    {
+        nodeptr_t pnode = (where++).get_node_ptr();
+        nodeptr_t prevnode = previous.get_node_ptr();
+        __TBB_ASSERT(prevnode->my_next == pnode, "Erase must take consecutive iterators");
+        prevnode->my_next = pnode->my_next;
+
+        destroy_node(pnode);
+    }
+
+    // Erase the element (previous node needs to be passed because this is a forward only list)
+    iterator erase_node(raw_iterator previous, const_iterator where)
+    {
+        raw_const_iterator it = where;
+        erase_node(previous, it);
+        my_element_count--;
+
+        return get_iterator(first_real_iterator(it));
+    }
+
+    // Move all elements from the passed in split-ordered list to this one
+    void move_all(self_type& source)
+    {
+        raw_const_iterator first = source.raw_begin();
+        raw_const_iterator last = source.raw_end();
+
+        if (first == last)
+            return;
+
+        nodeptr_t previous_node = my_head;
+        raw_const_iterator begin_iterator = first++;
+
+        // Move all elements one by one, including dummy ones
+        for (raw_const_iterator it = first; it != last;)
+        {
+            nodeptr_t pnode = it.get_node_ptr();
+
+            nodeptr_t dummy_node = pnode->is_dummy() ? create_node(pnode->get_order_key()) : create_node(pnode->get_order_key(), pnode->my_element);
+            previous_node = try_insert(previous_node, dummy_node, NULL);
+            __TBB_ASSERT(previous_node != NULL, "Insertion must succeed");
+            raw_const_iterator where = it++;
+            source.erase_node(get_iterator(begin_iterator), where);
+        }
+        check_range();
+    }
+
+
+private:
+
+    // Check the list for order violations
+    void check_range()
+    {
+#if TBB_USE_ASSERT
+        for (raw_iterator it = raw_begin(); it != raw_end(); ++it)
+        {
+            raw_iterator next_iterator = it;
+            ++next_iterator;
+
+            __TBB_ASSERT(next_iterator == end() || next_iterator.get_node_ptr()->get_order_key() >= it.get_node_ptr()->get_order_key(), "!!! List order inconsistency !!!");
+        }
+#endif
+    }
+
+    typename allocator_type::template rebind<node>::other my_node_allocator;  // allocator object for nodes
+    size_type                                             my_element_count;   // Total item count, not counting dummy nodes
+    nodeptr_t                                             my_head;            // pointer to head node
+};
+
+// Template class for hash compare
+template<typename Key, typename Hasher, typename Key_equality>
+class hash_compare
+{
+public:
+    hash_compare() {}
+
+    hash_compare(Hasher a_hasher) : my_hash_object(a_hasher) {}
+
+    hash_compare(Hasher a_hasher, Key_equality a_keyeq) : my_hash_object(a_hasher), my_key_compare_object(a_keyeq) {}
+
+    size_t operator()(const Key& key) const {
+        return ((size_t)my_hash_object(key));
+    }
+
+    bool operator()(const Key& key1, const Key& key2) const {
+        return (!my_key_compare_object(key1, key2));
+    }
+
+    Hasher       my_hash_object;        // The hash object
+    Key_equality my_key_compare_object; // The equality comparator object
+};
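+
+// Illustrative usage (string_hasher is a hypothetical functor): a container's
+// Traits would typically instantiate this as
+//     hash_compare<std::string, string_hasher, std::equal_to<std::string> >
+// where operator()(key) forwards to string_hasher and operator()(key1, key2)
+// returns true when the two keys are NOT equal.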
+
+#if _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4127) // warning 4127 -- while (true) has a constant expression in it (for allow_multimapping)
+#endif
+
+template <typename Traits>
+class concurrent_unordered_base : public Traits
+{
+protected:
+    // Type definitions
+    typedef concurrent_unordered_base<Traits> self_type;
+    typedef typename Traits::value_type value_type;
+    typedef typename Traits::key_type key_type;
+    typedef typename Traits::hash_compare hash_compare;
+    typedef typename Traits::value_compare value_compare;
+    typedef typename Traits::allocator_type allocator_type;
+    typedef typename allocator_type::pointer pointer;
+    typedef typename allocator_type::const_pointer const_pointer;
+    typedef typename allocator_type::reference reference;
+    typedef typename allocator_type::const_reference const_reference;
+    typedef typename allocator_type::size_type size_type;
+    typedef typename allocator_type::difference_type difference_type;
+    typedef split_ordered_list<value_type, typename Traits::allocator_type> solist_t;
+    typedef typename solist_t::nodeptr_t nodeptr_t;
+    // Iterators that walk the entire split-order list, including dummy nodes
+    typedef typename solist_t::raw_iterator raw_iterator;
+    typedef typename solist_t::raw_const_iterator raw_const_iterator;
+    typedef typename solist_t::iterator iterator; // TODO: restore const iterator for unordered_sets
+    typedef typename solist_t::const_iterator const_iterator;
+    typedef iterator local_iterator;
+    typedef const_iterator const_local_iterator;
+    using Traits::my_hash_compare;
+    using Traits::get_key;
+    using Traits::allow_multimapping;
+
+private:
+    typedef std::pair<iterator, iterator> pairii_t;
+    typedef std::pair<const_iterator, const_iterator> paircc_t;
+
+    static size_type const pointers_per_table = sizeof(size_type) * 8;              // One bucket segment per bit
+    static const size_type initial_bucket_number = 8;                               // Initial number of buckets
+    static const size_type initial_bucket_load = 4;                                // Initial maximum number of elements per bucket
+
+protected:
+    // Constructors/Destructors
+    concurrent_unordered_base(size_type n_of_buckets = initial_bucket_number,
+        const hash_compare& hc = hash_compare(), const allocator_type& a = allocator_type())
+        : Traits(hc), my_solist(a),
+          my_allocator(a), my_maximum_bucket_size((float) initial_bucket_load)
+    {
+        if( n_of_buckets == 0) ++n_of_buckets;
+        my_number_of_buckets = 1<<__TBB_Log2((uintptr_t)n_of_buckets*2-1); // round up to power of 2
+        internal_init();
+    }
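+
+    // Worked example of the rounding above (assuming __TBB_Log2 returns the
+    // index of the most significant set bit): n_of_buckets = 5 gives
+    // 1<<__TBB_Log2(9) = 8 buckets, while n_of_buckets = 8 stays at
+    // 1<<__TBB_Log2(15) = 8.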
+
+    concurrent_unordered_base(const concurrent_unordered_base& right, const allocator_type& a)
+        : Traits(right.my_hash_compare), my_solist(a), my_allocator(a)
+    {
+        internal_init();
+        internal_copy(right);
+    }
+
+    concurrent_unordered_base(const concurrent_unordered_base& right)
+        : Traits(right.my_hash_compare), my_solist(right.get_allocator()), my_allocator(right.get_allocator())
+    {
+        internal_init();
+        internal_copy(right);
+    }
+
+    concurrent_unordered_base& operator=(const concurrent_unordered_base& right) {
+        if (this != &right)
+            internal_copy(right);
+        return (*this);
+    }
+
+    ~concurrent_unordered_base() {
+        // Delete all node segments
+        internal_clear();
+    }
+
+public:
+    allocator_type get_allocator() const {
+        return my_solist.get_allocator();
+    }
+
+    // Size and capacity function
+    bool empty() const {
+        return my_solist.empty();
+    }
+
+    size_type size() const {
+        return my_solist.size();
+    }
+
+    size_type max_size() const {
+        return my_solist.max_size();
+    }
+
+    // Iterators 
+    iterator begin() {
+        return my_solist.begin();
+    }
+
+    const_iterator begin() const {
+        return my_solist.begin();
+    }
+
+    iterator end() {
+        return my_solist.end();
+    }
+
+    const_iterator end() const {
+        return my_solist.end();
+    }
+
+    const_iterator cbegin() const {
+        return my_solist.cbegin();
+    }
+
+    const_iterator cend() const {
+        return my_solist.cend();
+    }
+
+    // Parallel traversal support
+    class const_range_type : tbb::internal::no_assign {
+        const concurrent_unordered_base &my_table;
+        raw_const_iterator my_begin_node;
+        raw_const_iterator my_end_node;
+        mutable raw_const_iterator my_midpoint_node;
+    public:
+        //! Type for size of a range
+        typedef typename concurrent_unordered_base::size_type size_type;
+        typedef typename concurrent_unordered_base::value_type value_type;
+        typedef typename concurrent_unordered_base::reference reference;
+        typedef typename concurrent_unordered_base::difference_type difference_type;
+        typedef typename concurrent_unordered_base::const_iterator iterator;
+
+        //! True if range is empty.
+        bool empty() const {return my_begin_node == my_end_node;}
+
+        //! True if range can be partitioned into two subranges.
+        bool is_divisible() const {
+            return my_midpoint_node != my_end_node;
+        }
+        //! Split range.
+        const_range_type( const_range_type &r, split ) : 
+            my_table(r.my_table), my_end_node(r.my_end_node)
+        {
+            r.my_end_node = my_begin_node = r.my_midpoint_node;
+            __TBB_ASSERT( !empty(), "Splitting a range that is not divisible" );
+            __TBB_ASSERT( !r.empty(), "Splitting a range that is not divisible" );
+            set_midpoint();
+            r.set_midpoint();
+        }
+        //! Init range with container and grainsize specified
+        const_range_type( const concurrent_unordered_base &a_table ) : 
+            my_table(a_table), my_begin_node(a_table.my_solist.begin()),
+            my_end_node(a_table.my_solist.end())
+        {
+            set_midpoint();
+        }
+        iterator begin() const { return my_table.my_solist.get_iterator(my_begin_node); }
+        iterator end() const { return my_table.my_solist.get_iterator(my_end_node); }
+        //! The grain size for this range.
+        size_type grainsize() const { return 1; }
+
+        //! Set my_midpoint_node to point approximately half way between my_begin_node and my_end_node.
+        void set_midpoint() const {
+            if( my_begin_node == my_end_node ) // not divisible
+                my_midpoint_node = my_end_node;
+            else {
+                sokey_t begin_key = solist_t::get_safe_order_key(my_begin_node);
+                sokey_t end_key = solist_t::get_safe_order_key(my_end_node);
+                size_t mid_bucket = __TBB_ReverseBits( begin_key + (end_key-begin_key)/2 ) % my_table.my_number_of_buckets;
+                while ( !my_table.is_initialized(mid_bucket) ) mid_bucket = my_table.get_parent(mid_bucket);
+                my_midpoint_node = my_table.my_solist.first_real_iterator(my_table.get_bucket( mid_bucket ));
+                if( my_midpoint_node == my_begin_node )
+                    my_midpoint_node = my_end_node;
+#if TBB_USE_ASSERT
+                else {
+                    sokey_t mid_key = solist_t::get_safe_order_key(my_midpoint_node);
+                    __TBB_ASSERT( begin_key < mid_key, "my_begin_node is after my_midpoint_node" );
+                    __TBB_ASSERT( mid_key <= end_key, "my_midpoint_node is after my_end_node" );
+                }
+#endif // TBB_USE_ASSERT
+            }
+        }
+    };
+
+    class range_type : public const_range_type {
+    public:
+        typedef typename concurrent_unordered_base::iterator iterator;
+        //! Split range.
+        range_type( range_type &r, split ) : const_range_type( r, split() ) {}
+        //! Init range with container and grainsize specified
+        range_type( const concurrent_unordered_base &a_table ) : const_range_type(a_table) {}
+
+        iterator begin() const { return solist_t::get_iterator( const_range_type::begin() ); }
+        iterator end() const { return solist_t::get_iterator( const_range_type::end() ); }
+    };
+
+    range_type range() {
+        return range_type( *this );
+    }
+
+    const_range_type range() const {
+        return const_range_type( *this );
+    }
+
+    // Modifiers
+    std::pair<iterator, bool> insert(const value_type& value) {
+        return internal_insert(value);
+    }
+
+    iterator insert(const_iterator, const value_type& value) {
+        // Ignore hint
+        return insert(value).first;
+    }
+
+    template<class Iterator>
+    void insert(Iterator first, Iterator last) {
+        for (Iterator it = first; it != last; ++it)
+            insert(*it);
+    }
+
+    iterator unsafe_erase(const_iterator where) {
+        return internal_erase(where);
+    }
+
+    iterator unsafe_erase(const_iterator first, const_iterator last) {
+        while (first != last)
+            unsafe_erase(first++);
+        return my_solist.get_iterator(first);
+    }
+
+    size_type unsafe_erase(const key_type& key) {
+        pairii_t where = equal_range(key);
+        size_type item_count = internal_distance(where.first, where.second);
+        unsafe_erase(where.first, where.second);
+        return item_count;
+    }
+
+    void swap(concurrent_unordered_base& right) {
+        if (this != &right) {
+            std::swap(my_hash_compare, right.my_hash_compare); // TODO: check what ADL meant here
+            my_solist.swap(right.my_solist);
+            internal_swap_buckets(right);
+            std::swap(my_number_of_buckets, right.my_number_of_buckets);
+            std::swap(my_maximum_bucket_size, right.my_maximum_bucket_size);
+        }
+    }
+
+    // Observers
+    void clear() {
+        // Clear list
+        my_solist.clear();
+
+        // Clear buckets
+        internal_clear();
+
+        // Initialize bucket 0
+        __TBB_ASSERT(my_buckets[0] == NULL, NULL);
+        raw_iterator dummy_node = my_solist.raw_begin();
+        set_bucket(0, dummy_node);
+    }
+
+    // Lookup
+    iterator find(const key_type& key) {
+        return internal_find(key);
+    }
+
+    const_iterator find(const key_type& key) const {
+        return const_cast<self_type*>(this)->internal_find(key);
+    }
+
+    size_type count(const key_type& key) const {
+        if(allow_multimapping) {
+            paircc_t answer = equal_range(key);
+            size_type item_count = internal_distance(answer.first, answer.second);
+            return item_count;
+        } else {
+            return const_cast<self_type*>(this)->internal_find(key) == end()?0:1;
+        }
+    }
+
+    std::pair<iterator, iterator> equal_range(const key_type& key) {
+        return internal_equal_range(key);
+    }
+
+    std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const {
+        return const_cast<self_type*>(this)->internal_equal_range(key);
+    }
+
+    // Bucket interface - for debugging 
+    size_type unsafe_bucket_count() const {
+        return my_number_of_buckets;
+    }
+
+    size_type unsafe_max_bucket_count() const {
+        return segment_size(pointers_per_table-1);
+    }
+
+    size_type unsafe_bucket_size(size_type bucket) {
+        size_type item_count = 0;
+        if (is_initialized(bucket)) {
+            raw_iterator it = get_bucket(bucket);
+            ++it;
+            for (; it != my_solist.raw_end() && !it.get_node_ptr()->is_dummy(); ++it)
+                ++item_count;
+        }
+        return item_count;
+    }
+
+    size_type unsafe_bucket(const key_type& key) const {
+        sokey_t order_key = (sokey_t) my_hash_compare(key);
+        size_type bucket = order_key % my_number_of_buckets;
+        return bucket;
+    }
+
+    // If the bucket is initialized, return a first non-dummy element in it
+    local_iterator unsafe_begin(size_type bucket) {
+        if (!is_initialized(bucket))
+            return end();
+
+        raw_iterator it = get_bucket(bucket);
+        return my_solist.first_real_iterator(it);
+    }
+
+    // If the bucket is initialized, return a first non-dummy element in it
+    const_local_iterator unsafe_begin(size_type bucket) const
+    {
+        if (!is_initialized(bucket))
+            return end();
+
+        raw_const_iterator it = get_bucket(bucket);
+        return my_solist.first_real_iterator(it);
+    }
+
+    // @REVIEW: Takes O(n)
+    // Returns the iterator after the last non-dummy element in the bucket
+    local_iterator unsafe_end(size_type bucket)
+    {
+        if (!is_initialized(bucket))
+            return end();
+
+        raw_iterator it = get_bucket(bucket);
+    
+        // Find the end of the bucket, denoted by the dummy element
+        do ++it;
+        while(it != my_solist.raw_end() && !it.get_node_ptr()->is_dummy());
+
+        // Return the first real element past the end of the bucket
+        return my_solist.first_real_iterator(it);
+    }
+
+    // @REVIEW: Takes O(n)
+    // Returns the iterator after the last non-dummy element in the bucket
+    const_local_iterator unsafe_end(size_type bucket) const
+    {
+        if (!is_initialized(bucket))
+            return end();
+
+        raw_const_iterator it = get_bucket(bucket);
+    
+        // Find the end of the bucket, denoted by the dummy element
+        do ++it;
+        while(it != my_solist.raw_end() && !it.get_node_ptr()->is_dummy());
+
+        // Return the first real element past the end of the bucket
+        return my_solist.first_real_iterator(it);
+    }
+
+    const_local_iterator unsafe_cbegin(size_type bucket) const {
+        return ((const self_type *) this)->unsafe_begin(bucket);
+    }
+
+    const_local_iterator unsafe_cend(size_type bucket) const {
+        return ((const self_type *) this)->unsafe_end(bucket);
+    }
+
+    // Hash policy
+    float load_factor() const {
+        return (float) size() / (float) unsafe_bucket_count();
+    }
+
+    float max_load_factor() const {
+        return my_maximum_bucket_size;
+    }
+
+    void max_load_factor(float newmax) {
+        if (newmax != newmax || newmax < 0)
+            tbb::internal::throw_exception(tbb::internal::eid_invalid_load_factor);
+        my_maximum_bucket_size = newmax;
+    }
+
+    // This function does not rehash eagerly: the underlying split-ordered list
+    // is already sorted, so an increased bucket count is merely recorded here
+    // and takes effect the next time an affected bucket is touched.
+    void rehash(size_type buckets) {
+        size_type current_buckets = my_number_of_buckets;
+        if (current_buckets >= buckets)
+            return;
+        my_number_of_buckets = 1<<__TBB_Log2((uintptr_t)buckets*2-1); // round up to power of 2
+    }
+
+private:
+
+    // Initialize the hash and keep the first bucket open
+    void internal_init() {
+        // Allocate an array of segment pointers
+        memset(my_buckets, 0, pointers_per_table * sizeof(void *));
+
+        // Initialize bucket 0
+        raw_iterator dummy_node = my_solist.raw_begin();
+        set_bucket(0, dummy_node);
+    }
+
+    void internal_clear() {
+        for (size_type index = 0; index < pointers_per_table; ++index) {
+            if (my_buckets[index] != NULL) {
+                size_type sz = segment_size(index);
+                for (size_type index2 = 0; index2 < sz; ++index2)
+                    my_allocator.destroy(&my_buckets[index][index2]);
+                my_allocator.deallocate(my_buckets[index], sz);
+                my_buckets[index] = 0;
+            }
+        }
+    }
+
+    void internal_copy(const self_type& right) {
+        clear();
+
+        my_maximum_bucket_size = right.my_maximum_bucket_size;
+        my_number_of_buckets = right.my_number_of_buckets;
+
+        __TBB_TRY {
+            insert(right.begin(), right.end());
+            my_hash_compare = right.my_hash_compare;
+        } __TBB_CATCH(...) {
+            my_solist.clear();
+            __TBB_RETHROW();
+        }
+    }
+
+    void internal_swap_buckets(concurrent_unordered_base& right)
+    {
+        // Swap all node segments
+        for (size_type index = 0; index < pointers_per_table; ++index)
+        {
+            raw_iterator * iterator_pointer = my_buckets[index];
+            my_buckets[index] = right.my_buckets[index];
+            right.my_buckets[index] = iterator_pointer;
+        }
+    }
+
+    // Hash APIs
+    size_type internal_distance(const_iterator first, const_iterator last) const
+    {
+        size_type num = 0;
+
+        for (const_iterator it = first; it != last; ++it)
+            ++num;
+
+        return num;
+    }
+
+    // Insert an element in the hash given its value
+    std::pair<iterator, bool> internal_insert(const value_type& value)
+    {
+        sokey_t order_key = (sokey_t) my_hash_compare(get_key(value));
+        size_type bucket = order_key % my_number_of_buckets;
+
+        // If bucket is empty, initialize it first
+        if (!is_initialized(bucket))
+            init_bucket(bucket);
+
+        size_type new_count;
+        order_key = split_order_key_regular(order_key);
+        raw_iterator it = get_bucket(bucket);
+        raw_iterator last = my_solist.raw_end();
+        raw_iterator where = it;
+
+        __TBB_ASSERT(where != last, "Invalid head node");
+
+        // First node is a dummy node
+        ++where;
+
+        for (;;)
+        {
+            if (where == last || solist_t::get_order_key(where) > order_key)
+            {
+                // Try to insert it in the right place
+                std::pair<iterator, bool> result = my_solist.try_insert(it, where, value, order_key, &new_count);
+                
+                if (result.second)
+                {
+                    // Insertion succeeded, adjust the table size, if needed
+                    adjust_table_size(new_count, my_number_of_buckets);
+                    return result;
+                }
+                else
+                {
+                    // Insertion failed: either the same node was inserted by another thread, or
+                    // another element was inserted at exactly the same place as this node.
+                    // Proceed with the search from the previous location where order key was
+                    // known to be larger (note: this is legal only because there is no safe
+                    // concurrent erase operation supported).
+                    where = it;
+                    ++where;
+                    continue;
+                }
+            }
+            else if (!allow_multimapping && solist_t::get_order_key(where) == order_key && my_hash_compare(get_key(*where), get_key(value)) == 0)
+            {
+                // Element already in the list, return it
+                return std::pair<iterator, bool>(my_solist.get_iterator(where), false);
+            }
+
+            // Move the iterator forward
+            it = where;
+            ++where;
+        }
+    }
+
+    // Find the element in the split-ordered list
+    iterator internal_find(const key_type& key)
+    {
+        sokey_t order_key = (sokey_t) my_hash_compare(key);
+        size_type bucket = order_key % my_number_of_buckets;
+
+        // If bucket is empty, initialize it first
+        if (!is_initialized(bucket))
+            init_bucket(bucket);
+
+        order_key = split_order_key_regular(order_key);
+        raw_iterator last = my_solist.raw_end();
+
+        for (raw_iterator it = get_bucket(bucket); it != last; ++it)
+        {
+            if (solist_t::get_order_key(it) > order_key)
+            {
+                // If the order key is smaller than the current order key, the element
+                // is not in the hash.
+                return end();
+            }
+            else if (solist_t::get_order_key(it) == order_key)
+            {
+                // The fact that order keys match does not mean that the element is found.
+                // Key function comparison has to be performed to check whether this is the
+                // right element. If not, keep searching while order key is the same.
+                if (!my_hash_compare(get_key(*it), key))
+                    return my_solist.get_iterator(it);
+            }
+        }
+
+        return end();
+    }
+
+    // Erase an element from the list. This is not a concurrency safe function.
+    iterator internal_erase(const_iterator it)
+    {
+        key_type key = get_key(*it);
+        sokey_t order_key = (sokey_t) my_hash_compare(key);
+        size_type bucket = order_key % my_number_of_buckets;
+
+        // If bucket is empty, initialize it first
+        if (!is_initialized(bucket))
+            init_bucket(bucket);
+
+        order_key = split_order_key_regular(order_key);
+
+        raw_iterator previous = get_bucket(bucket);
+        raw_iterator last = my_solist.raw_end();
+        raw_iterator where = previous;
+
+        __TBB_ASSERT(where != last, "Invalid head node");
+
+        // First node is a dummy node
+        ++where;
+
+        for (;;) {
+            if (where == last)
+                return end();
+            else if (my_solist.get_iterator(where) == it)
+                return my_solist.erase_node(previous, it);
+
+            // Move the iterator forward
+            previous = where;
+            ++where;
+        }
+    }
+
+    // Return the [begin, end) pair of iterators with the same key values.
+    // This operation makes sense only if mapping is many-to-one.
+    pairii_t internal_equal_range(const key_type& key)
+    {
+        sokey_t order_key = (sokey_t) my_hash_compare(key);
+        size_type bucket = order_key % my_number_of_buckets;
+
+        // If bucket is empty, initialize it first
+        if (!is_initialized(bucket))
+            init_bucket(bucket);
+
+        order_key = split_order_key_regular(order_key);
+        raw_iterator end_it = my_solist.raw_end();
+
+        for (raw_iterator it = get_bucket(bucket); it != end_it; ++it)
+        {
+            if (solist_t::get_order_key(it) > order_key)
+            {
+                // There is no element with the given key
+                return pairii_t(end(), end());
+            }
+            else if (solist_t::get_order_key(it) == order_key && !my_hash_compare(get_key(*it), key))
+            {
+                iterator first = my_solist.get_iterator(it);
+                iterator last = first;
+                do ++last; while( allow_multimapping && last != end() && !my_hash_compare(get_key(*last), key) );
+                return pairii_t(first, last);
+            }
+        }
+
+        return pairii_t(end(), end());
+    }
+
+    // Bucket APIs
+    void init_bucket(size_type bucket)
+    {
+        // Bucket 0 has no parent.
+        __TBB_ASSERT( bucket != 0, "The first bucket must always be initialized");
+
+        size_type parent_bucket = get_parent(bucket);
+
+        // The parent bucket has to be initialized before this one can be
+        if (!is_initialized(parent_bucket))
+            init_bucket(parent_bucket);
+
+        raw_iterator parent = get_bucket(parent_bucket);
+
+        // Create a dummy first node in this bucket
+        raw_iterator dummy_node = my_solist.insert_dummy(parent, split_order_key_dummy(bucket));
+        set_bucket(bucket, dummy_node);
+    }
+
+    void adjust_table_size(size_type total_elements, size_type current_size)
+    {
+        // Grow the table by a factor of 2 if possible and needed
+        if ( ((float) total_elements / (float) current_size) > my_maximum_bucket_size )
+        {
+            // Double the size of the hash only if the size has not changed in between the loads
+            __TBB_CompareAndSwapW((uintptr_t*)&my_number_of_buckets, uintptr_t(2u*current_size), uintptr_t(current_size) );
+            //Simple "my_number_of_buckets.compare_and_swap( current_size<<1, current_size );" does not work for VC8
+            //due to overzealous compiler warnings in /Wp64 mode
+        }
+    }
+
+    size_type get_parent(size_type bucket) const
+    {
+        // Clears the bucket's most significant set bit
+        size_type msb = __TBB_Log2((uintptr_t)bucket);
+        return bucket & ~(size_type(1) << msb);
+    }
+
+
+    // Dynamically sized array (segments)
+    //! @return segment index of given index in the array
+    static size_type segment_index_of( size_type index ) {
+        return size_type( __TBB_Log2( uintptr_t(index|1) ) );
+    }
+
+    //! @return the first array index of given segment
+    static size_type segment_base( size_type k ) {
+        return (size_type(1)<<k & ~size_type(1));
+    }
+
+    //! @return segment size
+    static size_type segment_size( size_type k ) {
+        return k? size_type(1)<<k : 2;
+    }
+
+    raw_iterator get_bucket(size_type bucket) const {
+        size_type segment = segment_index_of(bucket);
+        bucket -= segment_base(segment);
+        __TBB_ASSERT( my_buckets[segment], "bucket must be in an allocated segment" );
+        return my_buckets[segment][bucket];
+    }
+
+    void set_bucket(size_type bucket, raw_iterator dummy_head) {
+        size_type segment = segment_index_of(bucket);
+        bucket -= segment_base(segment);
+
+        if (my_buckets[segment] == NULL) {
+            size_type sz = segment_size(segment);
+            raw_iterator * new_segment = my_allocator.allocate(sz);
+            std::memset(new_segment, 0, sz*sizeof(raw_iterator));
+
+            if (__TBB_CompareAndSwapW((void *) &my_buckets[segment], (uintptr_t)new_segment, 0) != 0)
+                my_allocator.deallocate(new_segment, sz);
+        }
+
+        my_buckets[segment][bucket] = dummy_head;
+    }
+
+    bool is_initialized(size_type bucket) const {
+        size_type segment = segment_index_of(bucket);
+        bucket -= segment_base(segment);
+
+        if (my_buckets[segment] == NULL)
+            return false;
+
+        raw_iterator it = my_buckets[segment][bucket];
+        return (it.get_node_ptr() != NULL);
+    }
+
+    // Utilities for keys
+
+    // A regular order key has its original hash value reversed and the last bit set
+    sokey_t split_order_key_regular(sokey_t order_key) const {
+        return __TBB_ReverseBits(order_key) | 0x1;
+    }
+
+    // A dummy order key has its original hash value reversed and the last bit unset
+    sokey_t split_order_key_dummy(sokey_t order_key) const {
+        return __TBB_ReverseBits(order_key) & ~(0x1);
+    }
+
+    // Shared variables
+    atomic<size_type>                                             my_number_of_buckets;       // Current table size
+    solist_t                                                      my_solist;                  // List where all the elements are kept
+    typename allocator_type::template rebind<raw_iterator>::other my_allocator;               // Allocator object for segments
+    float                                                         my_maximum_bucket_size;     // Maximum size of the bucket
+    atomic<raw_iterator*>                                         my_buckets[pointers_per_table]; // The segment table
+};
+#if _MSC_VER
+#pragma warning(pop) // warning 4127 -- while (true) has a constant expression in it
+#endif
+
+//! Hash multiplier
+static const size_t hash_multiplier = sizeof(size_t)==4? 2654435769U : 11400714819323198485ULL;
+} // namespace internal
+//! @endcond
+//! Hasher functions
+template<typename T>
+inline size_t tbb_hasher( const T& t ) {
+    return static_cast<size_t>( t ) * internal::hash_multiplier;
+}
+template<typename P>
+inline size_t tbb_hasher( P* ptr ) {
+    size_t const h = reinterpret_cast<size_t>( ptr );
+    return (h >> 3) ^ h;
+}
+template<typename E, typename S, typename A>
+inline size_t tbb_hasher( const std::basic_string<E,S,A>& s ) {
+    size_t h = 0;
+    for( const E* c = s.c_str(); *c; ++c )
+        h = static_cast<size_t>(*c) ^ (h * internal::hash_multiplier);
+    return h;
+}
+template<typename F, typename S>
+inline size_t tbb_hasher( const std::pair<F,S>& p ) {
+    return tbb_hasher(p.first) ^ tbb_hasher(p.second);
+}
+} // namespace interface5
+using interface5::tbb_hasher;
+} // namespace tbb
+#endif// __TBB_concurrent_unordered_internal_H
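The bucket machinery above comes down to three bit tricks: a regular element's order key is its hash with the bits reversed and the lowest bit set, a bucket's dummy node uses the reversed bucket index with the lowest bit cleared, and a bucket's parent is found by clearing its most significant set bit. The sketch below reproduces just that arithmetic in plain C++, outside of the TBB machinery; the helper names are illustrative and the 32-bit key width is an assumption.

    #include <cassert>

    typedef unsigned sokey;   // stand-in for sokey_t; assumed to be 32 bits here

    // Reverse the bits of a 32-bit key (portable stand-in for __TBB_ReverseBits).
    static sokey reverse_bits(sokey x) {
        sokey r = 0;
        for (int i = 0; i < 32; ++i) { r = (r << 1) | (x & 1u); x >>= 1; }
        return r;
    }

    static sokey regular_key(sokey hash)  { return reverse_bits(hash) | 0x1u; }    // real element
    static sokey dummy_key(sokey bucket)  { return reverse_bits(bucket) & ~0x1u; } // bucket head

    // Parent bucket: clear the most significant set bit (6 -> 2, 5 -> 1, 1 -> 0).
    static sokey parent_bucket(sokey bucket) {
        int msb = 0;
        while (bucket >> (msb + 1)) ++msb;
        return bucket & ~(sokey(1) << msb);
    }

    int main() {
        assert(parent_bucket(6) == 2 && parent_bucket(5) == 1 && parent_bucket(1) == 0);
        // A bucket's dummy key sorts just before the regular keys of the elements it owns.
        assert(dummy_key(3) < regular_key(3));
        return 0;
    }
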
diff --git a/tbb/include/tbb/_item_buffer.h b/tbb/include/tbb/_item_buffer.h
new file mode 100644 (file)
index 0000000..fa3ae79
--- /dev/null
@@ -0,0 +1,186 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_item_buffer_H
+#define __TBB_item_buffer_H
+
+    //! Expandable buffer of items.  The possible operations are push, pop,
+    //* tests for empty and so forth.  No mutual exclusion is built in.
+    template <typename T, typename A=cache_aligned_allocator<T> >
+    class item_buffer {
+    public:
+        typedef T input_type;
+        typedef T output_type;
+    protected:
+        typedef size_t size_type;
+        typedef std::pair< T, bool > item_type;
+        typedef typename A::template rebind<item_type>::other allocator_type;
+
+        item_type *my_array;
+        size_type my_array_size;
+        static const size_type initial_buffer_size = 4;
+        size_type my_head;
+        size_type my_tail;
+
+        bool buffer_empty() { return my_head == my_tail; }
+
+        item_type &item(size_type i) { return my_array[i & (my_array_size - 1) ]; } // may not be marked valid
+
+        bool item_valid(size_type i) { return item(i).second; }
+
+        void fetch_front(T &v) { __TBB_ASSERT(item_valid(my_head), "front not valid"); v = item(my_head).first; }
+        void fetch_back(T &v) { __TBB_ASSERT(item_valid(my_tail-1), "back not valid"); v = item(my_tail-1).first; }
+
+        void invalidate(size_type i) { __TBB_ASSERT(item_valid(i), "Item not valid"); item(i).second = false; }
+        void validate(size_type i) { __TBB_ASSERT(!item_valid(i), "Item already valid"); item(i).second = true; }
+
+        void invalidate_front() { invalidate(my_head); }
+        void validate_front() { validate(my_head); }
+        void invalidate_back() { invalidate(my_tail-1); }
+
+        size_type size() { return my_tail - my_head; }
+        size_type capacity() { return my_array_size; }
+        bool buffer_full() { return size() == capacity(); }
+
+        //! Grows the internal array.
+        void grow_my_array( size_t minimum_size ) {
+            size_type old_size = my_array_size;
+            size_type new_size = old_size ? 2*old_size : initial_buffer_size;
+            while( new_size<minimum_size )
+                new_size*=2;
+
+            item_type* new_array = allocator_type().allocate(new_size);
+            item_type* old_array = my_array;
+
+            for( size_type i=0; i<new_size; ++i ) {
+                new (&(new_array[i].first)) input_type();
+                new_array[i].second = false;
+            }
+
+            size_t t=my_head;
+            for( size_type i=0; i<old_size; ++i, ++t )
+                new_array[t&(new_size-1)] = old_array[t&(old_size-1)];
+            my_array = new_array;
+            my_array_size = new_size;
+            if( old_array ) {
+                for( size_type i=0; i<old_size; ++i, ++t )
+                    old_array[i].first.~input_type();
+                allocator_type().deallocate(old_array,old_size);
+            }
+        }
+
+        bool push_back(T &v) {
+            if(buffer_full()) {
+                grow_my_array(size() + 1);
+            }
+            item(my_tail) = std::make_pair( v, true );
+            ++my_tail;
+            return true;
+        }
+
+        bool pop_back(T &v) {
+            if (!item_valid(my_tail-1)) {
+                return false;
+            }
+            fetch_back(v);
+            invalidate_back();
+            --my_tail;
+            return true;
+        }
+
+        bool pop_front(T &v) {
+            if(!item_valid(my_head)) {
+                return false;
+            }
+            fetch_front(v);
+            invalidate_front();
+            ++my_head;
+            return true;
+        }
+
+    public:
+        //! Constructor
+        item_buffer( ) : my_array(NULL), my_array_size(0),
+            my_head(0), my_tail(0) {
+            grow_my_array(initial_buffer_size);
+        }
+
+        ~item_buffer() {
+            if (my_array) {
+                for( size_type i=0; i<my_array_size; ++i ) {
+                    my_array[i].first.~input_type();
+                }
+                allocator_type().deallocate(my_array,my_array_size); 
+            }
+        }
+
+    };
+
+    //! item_buffer with reservable front-end.  NOTE: if reserving, do not
+    //* complete operation with pop_front(); use consume_front().  
+    //* No synchronization built-in.
+    template<typename T, typename A=cache_aligned_allocator<T> >
+    class reservable_item_buffer : public item_buffer<T, A> {
+    protected:
+        using item_buffer<T, A>::buffer_empty;
+        using item_buffer<T, A>::fetch_front;
+        using item_buffer<T, A>::invalidate_front;
+        using item_buffer<T, A>::validate_front;
+        using item_buffer<T, A>::item_valid;
+        using item_buffer<T, A>::my_head;
+
+    public:
+        reservable_item_buffer() : item_buffer<T, A>(), my_reserved(false) {}
+    protected:
+
+        bool reserve_front(T &v) {
+            if(my_reserved || !item_valid(my_head)) return false;
+            my_reserved = true;
+            // reserving the head
+            fetch_front(v);
+            // invalidate the head, but don't commit until consume is called
+            invalidate_front();
+            return true;
+        }
+
+        void consume_front() {
+            __TBB_ASSERT(my_reserved, "Attempt to consume a non-reserved item");
+            ++my_head;
+            my_reserved = false;
+        }
+
+        void release_front() {
+            __TBB_ASSERT(my_reserved, "Attempt to release a non-reserved item");
+            validate_front();
+            my_reserved = false;
+        }
+
+        bool my_reserved;
+    };
+
+#endif // __TBB_item_buffer_H
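The buffer above keeps its capacity at a power of two so that a monotonically growing head or tail counter can be folded into the array with a simple mask instead of a modulo, and grow_my_array preserves each element's logical position by re-applying the mask with the new size. A self-contained illustration of that indexing idea follows; it is not the TBB class itself, and the names are illustrative only.

    #include <vector>
    #include <cstddef>
    #include <cassert>

    // Head/tail counters increase monotonically; the mask folds them into the array.
    struct ring_index_demo {
        std::vector<int> slots;
        std::size_t head, tail;                    // tail - head == number of live items
        explicit ring_index_demo(std::size_t pow2) : slots(pow2), head(0), tail(0) {}
        int& at(std::size_t i)   { return slots[i & (slots.size() - 1)]; }
        void push_back(int v)    { at(tail++) = v; }
        int  pop_front()         { return at(head++); }
    };

    int main() {
        ring_index_demo r(4);
        for (int i = 0; i < 6; ++i) {              // wraps around the 4-slot array
            r.push_back(i);
            assert(r.pop_front() == i);
        }
        return 0;
    }
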
diff --git a/tbb/include/tbb/_tbb_windef.h b/tbb/include/tbb/_tbb_windef.h
new file mode 100644 (file)
index 0000000..dd96c60
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_tbb_windef_H
+#error Do not #include this file directly.  Use "#include tbb/tbb_stddef.h" instead.
+#endif /* __TBB_tbb_windef_H */
+
+// Check that the target Windows version has all API calls required for TBB.
+// Do not increase the version in condition beyond 0x0500 without prior discussion!
+#if defined(_WIN32_WINNT) && _WIN32_WINNT<0x0400
+#error TBB is unable to run on old Windows versions; _WIN32_WINNT must be 0x0400 or greater.
+#endif
+
+#if !defined(_MT)
+#error TBB requires linkage with multithreaded C/C++ runtime library. \
+       Choose multithreaded DLL runtime in project settings, or use /MD[d] compiler switch.
+#endif
+
+// Workaround for the problem with MSVC headers failing to define namespace std
+namespace std {
+  using ::size_t; using ::ptrdiff_t;
+}
+
+#define __TBB_STRING_AUX(x) #x
+#define __TBB_STRING(x) __TBB_STRING_AUX(x)
+
+// Default setting of TBB_USE_DEBUG
+#ifdef TBB_USE_DEBUG
+#    if TBB_USE_DEBUG 
+#        if !defined(_DEBUG)
+#            pragma message(__FILE__ "(" __TBB_STRING(__LINE__) ") : Warning: Recommend using /MDd if compiling with TBB_USE_DEBUG!=0")
+#        endif
+#    else
+#        if defined(_DEBUG)
+#            pragma message(__FILE__ "(" __TBB_STRING(__LINE__) ") : Warning: Recommend using /MD if compiling with TBB_USE_DEBUG==0")
+#        endif
+#    endif
+#else
+#    ifdef _DEBUG
+#        define TBB_USE_DEBUG 1
+#    endif
+#endif 
+
+#if __TBB_BUILD && !defined(__TBB_NO_IMPLICIT_LINKAGE)
+#define __TBB_NO_IMPLICIT_LINKAGE 1
+#endif
+
+#if _MSC_VER
+    #if !__TBB_NO_IMPLICIT_LINKAGE
+        #ifdef __TBB_LIB_NAME
+            #pragma comment(lib, __TBB_STRING(__TBB_LIB_NAME))
+        #else
+            #ifdef _DEBUG
+                #pragma comment(lib, "tbb_debug.lib")
+            #else
+                #pragma comment(lib, "tbb.lib")
+            #endif
+        #endif
+    #endif
+#endif
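The __TBB_STRING/__TBB_STRING_AUX pair is the usual two-level stringification idiom: the inner macro applies the # operator, and the extra level forces the argument (for example __LINE__ or __TBB_LIB_NAME above) to be macro-expanded first. A short sketch of why the second level matters, with illustrative macro names:

    #include <cstdio>

    #define STR_AUX(x) #x
    #define STR(x)     STR_AUX(x)   // expands x before stringizing, like __TBB_STRING

    #define BUILD_NUMBER 42

    int main() {
        std::printf("%s\n", STR_AUX(BUILD_NUMBER)); // prints "BUILD_NUMBER" (no expansion)
        std::printf("%s\n", STR(BUILD_NUMBER));     // prints "42"
        return 0;
    }
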
diff --git a/tbb/include/tbb/aligned_space.h b/tbb/include/tbb/aligned_space.h
new file mode 100644 (file)
index 0000000..7d642fb
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_aligned_space_H
+#define __TBB_aligned_space_H
+
+#include "tbb_stddef.h"
+#include "tbb_machine.h"
+
+namespace tbb {
+
+//! Block of space aligned sufficiently to construct an array T with N elements.
+/** The elements are not constructed or destroyed by this class.
+    @ingroup memory_allocation */
+template<typename T,size_t N>
+class aligned_space {
+private:
+    typedef __TBB_TypeWithAlignmentAtLeastAsStrict(T) element_type;
+    element_type array[(sizeof(T)*N+sizeof(element_type)-1)/sizeof(element_type)];
+public:
+    //! Pointer to beginning of array
+    T* begin() {return internal::punned_cast<T*>(this);}
+
+    //! Pointer to one past last element in array.
+    T* end() {return begin()+N;}
+};
+
+} // namespace tbb 
+
+#endif /* __TBB_aligned_space_H */
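aligned_space only reserves suitably aligned raw storage; constructing and destroying the objects is the caller's job, typically via placement new. A short usage sketch along those lines (the Widget type is purely illustrative):

    #include <new>
    #include "tbb/aligned_space.h"

    struct Widget {
        int value;
        explicit Widget(int v) : value(v) {}
    };

    void demo() {
        tbb::aligned_space<Widget, 4> storage;          // raw, aligned space for 4 Widgets
        for (int i = 0; i < 4; ++i)
            new (storage.begin() + i) Widget(i);        // construct in place
        int sum = 0;
        for (Widget* w = storage.begin(); w != storage.end(); ++w)
            sum += w->value;                            // use the elements
        for (Widget* w = storage.begin(); w != storage.end(); ++w)
            w->~Widget();                               // destroy manually; the class never does
        (void)sum;
    }
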
diff --git a/tbb/include/tbb/atomic.h b/tbb/include/tbb/atomic.h
new file mode 100644 (file)
index 0000000..d360089
--- /dev/null
@@ -0,0 +1,372 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_atomic_H
+#define __TBB_atomic_H
+
+#include <cstddef>
+#include "tbb_stddef.h"
+
+#if _MSC_VER 
+#define __TBB_LONG_LONG __int64
+#else
+#define __TBB_LONG_LONG long long
+#endif /* _MSC_VER */
+
+#include "tbb_machine.h"
+
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+    // Workaround for overzealous compiler warnings 
+    #pragma warning (push)
+    #pragma warning (disable: 4244 4267)
+#endif
+
+namespace tbb {
+
+//! Specifies memory fencing.
+enum memory_semantics {
+    //! For internal use only.
+    __TBB_full_fence,
+    //! Acquire fence
+    acquire,
+    //! Release fence
+    release
+};
+
+//! @cond INTERNAL
+namespace internal {
+
+#if __GNUC__ || __SUNPRO_CC || __IBMCPP__
+#define __TBB_DECL_ATOMIC_FIELD(t,f,a) t f  __attribute__ ((aligned(a)));
+#elif defined(__INTEL_COMPILER)||_MSC_VER >= 1300
+#define __TBB_DECL_ATOMIC_FIELD(t,f,a) __declspec(align(a)) t f;
+#else 
+#error Do not know syntax for forcing alignment.
+#endif /* __GNUC__ */
+
+template<size_t S>
+struct atomic_rep;           // Primary template declared, but never defined.
+
+template<>
+struct atomic_rep<1> {       // Specialization
+    typedef int8_t word;
+    int8_t value;
+};
+template<>
+struct atomic_rep<2> {       // Specialization
+    typedef int16_t word;
+    __TBB_DECL_ATOMIC_FIELD(int16_t,value,2)
+};
+template<>
+struct atomic_rep<4> {       // Specialization
+#if _MSC_VER && __TBB_WORDSIZE==4
+    // Work-around that avoids spurious /Wp64 warnings
+    typedef intptr_t word;
+#else
+    typedef int32_t word;
+#endif
+    __TBB_DECL_ATOMIC_FIELD(int32_t,value,4)
+};
+#if __TBB_64BIT_ATOMICS
+template<>
+struct atomic_rep<8> {       // Specialization
+    typedef int64_t word;
+    __TBB_DECL_ATOMIC_FIELD(int64_t,value,8)
+};
+#endif
+
+template<size_t Size, memory_semantics M>
+struct atomic_traits;        // Primary template declared, but not defined.
+
+#define __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(S,M)                         \
+    template<> struct atomic_traits<S,M> {                               \
+        typedef atomic_rep<S>::word word;                               \
+        inline static word compare_and_swap( volatile void* location, word new_value, word comparand ) {\
+            return __TBB_CompareAndSwap##S##M(location,new_value,comparand);    \
+        }                                                                       \
+        inline static word fetch_and_add( volatile void* location, word addend ) { \
+            return __TBB_FetchAndAdd##S##M(location,addend);                    \
+        }                                                                       \
+        inline static word fetch_and_store( volatile void* location, word value ) {\
+            return __TBB_FetchAndStore##S##M(location,value);                   \
+        }                                                                       \
+    };
+
+#define __TBB_DECL_ATOMIC_PRIMITIVES(S)                                  \
+    template<memory_semantics M>                                         \
+    struct atomic_traits<S,M> {                                          \
+        typedef atomic_rep<S>::word word;                               \
+        inline static word compare_and_swap( volatile void* location, word new_value, word comparand ) {\
+            return __TBB_CompareAndSwap##S(location,new_value,comparand);       \
+        }                                                                       \
+        inline static word fetch_and_add( volatile void* location, word addend ) { \
+            return __TBB_FetchAndAdd##S(location,addend);                       \
+        }                                                                       \
+        inline static word fetch_and_store( volatile void* location, word value ) {\
+            return __TBB_FetchAndStore##S(location,value);                      \
+        }                                                                       \
+    };
+
+#if __TBB_DECL_FENCED_ATOMICS
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,__TBB_full_fence)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,__TBB_full_fence)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,__TBB_full_fence)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,__TBB_full_fence)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,acquire)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,acquire)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,acquire)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,release)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,release)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,release)
+#if __TBB_64BIT_ATOMICS
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,acquire)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,release)
+#endif
+#else
+__TBB_DECL_ATOMIC_PRIMITIVES(1)
+__TBB_DECL_ATOMIC_PRIMITIVES(2)
+__TBB_DECL_ATOMIC_PRIMITIVES(4)
+#if __TBB_64BIT_ATOMICS
+__TBB_DECL_ATOMIC_PRIMITIVES(8)
+#endif
+#endif
+
+//! Additive inverse of 1 for type T.
+/** Various compilers issue various warnings if -1 is used with various integer types.
+    The baroque expression below avoids all the warnings (we hope). */
+#define __TBB_MINUS_ONE(T) (T(T(0)-T(1)))
+
+//! Base class that provides basic functionality for atomic<T> without fetch_and_add.
+/** Works for any type T that has the same size as an integral type, has a trivial constructor/destructor, 
+    and can be copied/compared by memcpy/memcmp. */
+template<typename T>
+struct atomic_impl {
+protected:
+    atomic_rep<sizeof(T)> rep;
+private:
+    //! Union type used to convert type T to underlying integral type.
+    union converter {
+        T value;
+        typename atomic_rep<sizeof(T)>::word bits;
+    };
+public:
+    typedef T value_type;
+
+    template<memory_semantics M>
+    value_type fetch_and_store( value_type value ) {
+        converter u, w;
+        u.value = value;
+        w.bits = internal::atomic_traits<sizeof(value_type),M>::fetch_and_store(&rep.value,u.bits);
+        return w.value;
+    }
+
+    value_type fetch_and_store( value_type value ) {
+        return fetch_and_store<__TBB_full_fence>(value);
+    }
+
+    template<memory_semantics M>
+    value_type compare_and_swap( value_type value, value_type comparand ) {
+        converter u, v, w;
+        u.value = value;
+        v.value = comparand;
+        w.bits = internal::atomic_traits<sizeof(value_type),M>::compare_and_swap(&rep.value,u.bits,v.bits);
+        return w.value;
+    }
+
+    value_type compare_and_swap( value_type value, value_type comparand ) {
+        return compare_and_swap<__TBB_full_fence>(value,comparand);
+    }
+
+    operator value_type() const volatile {                // volatile qualifier here for backwards compatibility 
+        converter w;
+        w.bits = __TBB_load_with_acquire( rep.value );
+        return w.value;
+    }
+
+protected:
+    value_type store_with_release( value_type rhs ) {
+        converter u;
+        u.value = rhs;
+        __TBB_store_with_release(rep.value,u.bits);
+        return rhs;
+    }
+};
+
+//! Base class that provides basic functionality for atomic<T> with fetch_and_add.
+/** I is the underlying type.
+    D is the difference type.
+    StepType should be char if I is an integral type, and T if I is a T*. */
+template<typename I, typename D, typename StepType>
+struct atomic_impl_with_arithmetic: atomic_impl<I> {
+public:
+    typedef I value_type;
+
+    template<memory_semantics M>
+    value_type fetch_and_add( D addend ) {
+        return value_type(internal::atomic_traits<sizeof(value_type),M>::fetch_and_add( &this->rep.value, addend*sizeof(StepType) ));
+    }
+
+    value_type fetch_and_add( D addend ) {
+        return fetch_and_add<__TBB_full_fence>(addend);
+    }
+
+    template<memory_semantics M>
+    value_type fetch_and_increment() {
+        return fetch_and_add<M>(1);
+    }
+
+    value_type fetch_and_increment() {
+        return fetch_and_add(1);
+    }
+
+    template<memory_semantics M>
+    value_type fetch_and_decrement() {
+        return fetch_and_add<M>(__TBB_MINUS_ONE(D));
+    }
+
+    value_type fetch_and_decrement() {
+        return fetch_and_add(__TBB_MINUS_ONE(D));
+    }
+
+public:
+    value_type operator+=( D addend ) {
+        return fetch_and_add(addend)+addend;
+    }
+
+    value_type operator-=( D addend ) {
+        // Additive inverse of addend computed using binary minus,
+        // instead of unary minus, for sake of avoiding compiler warnings.
+        return operator+=(D(0)-addend);    
+    }
+
+    value_type operator++() {
+        return fetch_and_add(1)+1;
+    }
+
+    value_type operator--() {
+        return fetch_and_add(__TBB_MINUS_ONE(D))-1;
+    }
+
+    value_type operator++(int) {
+        return fetch_and_add(1);
+    }
+
+    value_type operator--(int) {
+        return fetch_and_add(__TBB_MINUS_ONE(D));
+    }
+};
+
+} /* internal */
+//! @endcond
+
+//! Primary template for atomic.
+/** See the Reference for details.
+    @ingroup synchronization */
+template<typename T>
+struct atomic: internal::atomic_impl<T> {
+    T operator=( T rhs ) {
+        // "this" required here in strict ISO C++ because store_with_release is a dependent name
+        return this->store_with_release(rhs);
+    }
+    atomic<T>& operator=( const atomic<T>& rhs ) {this->store_with_release(rhs); return *this;}
+};
+
+#define __TBB_DECL_ATOMIC(T) \
+    template<> struct atomic<T>: internal::atomic_impl_with_arithmetic<T,T,char> {  \
+        T operator=( T rhs ) {return store_with_release(rhs);}  \
+        atomic<T>& operator=( const atomic<T>& rhs ) {store_with_release(rhs); return *this;}  \
+    };
+
+#if __TBB_64BIT_ATOMICS
+// otherwise size is verified by test_atomic
+__TBB_DECL_ATOMIC(__TBB_LONG_LONG)
+__TBB_DECL_ATOMIC(unsigned __TBB_LONG_LONG)
+#endif
+__TBB_DECL_ATOMIC(long)
+__TBB_DECL_ATOMIC(unsigned long)
+
+#if defined(_MSC_VER) && __TBB_WORDSIZE==4
+/* Special version of __TBB_DECL_ATOMIC that avoids gratuitous warnings from cl /Wp64 option. 
+   It is identical to __TBB_DECL_ATOMIC(unsigned) except that it replaces operator=(T) 
+   with an operator=(U) that explicitly converts the U to a T.  Types T and U should be
+   type synonyms on the platform.  Type U should be the wider variant of T from the
+   perspective of /Wp64. */
+#define __TBB_DECL_ATOMIC_ALT(T,U) \
+    template<> struct atomic<T>: internal::atomic_impl_with_arithmetic<T,T,char> {  \
+        T operator=( U rhs ) {return store_with_release(T(rhs));}  \
+        atomic<T>& operator=( const atomic<T>& rhs ) {store_with_release(rhs); return *this;}  \
+    };
+__TBB_DECL_ATOMIC_ALT(unsigned,size_t)
+__TBB_DECL_ATOMIC_ALT(int,ptrdiff_t)
+#else
+__TBB_DECL_ATOMIC(unsigned)
+__TBB_DECL_ATOMIC(int)
+#endif /* defined(_MSC_VER) && __TBB_WORDSIZE==4 */
+
+__TBB_DECL_ATOMIC(unsigned short)
+__TBB_DECL_ATOMIC(short)
+__TBB_DECL_ATOMIC(char)
+__TBB_DECL_ATOMIC(signed char)
+__TBB_DECL_ATOMIC(unsigned char)
+
+#if !defined(_MSC_VER)||defined(_NATIVE_WCHAR_T_DEFINED) 
+__TBB_DECL_ATOMIC(wchar_t)
+#endif /* _MSC_VER||!defined(_NATIVE_WCHAR_T_DEFINED) */
+
+//! Specialization for atomic<T*> with arithmetic and operator->.
+template<typename T> struct atomic<T*>: internal::atomic_impl_with_arithmetic<T*,ptrdiff_t,T> {
+    T* operator=( T* rhs ) {
+        // "this" required here in strict ISO C++ because store_with_release is a dependent name
+        return this->store_with_release(rhs);
+    }
+    atomic<T*>& operator=( const atomic<T*>& rhs ) {
+        this->store_with_release(rhs); return *this;
+    }
+    T* operator->() const {
+        return (*this);
+    }
+};
+
+//! Specialization for atomic<void*>, for sake of not allowing arithmetic or operator->.
+template<> struct atomic<void*>: internal::atomic_impl<void*> {
+    void* operator=( void* rhs ) {
+        // "this" required here in strict ISO C++ because store_with_release is a dependent name
+        return this->store_with_release(rhs);
+    }
+    atomic<void*>& operator=( const atomic<void*>& rhs ) {
+        this->store_with_release(rhs); return *this;
+    }
+};
+
+} // namespace tbb
+
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+    #pragma warning (pop)
+#endif // warnings 4244, 4267 are back
+
+#endif /* __TBB_atomic_H */
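For reference, the operations declared above in a typical use: assignment stores with release semantics, reads load with acquire semantics, and the read-modify-write calls return the value held before the update. A brief single-threaded sketch just to show the call shapes:

    #include "tbb/atomic.h"
    #include <cassert>

    tbb::atomic<long> counter;   // zero-initialized at namespace scope (no constructor)

    void demo() {
        counter = 10;                                   // store with release
        long before = counter.fetch_and_add(5);         // returns the old value
        assert(before == 10 && counter == 15);

        long old = counter.compare_and_swap(100, 15);   // set to 100 only if currently 15
        assert(old == 15 && counter == 100);

        ++counter;                                      // atomic increment, yields 101
    }
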
diff --git a/tbb/include/tbb/blocked_range.h b/tbb/include/tbb/blocked_range.h
new file mode 100644 (file)
index 0000000..ccba501
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_blocked_range_H
+#define __TBB_blocked_range_H
+
+#include "tbb_stddef.h"
+
+namespace tbb {
+
+/** \page range_req Requirements on range concept
+    Class \c R implementing the concept of range must define:
+    - \code R::R( const R& ); \endcode               Copy constructor
+    - \code R::~R(); \endcode                        Destructor
+    - \code bool R::is_divisible() const; \endcode   True if range can be partitioned into two subranges
+    - \code bool R::empty() const; \endcode          True if range is empty
+    - \code R::R( R& r, split ); \endcode            Split range \c r into two subranges.
+**/
+
+//! A range over which to iterate.
+/** @ingroup algorithms */
+template<typename Value>
+class blocked_range {
+public:
+    //! Type of a value
+    /** Called a const_iterator for sake of algorithms that need to treat a blocked_range
+        as an STL container. */
+    typedef Value const_iterator;
+
+    //! Type for size of a range
+    typedef std::size_t size_type;
+
+    //! Construct range with default-constructed values for begin and end.
+    /** Requires that Value have a default constructor. */
+    blocked_range() : my_end(), my_begin() {}
+
+    //! Construct range over half-open interval [begin,end), with the given grainsize.
+    blocked_range( Value begin_, Value end_, size_type grainsize_=1 ) : 
+        my_end(end_), my_begin(begin_), my_grainsize(grainsize_) 
+    {
+        __TBB_ASSERT( my_grainsize>0, "grainsize must be positive" );
+    }
+
+    //! Beginning of range.
+    const_iterator begin() const {return my_begin;}
+
+    //! One past last value in range.
+    const_iterator end() const {return my_end;}
+
+    //! Size of the range
+    /** Unspecified if end()<begin(). */
+    size_type size() const {
+        __TBB_ASSERT( !(end()<begin()), "size() unspecified if end()<begin()" );
+        return size_type(my_end-my_begin);
+    }
+
+    //! The grain size for this range.
+    size_type grainsize() const {return my_grainsize;}
+
+    //------------------------------------------------------------------------
+    // Methods that implement Range concept
+    //------------------------------------------------------------------------
+
+    //! True if range is empty.
+    bool empty() const {return !(my_begin<my_end);}
+
+    //! True if range is divisible.
+    /** Unspecified if end()<begin(). */
+    bool is_divisible() const {return my_grainsize<size();}
+
+    //! Split range.  
+    /** The new Range *this has the second half, the old range r has the first half. 
+        Unspecified if end()<begin() or !is_divisible(). */
+    blocked_range( blocked_range& r, split ) : 
+        my_end(r.my_end),
+        my_begin(do_split(r)),
+        my_grainsize(r.my_grainsize)
+    {}
+
+private:
+    /** NOTE: my_end MUST be declared before my_begin, otherwise the forking constructor will break. */
+    Value my_end;
+    Value my_begin;
+    size_type my_grainsize;
+
+    //! Auxiliary function used by forking constructor.
+    /** Using this function lets us not require that Value support assignment or default construction. */
+    static Value do_split( blocked_range& r ) {
+        __TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" );
+        Value middle = r.my_begin + (r.my_end-r.my_begin)/2u;
+        r.my_end = middle;
+        return middle;
+    }
+
+    template<typename RowValue, typename ColValue>
+    friend class blocked_range2d;
+
+    template<typename RowValue, typename ColValue, typename PageValue>
+    friend class blocked_range3d;
+};
+
+} // namespace tbb 
+
+#endif /* __TBB_blocked_range_H */
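The splitting constructor is the heart of the Range concept: given a divisible range, it moves the second half into the new object and shrinks the original to the first half, so the two halves can be processed independently. A small sketch of that behaviour in isolation (tbb::split comes from tbb_stddef.h, which is included above):

    #include "tbb/blocked_range.h"
    #include <cstddef>
    #include <cassert>

    void demo() {
        tbb::blocked_range<std::size_t> r(0, 1000, 100);   // [0,1000) with grainsize 100
        assert(r.is_divisible());                          // grainsize 100 < size 1000

        tbb::blocked_range<std::size_t> right(r, tbb::split());
        // r now covers the first half, right the second half, and they meet in the middle.
        assert(r.begin() == 0 && r.end() == right.begin() && right.end() == 1000);
    }
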
diff --git a/tbb/include/tbb/blocked_range2d.h b/tbb/include/tbb/blocked_range2d.h
new file mode 100644 (file)
index 0000000..9bd0509
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_blocked_range2d_H
+#define __TBB_blocked_range2d_H
+
+#include "tbb_stddef.h"
+#include "blocked_range.h"
+
+namespace tbb {
+
+//! A 2-dimensional range that models the Range concept.
+/** @ingroup algorithms */
+template<typename RowValue, typename ColValue=RowValue>
+class blocked_range2d {
+public:
+    //! Type for size of an iteration range
+    typedef blocked_range<RowValue> row_range_type;
+    typedef blocked_range<ColValue> col_range_type;
+private:
+    row_range_type my_rows;
+    col_range_type my_cols;
+
+public:
+
+    blocked_range2d( RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize,
+                     ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : 
+        my_rows(row_begin,row_end,row_grainsize),
+        my_cols(col_begin,col_end,col_grainsize)
+    {
+    }
+
+    blocked_range2d( RowValue row_begin, RowValue row_end,
+                     ColValue col_begin, ColValue col_end ) : 
+        my_rows(row_begin,row_end),
+        my_cols(col_begin,col_end)
+    {
+    }
+
+    //! True if range is empty
+    bool empty() const {
+        // Yes, it is a logical OR here, not AND.
+        return my_rows.empty() || my_cols.empty();
+    }
+
+    //! True if range is divisible into two pieces.
+    bool is_divisible() const {
+        return my_rows.is_divisible() || my_cols.is_divisible();
+    }
+
+    blocked_range2d( blocked_range2d& r, split ) : 
+        my_rows(r.my_rows),
+        my_cols(r.my_cols)
+    {
+        if( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) {
+            my_cols.my_begin = col_range_type::do_split(r.my_cols);
+        } else {
+            my_rows.my_begin = row_range_type::do_split(r.my_rows);
+        }
+    }
+
+    //! The rows of the iteration space 
+    const row_range_type& rows() const {return my_rows;}
+
+    //! The columns of the iteration space 
+    const col_range_type& cols() const {return my_cols;}
+};
+
+} // namespace tbb 
+
+#endif /* __TBB_blocked_range2d_H */
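When a 2-D range is split, the constructor above halves whichever dimension is larger relative to its grainsize, which keeps the resulting blocks roughly square in grain units. In practice the range is handed to a parallel loop such as tbb::parallel_for, which performs the splitting; the sketch below only shows how a loop body reads the (sub)range it is given, with illustrative parameter names.

    #include "tbb/blocked_range2d.h"

    void clear_block(int** image, const tbb::blocked_range2d<int>& area) {
        // Walk the rows and columns of the block this body was handed.
        for (int y = area.rows().begin(); y != area.rows().end(); ++y)
            for (int x = area.cols().begin(); x != area.cols().end(); ++x)
                image[y][x] = 0;
    }

    void demo(int** image, int height, int width) {
        // A height x width grid with a grainsize of 16 in each dimension.
        clear_block(image, tbb::blocked_range2d<int>(0, height, 16, 0, width, 16));
    }
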
diff --git a/tbb/include/tbb/blocked_range3d.h b/tbb/include/tbb/blocked_range3d.h
new file mode 100644 (file)
index 0000000..85a66f1
--- /dev/null
@@ -0,0 +1,116 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_blocked_range3d_H
+#define __TBB_blocked_range3d_H
+
+#include "tbb_stddef.h"
+#include "blocked_range.h"
+
+namespace tbb {
+
+//! A 3-dimensional range that models the Range concept.
+/** @ingroup algorithms */
+template<typename PageValue, typename RowValue=PageValue, typename ColValue=RowValue>
+class blocked_range3d {
+public:
+    //! Type for size of an iteration range
+    typedef blocked_range<PageValue> page_range_type;
+    typedef blocked_range<RowValue>  row_range_type;
+    typedef blocked_range<ColValue>  col_range_type;
+private:
+    page_range_type my_pages;
+    row_range_type  my_rows;
+    col_range_type  my_cols;
+
+public:
+
+    blocked_range3d( PageValue page_begin, PageValue page_end,
+                     RowValue  row_begin,  RowValue row_end,
+                     ColValue  col_begin,  ColValue col_end ) : 
+        my_pages(page_begin,page_end),
+        my_rows(row_begin,row_end),
+        my_cols(col_begin,col_end)
+    {
+    }
+
+    blocked_range3d( PageValue page_begin, PageValue page_end, typename page_range_type::size_type page_grainsize, 
+                     RowValue  row_begin,  RowValue row_end,   typename row_range_type::size_type row_grainsize,
+                     ColValue  col_begin,  ColValue col_end,   typename col_range_type::size_type col_grainsize ) :  
+        my_pages(page_begin,page_end,page_grainsize),
+        my_rows(row_begin,row_end,row_grainsize),
+        my_cols(col_begin,col_end,col_grainsize)
+    {
+    }
+
+    //! True if range is empty
+    bool empty() const {
+        // Yes, it is a logical OR here, not AND.
+        return my_pages.empty() || my_rows.empty() || my_cols.empty();
+    }
+
+    //! True if range is divisible into two pieces.
+    bool is_divisible() const {
+        return  my_pages.is_divisible() || my_rows.is_divisible() || my_cols.is_divisible();
+    }
+
+    blocked_range3d( blocked_range3d& r, split ) : 
+        my_pages(r.my_pages),
+        my_rows(r.my_rows),
+        my_cols(r.my_cols)
+    {
+        if( my_pages.size()*double(my_rows.grainsize()) < my_rows.size()*double(my_pages.grainsize()) ) {
+            if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) {
+                my_cols.my_begin = col_range_type::do_split(r.my_cols);
+            } else {
+                my_rows.my_begin = row_range_type::do_split(r.my_rows);
+            }
+        } else {
+            if ( my_pages.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_pages.grainsize()) ) {
+                my_cols.my_begin = col_range_type::do_split(r.my_cols);
+            } else {
+                my_pages.my_begin = page_range_type::do_split(r.my_pages);
+            }
+        }
+    }
+
+    //! The pages of the iteration space 
+    const page_range_type& pages() const {return my_pages;}
+
+    //! The rows of the iteration space 
+    const row_range_type& rows() const {return my_rows;}
+
+    //! The columns of the iteration space 
+    const col_range_type& cols() const {return my_cols;}
+
+};
+
+} // namespace tbb 
+
+#endif /* __TBB_blocked_range3d_H */
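blocked_range3d extends the same idea to pages x rows x columns; on a split it again halves whichever dimension is proportionally largest. Construction mirrors the 2-D case, as in this short sketch with illustrative grainsizes:

    #include "tbb/blocked_range3d.h"

    // A 3-D iteration space over a depth x height x width volume,
    // with per-dimension grainsizes of 4, 16 and 16.
    tbb::blocked_range3d<int> make_volume_range(int depth, int height, int width) {
        return tbb::blocked_range3d<int>(0, depth, 4, 0, height, 16, 0, width, 16);
    }
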
diff --git a/tbb/include/tbb/cache_aligned_allocator.h b/tbb/include/tbb/cache_aligned_allocator.h
new file mode 100644 (file)
index 0000000..896a28e
--- /dev/null
@@ -0,0 +1,133 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_cache_aligned_allocator_H
+#define __TBB_cache_aligned_allocator_H
+
+#include <new>
+#include "tbb_stddef.h"
+
+namespace tbb {
+
+//! @cond INTERNAL
+namespace internal {
+    //! Cache/sector line size.
+    /** @ingroup memory_allocation */
+    size_t __TBB_EXPORTED_FUNC NFS_GetLineSize();
+
+    //! Allocate memory on cache/sector line boundary.
+    /** @ingroup memory_allocation */
+    void* __TBB_EXPORTED_FUNC NFS_Allocate( size_t n_element, size_t element_size, void* hint );
+
+    //! Free memory allocated by NFS_Allocate.
+    /** Freeing a NULL pointer is allowed, but has no effect.
+        @ingroup memory_allocation */
+    void __TBB_EXPORTED_FUNC NFS_Free( void* );
+}
+//! @endcond
+
+#if _MSC_VER && !defined(__INTEL_COMPILER)
+    // Workaround for erroneous "unreferenced parameter" warning in method destroy.
+    #pragma warning (push)
+    #pragma warning (disable: 4100)
+#endif
+
+//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5
+/** The members are ordered the same way they are in section 20.4.1
+    of the ISO C++ standard.
+    @ingroup memory_allocation */
+template<typename T>
+class cache_aligned_allocator {
+public:
+    typedef typename internal::allocator_type<T>::value_type value_type;
+    typedef value_type* pointer;
+    typedef const value_type* const_pointer;
+    typedef value_type& reference;
+    typedef const value_type& const_reference;
+    typedef size_t size_type;
+    typedef ptrdiff_t difference_type;
+    template<typename U> struct rebind {
+        typedef cache_aligned_allocator<U> other;
+    };
+
+    cache_aligned_allocator() throw() {}
+    cache_aligned_allocator( const cache_aligned_allocator& ) throw() {}
+    template<typename U> cache_aligned_allocator(const cache_aligned_allocator<U>&) throw() {}
+
+    pointer address(reference x) const {return &x;}
+    const_pointer address(const_reference x) const {return &x;}
+    
+    //! Allocate space for n objects, starting on a cache/sector line.
+    pointer allocate( size_type n, const void* hint=0 ) {
+        // The "hint" argument is always ignored in NFS_Allocate thus const_cast shouldn't hurt
+        return pointer(internal::NFS_Allocate( n, sizeof(value_type), const_cast<void*>(hint) ));
+    }
+
+    //! Free block of memory that starts on a cache line
+    void deallocate( pointer p, size_type ) {
+        internal::NFS_Free(p);
+    }
+
+    //! Largest value for which method allocate might succeed.
+    size_type max_size() const throw() {
+        return (~size_t(0)-internal::NFS_MaxLineSize)/sizeof(value_type);
+    }
+
+    //! Copy-construct value at location pointed to by p.
+    void construct( pointer p, const value_type& value ) {::new((void*)(p)) value_type(value);}
+
+    //! Destroy value at location pointed to by p.
+    void destroy( pointer p ) {p->~value_type();}
+};
+
+#if _MSC_VER && !defined(__INTEL_COMPILER)
+    #pragma warning (pop)
+#endif // warning 4100 is back
+
+//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1
+/** @ingroup memory_allocation */
+template<> 
+class cache_aligned_allocator<void> {
+public:
+    typedef void* pointer;
+    typedef const void* const_pointer;
+    typedef void value_type;
+    template<typename U> struct rebind {
+        typedef cache_aligned_allocator<U> other;
+    };
+};
+
+template<typename T, typename U>
+inline bool operator==( const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>& ) {return true;}
+
+template<typename T, typename U>
+inline bool operator!=( const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>& ) {return false;}
+
+} // namespace tbb
+
+#endif /* __TBB_cache_aligned_allocator_H */
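The allocator above drops into any standard container; the storage it hands out comes from NFS_Allocate and therefore starts on a cache/sector line boundary, which keeps hot data off lines shared with unrelated objects. A minimal usage sketch (illustrative only, not part of this commit; assumes the TBB headers and library are on the build path):

    #include <vector>
    #include "tbb/cache_aligned_allocator.h"

    struct counter { long value; };

    int main() {
        // The vector's backing store comes from NFS_Allocate and therefore
        // begins on a cache/sector line boundary.
        std::vector<counter, tbb::cache_aligned_allocator<counter> > counters(8);
        counters[0].value = 42;

        // The allocator is stateless: any two instances compare equal.
        return tbb::cache_aligned_allocator<counter>() ==
               tbb::cache_aligned_allocator<long>() ? 0 : 1;
    }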
diff --git a/tbb/include/tbb/combinable.h b/tbb/include/tbb/combinable.h
new file mode 100644 (file)
index 0000000..8b3bdef
--- /dev/null
+++ b/tbb/include/tbb/combinable.h
@@ -0,0 +1,80 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_combinable_H
+#define __TBB_combinable_H
+
+#include "enumerable_thread_specific.h"
+#include "cache_aligned_allocator.h"
+
+namespace tbb {
+/** \name combinable
+    **/
+//@{
+//! Thread-local storage with optional reduction
+/** @ingroup containers */
+    template <typename T>
+        class combinable {
+    private:
+        typedef typename tbb::cache_aligned_allocator<T> my_alloc;
+
+        typedef typename tbb::enumerable_thread_specific<T, my_alloc, ets_no_key> my_ets_type;
+        my_ets_type my_ets; 
+    public:
+
+        combinable() { }
+
+        template <typename finit>
+        combinable( finit _finit) : my_ets(_finit) { }
+
+        //! destructor
+        ~combinable() { 
+        }
+
+        combinable(const combinable& other) : my_ets(other.my_ets) { }
+
+        combinable & operator=( const combinable & other) { my_ets = other.my_ets; return *this; }
+
+        void clear() { my_ets.clear(); }
+
+        T& local() { return my_ets.local(); }
+
+        T& local(bool & exists) { return my_ets.local(exists); }
+
+        // combine_func_t has signature T(T,T) or T(const T&, const T&)
+        template <typename combine_func_t>
+        T combine(combine_func_t f_combine) { return my_ets.combine(f_combine); }
+
+        // combine_func_t has signature void(T) or void(const T&)
+        template <typename combine_func_t>
+        void combine_each(combine_func_t f_combine) { my_ets.combine_each(f_combine); }
+
+    };
+} // namespace tbb
+#endif /* __TBB_combinable_H */
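combinable is a thin wrapper over enumerable_thread_specific using the cache-aligned allocator above: each thread accumulates into its own local() slot and the slots are folded together afterwards with combine() or combine_each(). A hedged sketch of typical use (not part of the commit; assumes tbb::parallel_for and a compiler with C++0x lambdas):

    #include "tbb/combinable.h"
    #include "tbb/parallel_for.h"
    #include "tbb/blocked_range.h"
    #include <cstdio>

    int main() {
        tbb::combinable<long> partial;          // one lazily created slot per thread

        tbb::parallel_for(tbb::blocked_range<int>(0, 10000),
            [&](const tbb::blocked_range<int>& r) {
                long& local = partial.local();  // this thread's private accumulator
                for (int i = r.begin(); i != r.end(); ++i)
                    local += i;
            });

        // combine() folds the per-thread values with a T(T,T) functor.
        long total = partial.combine([](long a, long b) { return a + b; });
        std::printf("total = %ld\n", total);    // 49995000
        return 0;
    }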
diff --git a/tbb/include/tbb/compat/condition_variable b/tbb/include/tbb/compat/condition_variable
new file mode 100644 (file)
index 0000000..c73f521
--- /dev/null
+++ b/tbb/include/tbb/compat/condition_variable
@@ -0,0 +1,460 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_condition_variable_H
+#define __TBB_condition_variable_H
+
+#if _WIN32||_WIN64
+#include "../machine/windows_api.h"
+
+namespace tbb { 
+namespace interface5 {
+namespace internal { 
+struct condition_variable_using_event
+{
+    //! Event for blocking waiting threads.
+    HANDLE event;
+    //! Protects invariants involving n_waiters, release_count, and epoch.
+    CRITICAL_SECTION mutex;
+    //! Number of threads waiting on this condition variable
+    int n_waiters;
+    //! Number of threads remaining that should no longer wait on this condition variable.
+    int release_count;
+    //! To keep threads from waking up prematurely with earlier signals.
+    unsigned epoch;
+};
+}}} // namespace tbb::interface5::internal
+
+#ifndef CONDITION_VARIABLE_INIT
+typedef void* CONDITION_VARIABLE;
+typedef CONDITION_VARIABLE* PCONDITION_VARIABLE;
+#endif
+
+#else /* if not _WIN32||_WIN64 */
+#include <errno.h> // some systems need it for ETIMEDOUT
+#include <pthread.h>
+#if __linux__
+#include <ctime>
+#else /* generic Unix */
+#include <sys/time.h>
+#endif
+#endif /* _WIN32||_WIN64 */
+
+#include "../tbb_stddef.h"
+#include "../mutex.h"
+#include "../tbb_thread.h"
+#include "../tbb_exception.h"
+#include "../tbb_profiling.h"
+
+namespace tbb {
+
+namespace interface5 {
+
+// C++0x standard working draft 30.4.3
+// Lock tag types
+struct defer_lock_t { }; //! do not acquire ownership of the mutex
+struct try_to_lock_t { }; //! try to acquire ownership of the mutex without blocking
+struct adopt_lock_t { }; //! assume the calling thread already owns the mutex
+const defer_lock_t defer_lock = {};
+const try_to_lock_t try_to_lock = {};
+const adopt_lock_t adopt_lock = {};
+
+// C++0x standard working draft 30.4.3.1
+//! lock_guard 
+template<typename M>
+class lock_guard : tbb::internal::no_copy {
+public:
+    //! mutex type
+    typedef M mutex_type;
+
+    //! Constructor
+    /** precondition: If mutex_type is not a recursive mutex, the calling thread
+        does not own the mutex m. */
+    explicit lock_guard(mutex_type& m) : pm(m) {m.lock();}
+    
+    //! Adopt_lock constructor
+    /** precondition: the calling thread owns the mutex m. */
+    lock_guard(mutex_type& m, adopt_lock_t) : pm(m) {}
+
+    //! Destructor
+    ~lock_guard() { pm.unlock(); }
+private:
+    mutex_type& pm;
+};
+
+// C++0x standard working draft 30.4.3.2
+//! unique_lock 
+template<typename M>
+class unique_lock : tbb::internal::no_copy {
+    friend class condition_variable;
+public:
+    typedef M mutex_type;
+
+    // 30.4.3.2.1 construct/copy/destroy
+    // NB: Without constructors that take an r-value reference to a unique_lock, the following constructor is of little use.
+    //! Constructor
+    /** postcondition: pm==0 && owns==false */
+    unique_lock() : pm(NULL), owns(false) {}
+
+    //! Constructor
+    /** precondition: if mutex_type is not a recursive mutex, the  calling thread
+        does not own the mutex m.  If the precondition is not met, a deadlock occurs.
+        postcondition: pm==&m and owns==true */
+    explicit unique_lock(mutex_type& m) : pm(&m) {m.lock(); owns=true;}
+
+    //! Defer_lock constructor
+    /** postcondition: pm==&m and owns==false */
+    unique_lock(mutex_type& m, defer_lock_t) : pm(&m), owns(false) {}
+
+    //! Try_to_lock constructor
+    /** precondition: if mutex_type is not a recursive mutex, the  calling thread
+       does not own the mutex m.  If the precondition is not met, a deadlock occurs.
+       postcondition: pm==&m and owns==res where res is the value returned by
+       the call to m.try_lock(). */
+    unique_lock(mutex_type& m, try_to_lock_t) : pm(&m) {owns = m.try_lock();}
+
+    //! Adopt_lock constructor
+    /** precondition: the calling thread owns the mutex. If it does not, mutex->unlock() would fail.
+        postcondition: pm==&m and owns==true */
+    unique_lock(mutex_type& m, adopt_lock_t) : pm(&m), owns(true) {}
+
+    //! Timed unique_lock acquisition.
+    /** To avoid requiring support for namespace chrono, this method deviates from the working draft in that 
+        it uses tbb::tick_count::interval_t to specify the time duration. */
+    unique_lock(mutex_type& m, const tick_count::interval_t &i) : pm(&m) {owns = try_lock_for( i );}
+
+    //! Destructor
+    ~unique_lock() { if( owns ) pm->unlock(); }
+
+    // 30.4.3.2.2 locking
+    //! Lock the mutex and own it.
+    void lock() {
+        if( pm ) {
+            if( !owns ) {
+                pm->lock();
+                owns = true;
+            } else 
+                throw_exception_v4( tbb::internal::eid_possible_deadlock );
+        } else 
+            throw_exception_v4( tbb::internal::eid_operation_not_permitted );
+        __TBB_ASSERT( owns, NULL );
+    }
+
+    //! Try to lock the mutex. 
+    /** On success, record that this lock now owns the mutex; otherwise owns stays false. */
+    bool try_lock() {
+        if( pm ) {
+            if( !owns )
+                owns = pm->try_lock();
+            else
+                throw_exception_v4( tbb::internal::eid_possible_deadlock );
+        } else 
+            throw_exception_v4( tbb::internal::eid_operation_not_permitted );
+        return owns;
+    }
+    //! Try to lock the mutex. 
+    bool try_lock_for( const tick_count::interval_t &i );
+
+    //! Unlock the mutex
+    /** And note that this lock no longer owns it. */
+    void unlock() { 
+        if( owns ) {
+            pm->unlock();
+            owns = false;
+        } else
+            throw_exception_v4( tbb::internal::eid_operation_not_permitted );
+        __TBB_ASSERT( !owns, NULL );
+    }
+
+    // 30.4.3.2.3 modifiers
+    //! Swap the two unique locks
+    void swap(unique_lock& u) {
+        mutex_type* t_pm = u.pm;    u.pm   = pm;    pm   = t_pm;
+        bool t_owns      = u.owns;  u.owns = owns;  owns = t_owns;
+    }
+
+    //! Release control over the mutex.
+    mutex_type* release() {
+        mutex_type* o_pm = pm; 
+        pm = NULL; 
+        owns = false; 
+        return o_pm; 
+    }
+
+    // 30.4.3.2.4 observers
+    //! Does this lock own the mutex?
+    bool owns_lock() const { return owns; }
+
+    // TODO: Un-comment 'explicit' when the last non-C++0x compiler support is dropped
+    //! Does this lock own the mutex?
+    /*explicit*/ operator bool() const { return owns; }
+
+    //! Return the mutex that this lock currently has.
+    mutex_type* mutex() const { return pm; }
+
+private:
+    mutex_type* pm;
+    bool owns;
+};
+
+template<typename M>
+bool unique_lock<M>::try_lock_for( const tick_count::interval_t &i)
+{ 
+    const int unique_lock_tick = 100; /* microseconds; 0.1 milliseconds */
+    // the smallest wait-time is 0.1 milliseconds.
+    bool res = pm->try_lock();
+    int duration_in_micro; 
+    if( !res && (duration_in_micro=int(i.seconds()*1e6))>unique_lock_tick ) {
+        tick_count::interval_t i_100( double(unique_lock_tick)/1e6 /* seconds */); // 100 microseconds = 1e-4 seconds
+        do {
+            this_tbb_thread::sleep(i_100); // sleep for 100 microseconds
+            duration_in_micro -= unique_lock_tick;
+            res = pm->try_lock();
+        } while( !res && duration_in_micro>unique_lock_tick );
+    }
+    return (owns=res);
+}
+
+//! Swap the two unique locks that have the mutexes of same type 
+template<typename M>
+void swap(unique_lock<M>& x, unique_lock<M>& y) { x.swap( y ); }
+
+namespace internal {
+
+#if _WIN32||_WIN64
+union condvar_impl_t {
+    condition_variable_using_event cv_event;
+    CONDITION_VARIABLE             cv_native;
+};
+void __TBB_EXPORTED_FUNC internal_initialize_condition_variable( condvar_impl_t& cv );
+void __TBB_EXPORTED_FUNC internal_destroy_condition_variable(    condvar_impl_t& cv );
+void __TBB_EXPORTED_FUNC internal_condition_variable_notify_one( condvar_impl_t& cv );
+void __TBB_EXPORTED_FUNC internal_condition_variable_notify_all( condvar_impl_t& cv );
+bool __TBB_EXPORTED_FUNC internal_condition_variable_wait( condvar_impl_t& cv, mutex* mtx, const tick_count::interval_t* i = NULL );
+
+#else /* if !(_WIN32||_WIN64), i.e., POSIX threads */
+typedef pthread_cond_t condvar_impl_t;
+#endif
+
+} // namespace internal
+
+//! cv_status
+/** C++0x standard working draft 30.5 */
+enum cv_status { no_timeout, timeout }; 
+
+//! condition variable
+/** C++0x standard working draft 30.5.1 
+    @ingroup synchronization */
+class condition_variable : tbb::internal::no_copy {
+public:
+    //! Constructor
+    condition_variable() { 
+#if _WIN32||_WIN64
+        internal_initialize_condition_variable( my_cv ); 
+#else
+        pthread_cond_init( &my_cv, NULL );
+#endif
+    }
+
+    //! Destructor
+    ~condition_variable() { 
+        //precondition: There shall be no thread blocked on *this.
+#if _WIN32||_WIN64
+        internal_destroy_condition_variable( my_cv );
+#else
+        pthread_cond_destroy( &my_cv );
+#endif
+    }
+
+    //! Notify one thread and wake it up
+    void notify_one() { 
+#if _WIN32||_WIN64
+        internal_condition_variable_notify_one( my_cv ); 
+#else
+        pthread_cond_signal( &my_cv );
+#endif
+    }
+
+    //! Notify all threads 
+    void notify_all() { 
+#if _WIN32||_WIN64
+        internal_condition_variable_notify_all( my_cv ); 
+#else
+        pthread_cond_broadcast( &my_cv );
+#endif
+    }
+
+    //! Release the mutex associated with the lock and wait on this condition variable
+    void wait(unique_lock<mutex>& lock);
+
+    //! Wait on this condition variable while pred is false
+    template <class Predicate>
+    void wait(unique_lock<mutex>& lock, Predicate pred) {
+        while( !pred() )
+            wait( lock );
+    }
+
+    //! Timed version of wait()
+    cv_status wait_for(unique_lock<mutex>& lock, const tick_count::interval_t &i );
+
+    //! Timed version of the predicated wait
+    /** The loop terminates when pred() returns true or when the time duration specified by rel_time (i) has elapsed. */
+    template<typename Predicate>
+    bool wait_for(unique_lock<mutex>& lock, const tick_count::interval_t &i, Predicate pred)
+    {
+        while( !pred() ) {
+            cv_status st = wait_for( lock, i );
+            if( st==timeout )
+                return pred();
+        }
+        return true;
+    }
+
+    // C++0x standard working draft. 30.2.3
+    typedef internal::condvar_impl_t* native_handle_type;
+
+    native_handle_type native_handle() { return (native_handle_type) &my_cv; }
+
+private:
+    internal::condvar_impl_t my_cv;
+};
+
+
+#if _WIN32||_WIN64
+inline void condition_variable::wait( unique_lock<mutex>& lock )
+{
+    __TBB_ASSERT( lock.owns, NULL );
+    lock.owns = false;
+    if( !internal_condition_variable_wait( my_cv, lock.mutex() ) ) {
+        int ec = GetLastError();
+        // on Windows 7, SleepConditionVariableCS() may return ERROR_TIMEOUT while the doc says it returns WAIT_TIMEOUT
+        __TBB_ASSERT_EX( ec!=WAIT_TIMEOUT&&ec!=ERROR_TIMEOUT, NULL );
+        lock.owns = true;
+        throw_exception_v4( tbb::internal::eid_condvar_wait_failed );
+    }
+    lock.owns = true;
+}
+
+inline cv_status condition_variable::wait_for( unique_lock<mutex>& lock, const tick_count::interval_t& i )
+{
+    cv_status rc = no_timeout;
+    __TBB_ASSERT( lock.owns, NULL );
+    lock.owns = false;
+    // condvar_wait could be SleepConditionVariableCS (or SleepConditionVariableSRW) or our own pre-vista cond_var_wait()
+    if( !internal_condition_variable_wait( my_cv, lock.mutex(), &i ) ) {
+        int ec = GetLastError();
+        if( ec==WAIT_TIMEOUT || ec==ERROR_TIMEOUT )
+            rc = timeout;
+        else {
+            lock.owns = true;
+            throw_exception_v4( tbb::internal::eid_condvar_wait_failed );
+        }
+    }
+    lock.owns = true;
+    return rc;
+}
+
+#else /* !(_WIN32||_WIN64) */
+inline void condition_variable::wait( unique_lock<mutex>& lock )
+{
+    __TBB_ASSERT( lock.owns, NULL );
+    lock.owns = false;
+    if( pthread_cond_wait( &my_cv, lock.mutex()->native_handle() ) ) {
+        lock.owns = true;
+        throw_exception_v4( tbb::internal::eid_condvar_wait_failed );
+    }
+    // upon successful return, the mutex has been locked and is owned by the calling thread.
+    lock.owns = true;
+}
+
+inline cv_status condition_variable::wait_for( unique_lock<mutex>& lock, const tick_count::interval_t& i )
+{
+#if __linux__
+    struct timespec req;
+    double sec = i.seconds();
+    clock_gettime( CLOCK_REALTIME, &req );
+    req.tv_sec  += static_cast<long>(sec);
+    req.tv_nsec += static_cast<long>( (sec - static_cast<long>(sec))*1e9 );
+#else /* generic Unix */
+    struct timeval tv;
+    struct timespec req;
+    double sec = i.seconds();
+    int status = gettimeofday(&tv, NULL);
+    __TBB_ASSERT_EX( status==0, "gettimeofday failed" );
+    req.tv_sec  = tv.tv_sec + static_cast<long>(sec);
+    req.tv_nsec = tv.tv_usec*1000 + static_cast<long>( (sec - static_cast<long>(sec))*1e9 );
+#endif /*(choice of OS) */
+
+    int ec;
+    cv_status rc = no_timeout;
+    __TBB_ASSERT( lock.owns, NULL );
+    lock.owns = false;
+    if( ( ec=pthread_cond_timedwait( &my_cv, lock.mutex()->native_handle(), &req ) ) ) {
+        if( ec==ETIMEDOUT )
+            rc = timeout;
+        else {
+            __TBB_ASSERT( lock.try_lock()==false, NULL );
+            lock.owns = true;
+            throw_exception_v4( tbb::internal::eid_condvar_wait_failed );
+        }
+    }
+    lock.owns = true;
+    return rc;
+}
+#endif /* !(_WIN32||_WIN64) */
+
+} // namespace interface5
+
+__TBB_DEFINE_PROFILING_SET_NAME(interface5::condition_variable)
+
+} // namespace tbb 
+
+#if TBB_IMPLEMENT_CPP0X
+
+namespace std {
+
+using tbb::interface5::defer_lock_t;
+using tbb::interface5::try_to_lock_t;
+using tbb::interface5::adopt_lock_t;
+using tbb::interface5::defer_lock;
+using tbb::interface5::try_to_lock;
+using tbb::interface5::adopt_lock;
+using tbb::interface5::lock_guard;
+using tbb::interface5::unique_lock;
+using tbb::interface5::swap;   /* this is for void std::swap(unique_lock<M>&,unique_lock<M>&) */
+using tbb::interface5::condition_variable;
+using tbb::interface5::cv_status;
+using tbb::interface5::timeout;
+using tbb::interface5::no_timeout;
+
+} // namespace std 
+
+#endif /* TBB_IMPLEMENT_CPP0X */
+
+#endif /* __TBB_condition_variable_H */
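This compat header provides C++0x-style lock_guard, unique_lock and condition_variable built on tbb::mutex (and injects them into std:: when TBB_IMPLEMENT_CPP0X is in effect). A rough hand-off sketch under those assumptions (illustrative only; needs a lambda-capable compiler):

    #include "tbb/compat/condition_variable"
    #include "tbb/mutex.h"
    #include "tbb/tbb_thread.h"

    static tbb::mutex mtx;
    static tbb::interface5::condition_variable cv;
    static bool ready = false;

    void producer() {
        {
            tbb::interface5::lock_guard<tbb::mutex> lk(mtx);
            ready = true;
        }                    // drop the lock before notifying
        cv.notify_one();
    }

    void consumer() {
        tbb::interface5::unique_lock<tbb::mutex> lk(mtx);
        cv.wait(lk, [] { return ready; });   // predicated wait, re-checked under the lock
    }

    int main() {
        tbb::tbb_thread c(consumer), p(producer);
        c.join();
        p.join();
        return 0;
    }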
diff --git a/tbb/include/tbb/compat/ppl.h b/tbb/include/tbb/compat/ppl.h
new file mode 100644 (file)
index 0000000..a474b73
--- /dev/null
+++ b/tbb/include/tbb/compat/ppl.h
@@ -0,0 +1,68 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_compat_ppl_H
+#define __TBB_compat_ppl_H
+
+#include "../task_group.h"
+#include "../parallel_invoke.h"
+#include "../parallel_for_each.h"
+#include "../parallel_for.h"
+#include "../tbb_exception.h"
+#include "../critical_section.h"
+#include "../reader_writer_lock.h"
+#include "../combinable.h"
+
+namespace Concurrency {
+
+    using tbb::task_handle;
+    using tbb::task_group_status;
+    using tbb::task_group;
+    using tbb::structured_task_group;
+    using tbb::invalid_multiple_scheduling;
+    using tbb::missing_wait;
+    using tbb::make_task;
+
+    using tbb::not_complete;
+    using tbb::complete;
+    using tbb::canceled;
+
+    using tbb::is_current_task_group_canceling;
+
+    using tbb::parallel_invoke;
+    using tbb::strict_ppl::parallel_for;
+    using tbb::parallel_for_each;
+    using tbb::critical_section;
+    using tbb::reader_writer_lock;
+    using tbb::combinable;
+
+    using tbb::improper_lock;
+
+} // namespace Concurrency
+
+#endif /* __TBB_compat_ppl_H */
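compat/ppl.h re-exports a slice of TBB under the Concurrency namespace so that code written against Microsoft's Parallel Patterns Library can build on TBB instead. A hedged sketch (not part of the commit; assumes a lambda-capable compiler):

    #include "tbb/compat/ppl.h"
    #include <cstdio>

    int main() {
        // Concurrency::parallel_for here is tbb::strict_ppl::parallel_for,
        // the index-based overload parallel_for(first, last, body).
        long sum[4] = { 0, 0, 0, 0 };
        Concurrency::parallel_for(0, 4, [&](int i) {
            for (int j = 0; j <= 100; ++j)
                sum[i] += j;
        });

        // Concurrency::task_group is tbb::task_group under its PPL name.
        Concurrency::task_group tg;
        tg.run([] { std::printf("hello from a task\n"); });
        tg.wait();

        std::printf("sum[0] = %ld\n", sum[0]);   // 5050
        return 0;
    }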
diff --git a/tbb/include/tbb/compat/thread b/tbb/include/tbb/compat/thread
new file mode 100644 (file)
index 0000000..c4884ec
--- /dev/null
+++ b/tbb/include/tbb/compat/thread
@@ -0,0 +1,54 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_thread_H
+#define __TBB_thread_H
+
+#include "../tbb_thread.h"
+
+#if TBB_IMPLEMENT_CPP0X
+
+namespace std {
+
+typedef tbb::tbb_thread thread;
+
+namespace this_thread {
+    using tbb::this_tbb_thread::get_id;
+    using tbb::this_tbb_thread::yield;
+
+    inline void sleep_for(const tbb::tick_count::interval_t& rel_time) {
+        tbb::internal::thread_sleep_v3( rel_time );
+    }
+
+}
+
+}
+
+#endif /* TBB_IMPLEMENT_CPP0X */
+
+#endif /* __TBB_thread_H */
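When TBB_IMPLEMENT_CPP0X is in effect (tbb_config.h normally decides this per compiler), std::thread becomes a typedef of tbb::tbb_thread and std::this_thread gains get_id, yield and a sleep_for that takes tick_count::interval_t rather than a chrono duration. A small sketch under that assumption:

    #include "tbb/compat/thread"
    #include <cstdio>

    void worker() {
        // Deviation from the draft standard: sleep_for takes an interval_t in seconds.
        std::this_thread::sleep_for(tbb::tick_count::interval_t(0.01));
        std::printf("worker done\n");
    }

    int main() {
        std::thread t(worker);   // std::thread is tbb::tbb_thread under this compat layer
        t.join();
        return 0;
    }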
diff --git a/tbb/include/tbb/compat/tuple b/tbb/include/tbb/compat/tuple
new file mode 100644 (file)
index 0000000..4a4f5f2
--- /dev/null
+++ b/tbb/include/tbb/compat/tuple
@@ -0,0 +1,401 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_tuple_H
+#define __TBB_tuple_H
+
+#if !TBB_PREVIEW_TUPLE
+#error Set TBB_PREVIEW_TUPLE to include compat/tuple
+#endif
+
+#include <utility>
+#include "../tbb_stddef.h"
+
+namespace tbb {
+namespace interface5 {
+namespace internal {
+struct null_type { };
+}
+using internal::null_type;
+
+// tuple forward declaration
+template <class T0=null_type, class T1=null_type, class T2=null_type, class T3=null_type, class T4=null_type, class T5=null_type, class T6=null_type, class T7=null_type, class T8=null_type, class T9=null_type>
+class tuple;
+
+namespace internal {
+
+// const null_type temp
+inline const null_type cnull() { return null_type(); }
+
+// cons forward declaration
+template <class HT, class TT> struct cons;
+
+// type of a component of the cons
+template<int N, class T>
+struct component {
+    typedef typename T::tail_type next;
+    typedef typename component<N-1,next>::type type;
+};
+
+template<class T>
+struct component<0,T> {
+    typedef typename T::head_type type;
+};
+
+template<>
+struct component<0,null_type> {
+    typedef null_type type;
+};
+
+// const version of component
+
+template<int N, class T>
+struct component<N, const T>
+{
+    typedef typename T::tail_type next;
+    typedef typename component<N-1,next>::type type;
+};
+
+template<class T>
+struct component<0, const T>
+{
+    typedef const typename T::head_type type;
+};
+
+
+// helper class for getting components of cons
+template< int N>
+struct get_helper {
+template<class HT, class TT>
+inline static typename component<N, cons<HT,TT> >::type& get(cons<HT,TT>& ti) {
+    return get_helper<N-1>::get(ti.tail);
+}
+};
+
+template<>
+struct get_helper<0> {
+template<class HT, class TT>
+inline static typename component<0, cons<HT,TT> >::type& get(cons<HT,TT>& ti) {
+    return ti.head;
+}
+};
+
+// traits adaptor
+template <class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+struct tuple_traits {
+    typedef cons <T0, typename tuple_traits<T1, T2, T3, T4, T5, T6, T7, T8, T9, null_type>::U > U;
+};
+
+template <class T0>
+struct tuple_traits<T0, null_type, null_type, null_type, null_type, null_type, null_type, null_type, null_type, null_type> {
+    typedef cons<T0, null_type> U;
+};
+
+template<>
+struct tuple_traits<null_type, null_type, null_type, null_type, null_type, null_type, null_type, null_type, null_type, null_type> {
+    typedef null_type U;
+};
+
+
+// core cons defs
+template <class HT, class TT>
+struct cons{
+
+    typedef HT head_type;
+    typedef TT tail_type;
+
+    HT head; 
+    TT tail;
+
+    static const int length = 1 + tail_type::length;
+
+    // default constructors
+    explicit cons() : head(), tail() { }
+
+    // non-default constructors
+    cons(head_type& h, const tail_type& t) : head(h), tail(t) { }
+
+    template <class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+    cons(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8, const T9& t9) :
+        head(t0), tail(t1, t2, t3, t4, t5, t6, t7, t8, t9, cnull()) { }
+
+    template <class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+    cons(T0& t0, T1& t1, T2& t2, T3& t3, T4& t4, T5& t5, T6& t6, T7& t7, T8& t8, T9& t9) :
+        head(t0), tail(t1, t2, t3, t4, t5, t6, t7, t8, t9, cnull()) { }
+
+    template <class HT1, class TT1>
+    cons(const cons<HT1,TT1>& other) : head(other.head), tail(other.tail) { }
+
+    cons& operator=(const cons& other) { head = other.head; tail = other.tail; return *this; }
+
+    friend bool operator==(const cons& me, const cons& other) {
+        return me.head == other.head && me.tail == other.tail;
+    }
+    friend bool operator<(const cons& me, const cons& other)  {
+        return me.head < other.head || (!(other.head < me.head) && me.tail < other.tail);
+    }
+    friend bool operator>(const cons& me, const cons& other)  { return other<me; }
+    friend bool operator!=(const cons& me, const cons& other) { return !(me==other); }
+    friend bool operator>=(const cons& me, const cons& other) { return !(me<other); }
+    friend bool operator<=(const cons& me, const cons& other) { return !(me>other); }
+
+    template<class HT1, class TT1>
+    friend bool operator==(const cons<HT,TT>& me, const cons<HT1,TT1>& other) {
+        return me.head == other.head && me.tail == other.tail;
+    }
+
+    template<class HT1, class TT1>
+    friend bool operator<(const cons<HT,TT>& me, const cons<HT1,TT1>& other) {
+        return me.head < other.head || (!(other.head < me.head) && me.tail < other.tail);
+    }
+
+    template<class HT1, class TT1>
+    friend bool operator>(const cons<HT,TT>& me, const cons<HT1,TT1>& other) { return other<me; }
+
+    template<class HT1, class TT1>
+    friend bool operator!=(const cons<HT,TT>& me, const cons<HT1,TT1>& other) { return !(me==other); }
+
+    template<class HT1, class TT1>
+    friend bool operator>=(const cons<HT,TT>& me, const cons<HT1,TT1>& other) { return !(me<other); }
+
+    template<class HT1, class TT1>
+    friend bool operator<=(const cons<HT,TT>& me, const cons<HT1,TT1>& other) { return !(me>other); }
+
+
+};  // cons
+
+
+template <class HT>
+struct cons<HT,null_type> { 
+
+    typedef HT head_type;
+    typedef null_type tail_type;
+    static const int length = 1;
+    head_type head; 
+
+    // default constructor
+    cons() : head() { /*std::cout << "default constructor 1\n";*/ }
+
+    cons(const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&) : head() { /*std::cout << "default constructor 2\n";*/ }
+
+    // non-default constructor
+    template<class T1>
+    cons(T1& t1, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type& ) : head(t1) { /*std::cout << "non-default a1, t1== " << t1 << "\n";*/}
+
+    cons(head_type& h, const null_type& = null_type() ) : head(h) { }
+    cons(const head_type& t0, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&) : head(t0) { }
+
+    // converting constructor
+    template<class HT1>
+    cons(HT1 h1, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&) : head(h1) { }
+
+    // copy constructor
+    template<class HT1>
+    cons( const cons<HT1, null_type>& other) : head(other.head) { }
+
+    // assignment operator
+    cons& operator=(const cons& other) { head = other.head; return *this; }
+
+    friend bool operator==(const cons& me, const cons& other) { return me.head == other.head; }
+    friend bool operator<(const cons& me, const cons& other) { return me.head < other.head; }
+    friend bool operator>(const cons& me, const cons& other) { return other<me; }
+    friend bool operator!=(const cons& me, const cons& other) {return !(me==other); }
+    friend bool operator<=(const cons& me, const cons& other) {return !(me>other); }
+    friend bool operator>=(const cons& me, const cons& other) {return !(me<other); }
+
+    template<class HT1>
+    friend bool operator==(const cons<HT,null_type>& me, const cons<HT1,null_type>& other) {
+        return me.head == other.head;
+    }
+
+    template<class HT1>
+    friend bool operator<(const cons<HT,null_type>& me, const cons<HT1,null_type>& other) {
+        return me.head < other.head;
+    }
+
+    template<class HT1>
+    friend bool operator>(const cons<HT,null_type>& me, const cons<HT1,null_type>& other) { return other<me; }
+
+    template<class HT1>
+    friend bool operator!=(const cons<HT,null_type>& me, const cons<HT1,null_type>& other) { return !(me==other); }
+
+    template<class HT1>
+    friend bool operator<=(const cons<HT,null_type>& me, const cons<HT1,null_type>& other) { return !(me>other); }
+
+    template<class HT1>
+    friend bool operator>=(const cons<HT,null_type>& me, const cons<HT1,null_type>& other) { return !(me<other); }
+
+};  // cons
+
+template <>
+struct cons<null_type,null_type> { typedef null_type tail_type; static const int length = 0; };
+
+// wrapper for default constructor
+template<class T>
+inline const T wrap_dcons(T*) { return T(); }
+} // namespace internal
+
+// tuple definition
+template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+class tuple : public internal::tuple_traits<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>::U {
+    // friends
+    template <class T> friend class tuple_size;
+    template<int N, class T> friend struct tuple_element;
+
+    // stl components
+    typedef tuple<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9> value_type;
+    typedef value_type *pointer;
+    typedef const value_type *const_pointer;
+    typedef value_type &reference;
+    typedef const value_type &const_reference;
+    typedef size_t size_type;
+
+    typedef typename internal::tuple_traits<T0,T1,T2,T3, T4, T5, T6, T7, T8, T9>::U my_cons;
+public:
+
+    tuple(const T0& t0=internal::wrap_dcons((T0*)NULL),
+          const T1& t1=internal::wrap_dcons((T1*)NULL),
+          const T2& t2=internal::wrap_dcons((T2*)NULL),
+          const T3& t3=internal::wrap_dcons((T3*)NULL),
+          const T4& t4=internal::wrap_dcons((T4*)NULL),
+          const T5& t5=internal::wrap_dcons((T5*)NULL),
+          const T6& t6=internal::wrap_dcons((T6*)NULL),
+          const T7& t7=internal::wrap_dcons((T7*)NULL),
+          const T8& t8=internal::wrap_dcons((T8*)NULL),
+          const T9& t9=internal::wrap_dcons((T9*)NULL)
+          ) :
+        internal::tuple_traits<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9>::U(t0,t1,t2,t3,t4,t5,t6,t7,t8,t9) { }
+
+    template<int N>
+    struct internal_tuple_element {
+        typedef typename internal::component<N,my_cons>::type type;
+    };
+
+    template<int N>
+    typename internal_tuple_element<N>::type& get() { return internal::get_helper<N>::get(*this); }
+
+    template<class U1, class U2>
+    tuple& operator=(const internal::cons<U1,U2>& other) {
+        my_cons::operator=(other);
+        return *this;
+    }
+
+    template<class U1, class U2>
+    tuple& operator=(const std::pair<U1,U2>& other) {
+        // __TBB_ASSERT(tuple_size<value_type>::value == 2, "Invalid size for pair to tuple assignment");
+        this->head = other.first;
+        this->tail.head = other.second;
+        return *this;
+    }
+
+    friend bool operator==(const tuple& me, const tuple& other) {return static_cast<const my_cons &>(me)==(other);}
+    friend bool operator<(const tuple& me,  const tuple& other) {return static_cast<const my_cons &>(me)<(other);}
+    friend bool operator>(const tuple& me,  const tuple& other) {return static_cast<const my_cons &>(me)>(other);}
+    friend bool operator!=(const tuple& me, const tuple& other) {return static_cast<const my_cons &>(me)!=(other);}
+    friend bool operator>=(const tuple& me, const tuple& other) {return static_cast<const my_cons &>(me)>=(other);}
+    friend bool operator<=(const tuple& me, const tuple& other) {return static_cast<const my_cons &>(me)<=(other);}
+
+    template<class U0, class U1, class U2, class U3, class U4, class U5, class U6, class U7, class U8, class U9>
+    friend bool operator==(const tuple& me, const tuple<U0,U1,U2,U3,U4,U5,U6,U7,U8,U9>& other) {
+        return static_cast<const my_cons &>(me)==(other);
+    }
+
+    template<class U0, class U1, class U2, class U3, class U4, class U5, class U6, class U7, class U8, class U9>
+    friend bool operator<(const tuple& me, const tuple<U0,U1,U2,U3,U4,U5,U6,U7,U8,U9>& other) {
+        return static_cast<const my_cons &>(me)<(other);
+    }
+
+    template<class U0, class U1, class U2, class U3, class U4, class U5, class U6, class U7, class U8, class U9>
+    friend bool operator>(const tuple& me, const tuple<U0,U1,U2,U3,U4,U5,U6,U7,U8,U9>& other) {
+        return static_cast<const my_cons &>(me)>(other);
+    }
+
+    template<class U0, class U1, class U2, class U3, class U4, class U5, class U6, class U7, class U8, class U9>
+    friend bool operator!=(const tuple& me, const tuple<U0,U1,U2,U3,U4,U5,U6,U7,U8,U9>& other) {
+        return static_cast<const my_cons &>(me)!=(other);
+    }
+
+    template<class U0, class U1, class U2, class U3, class U4, class U5, class U6, class U7, class U8, class U9>
+    friend bool operator>=(const tuple& me, const tuple<U0,U1,U2,U3,U4,U5,U6,U7,U8,U9>& other) {
+        return static_cast<const my_cons &>(me)>=(other);
+    }
+
+    template<class U0, class U1, class U2, class U3, class U4, class U5, class U6, class U7, class U8, class U9>
+    friend bool operator<=(const tuple& me, const tuple<U0,U1,U2,U3,U4,U5,U6,U7,U8,U9>& other) {
+        return static_cast<const my_cons &>(me)<=(other);
+    }
+
+};  // tuple
+
+// empty tuple
+template<>
+class tuple<null_type, null_type, null_type, null_type, null_type, null_type, null_type, null_type, null_type, null_type> : public null_type {
+    typedef null_type inherited;
+};
+
+// helper classes
+
+template < class T>
+class tuple_size {
+public:
+    static const size_t value = 1 + tuple_size<typename T::tail_type>::value;
+};
+
+template <>
+class tuple_size<tuple<> > { 
+public:
+    static const size_t value = 0;
+};
+
+template <>
+class tuple_size<null_type> {
+public:
+    static const size_t value = 0;
+};
+
+template<int N, class T>
+struct tuple_element {
+    typedef typename internal::component<N, typename T::my_cons>::type type;
+};
+
+template<int N, class T>
+inline static typename tuple_element<N,T>::type& get(T &t) { return t.template get<N>(); }
+
+}  // interface5
+} // tbb
+
+#if TBB_IMPLEMENT_CPP0X
+namespace std {
+using tbb::interface5::tuple;
+using tbb::interface5::tuple_size;
+using tbb::interface5::tuple_element;
+using tbb::interface5::get;
+}
+#endif
+#endif /* __TBB_tuple_H */
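The compat tuple is a fixed ten-slot cons list offering get&lt;N&gt;, tuple_size and tuple_element, gated behind TBB_PREVIEW_TUPLE (and mirrored into std:: when TBB_IMPLEMENT_CPP0X is set). A brief illustrative sketch, not part of the commit:

    #define TBB_PREVIEW_TUPLE 1
    #include "tbb/compat/tuple"
    #include <cstdio>

    int main() {
        using tbb::interface5::tuple;
        using tbb::interface5::get;

        tuple<int, double, const char*> t(1, 2.5, "three");

        std::printf("%d %g %s\n", get<0>(t), get<1>(t), get<2>(t));
        std::printf("size = %d\n",
                    int(tbb::interface5::tuple_size<tuple<int, double, const char*> >::value));
        return 0;
    }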
diff --git a/tbb/include/tbb/concurrent_hash_map.h b/tbb/include/tbb/concurrent_hash_map.h
new file mode 100644 (file)
index 0000000..39cc308
--- /dev/null
+++ b/tbb/include/tbb/concurrent_hash_map.h
@@ -0,0 +1,1336 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_concurrent_hash_map_H
+#define __TBB_concurrent_hash_map_H
+
+#include "tbb_stddef.h"
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
+    #pragma warning (push)
+    #pragma warning (disable: 4530)
+#endif
+
+#include <iterator>
+#include <utility>      // Need std::pair
+#include <cstring>      // Need std::memset
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    #pragma warning (pop)
+#endif
+
+#include "cache_aligned_allocator.h"
+#include "tbb_allocator.h"
+#include "spin_rw_mutex.h"
+#include "atomic.h"
+#include "aligned_space.h"
+#include "tbb_exception.h"
+#include "tbb_profiling.h"
+#include "_concurrent_unordered_internal.h" // Need tbb_hasher
+#if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS
+#include <typeinfo>
+#endif
+#if __TBB_STATISTICS
+#include <stdio.h>
+#endif
+
+namespace tbb {
+
+//! hash_compare that is default argument for concurrent_hash_map
+template<typename Key>
+struct tbb_hash_compare {
+    static size_t hash( const Key& a ) { return tbb_hasher(a); }
+    static bool equal( const Key& a, const Key& b ) { return a == b; }
+};
+
+namespace interface5 {
+
+    template<typename Key, typename T, typename HashCompare = tbb_hash_compare<Key>, typename A = tbb_allocator<std::pair<Key, T> > >
+    class concurrent_hash_map;
+
+    //! @cond INTERNAL
+    namespace internal {
+
+
+    //! Type of a hash code.
+    typedef size_t hashcode_t;
+    //! Node base type
+    struct hash_map_node_base : tbb::internal::no_copy {
+        //! Mutex type
+        typedef spin_rw_mutex mutex_t;
+        //! Scoped lock type for mutex
+        typedef mutex_t::scoped_lock scoped_t;
+        //! Next node in chain
+        hash_map_node_base *next;
+        mutex_t mutex;
+    };
+    //! Incompleteness flag value
+    static hash_map_node_base *const rehash_req = reinterpret_cast<hash_map_node_base*>(size_t(3));
+    //! Rehashed empty bucket flag
+    static hash_map_node_base *const empty_rehashed = reinterpret_cast<hash_map_node_base*>(size_t(0));
+    //! base class of concurrent_hash_map
+    class hash_map_base {
+    public:
+        //! Size type
+        typedef size_t size_type;
+        //! Type of a hash code.
+        typedef size_t hashcode_t;
+        //! Segment index type
+        typedef size_t segment_index_t;
+        //! Node base type
+        typedef hash_map_node_base node_base;
+        //! Bucket type
+        struct bucket : tbb::internal::no_copy {
+            //! Mutex type for buckets
+            typedef spin_rw_mutex mutex_t;
+            //! Scoped lock type for mutex
+            typedef mutex_t::scoped_lock scoped_t;
+            mutex_t mutex;
+            node_base *node_list;
+        };
+        //! Count of embedded segments
+        static size_type const embedded_block = 1;
+        //! Count of buckets in the embedded segments
+        static size_type const embedded_buckets = 1<<embedded_block;
+        //! Count of segments in the first block
+        static size_type const first_block = 8; // including embedded_block; with 16-byte buckets the allocations come out as powers of two times 4096
+        //! Size of a pointer / table size
+        static size_type const pointers_per_table = sizeof(segment_index_t) * 8; // one segment per bit
+        //! Segment pointer
+        typedef bucket *segment_ptr_t;
+        //! Segment pointers table type
+        typedef segment_ptr_t segments_table_t[pointers_per_table];
+        //! Hash mask = sum of allocated segment sizes - 1
+        atomic<hashcode_t> my_mask;
+        //! Segment pointers table. Also prevents false sharing between my_mask and my_size
+        segments_table_t my_table;
+        //! Size of container in stored items
+        atomic<size_type> my_size; // It must be in separate cache line from my_mask due to performance effects
+        //! Zero segment
+        bucket my_embedded_segment[embedded_buckets];
+#if __TBB_STATISTICS
+        atomic<unsigned> my_info_resizes; // concurrent ones
+        mutable atomic<unsigned> my_info_restarts; // race collisions
+        atomic<unsigned> my_info_rehashes;  // invocations of rehash_bucket
+#endif
+        //! Constructor
+        hash_map_base() {
+            std::memset( this, 0, pointers_per_table*sizeof(segment_ptr_t) // 32*4=128   or 64*8=512
+                + sizeof(my_size) + sizeof(my_mask)  // 4+4 or 8+8
+                + embedded_buckets*sizeof(bucket) ); // n*8 or n*16
+            for( size_type i = 0; i < embedded_block; i++ ) // fill the table
+                my_table[i] = my_embedded_segment + segment_base(i);
+            my_mask = embedded_buckets - 1;
+            __TBB_ASSERT( embedded_block <= first_block, "The first block number must include embedded blocks");
+#if __TBB_STATISTICS
+            my_info_resizes = 0; // concurrent ones
+            my_info_restarts = 0; // race collisions
+            my_info_rehashes = 0;  // invocations of rehash_bucket
+#endif
+        }
+
+        //! @return segment index of given index in the array
+        static segment_index_t segment_index_of( size_type index ) {
+            return segment_index_t( __TBB_Log2( index|1 ) );
+        }
+
+        //! @return the first array index of given segment
+        static segment_index_t segment_base( segment_index_t k ) {
+            return (segment_index_t(1)<<k & ~segment_index_t(1));
+        }
+
+        //! @return segment size except for @arg k == 0
+        static size_type segment_size( segment_index_t k ) {
+            return size_type(1)<<k; // fake value for k==0
+        }
+        
+        //! @return true if @arg ptr is valid pointer
+        static bool is_valid( void *ptr ) {
+            return reinterpret_cast<size_t>(ptr) > size_t(63);
+        }
+
+        //! Initialize buckets
+        static void init_buckets( segment_ptr_t ptr, size_type sz, bool is_initial ) {
+            if( is_initial ) std::memset(ptr, 0, sz*sizeof(bucket) );
+            else for(size_type i = 0; i < sz; i++, ptr++) {
+                    *reinterpret_cast<intptr_t*>(&ptr->mutex) = 0;
+                    ptr->node_list = rehash_req;
+                }
+        }
+        
+        //! Add node @arg n to bucket @arg b
+        static void add_to_bucket( bucket *b, node_base *n ) {
+            __TBB_ASSERT(b->node_list != rehash_req, NULL);
+            n->next = b->node_list;
+            b->node_list = n; // it's under lock and the flag is set
+        }
+
+        //! Exception safety helper
+        struct enable_segment_failsafe {
+            segment_ptr_t *my_segment_ptr;
+            enable_segment_failsafe(segments_table_t &table, segment_index_t k) : my_segment_ptr(&table[k]) {}
+            ~enable_segment_failsafe() {
+                if( my_segment_ptr ) *my_segment_ptr = 0; // indicate no allocation in progress
+            }
+        };
+
+        //! Enable segment
+        void enable_segment( segment_index_t k, bool is_initial = false ) {
+            __TBB_ASSERT( k, "Zero segment must be embedded" );
+            enable_segment_failsafe watchdog( my_table, k );
+            cache_aligned_allocator<bucket> alloc;
+            size_type sz;
+            __TBB_ASSERT( !is_valid(my_table[k]), "Wrong concurrent assignment");
+            if( k >= first_block ) {
+                sz = segment_size( k );
+                segment_ptr_t ptr = alloc.allocate( sz );
+                init_buckets( ptr, sz, is_initial );
+                itt_hide_store_word( my_table[k], ptr );
+                sz <<= 1;// double it to get entire capacity of the container
+            } else { // the first block
+                __TBB_ASSERT( k == embedded_block, "Wrong segment index" );
+                sz = segment_size( first_block );
+                segment_ptr_t ptr = alloc.allocate( sz - embedded_buckets );
+                init_buckets( ptr, sz - embedded_buckets, is_initial );
+                ptr -= segment_base(embedded_block);
+                for(segment_index_t i = embedded_block; i < first_block; i++) // calc the offsets
+                    itt_hide_store_word( my_table[i], ptr + segment_base(i) );
+            }
+            itt_store_word_with_release( my_mask, sz-1 );
+            watchdog.my_segment_ptr = 0;
+        }
+
+        //! Get bucket by (masked) hashcode
+        bucket *get_bucket( hashcode_t h ) const throw() { // TODO: add throw() everywhere?
+            segment_index_t s = segment_index_of( h );
+            h -= segment_base(s);
+            segment_ptr_t seg = my_table[s];
+            __TBB_ASSERT( is_valid(seg), "hashcode must be cut by valid mask for allocated segments" );
+            return &seg[h];
+        }
+
+        // internal serial rehashing helper
+        void mark_rehashed_levels( hashcode_t h ) throw () {
+            segment_index_t s = segment_index_of( h );
+            while( segment_ptr_t seg = my_table[++s] )
+                if( seg[h].node_list == rehash_req ) {
+                    seg[h].node_list = empty_rehashed;
+                    mark_rehashed_levels( h + ((hashcode_t)1<<s) ); // optimized segment_base(s)
+                }
+        }
+
+        //! Check for mask race
+        // Splitting into two functions should help inlining
+        inline bool check_mask_race( const hashcode_t h, hashcode_t &m ) const {
+            hashcode_t m_now, m_old = m;
+            m_now = (hashcode_t) itt_load_word_with_acquire( my_mask );
+            if( m_old != m_now )
+                return check_rehashing_collision( h, m_old, m = m_now );
+            return false;
+        }
+
+        //! Process mask race, check for rehashing collision
+        bool check_rehashing_collision( const hashcode_t h, hashcode_t m_old, hashcode_t m ) const {
+            __TBB_ASSERT(m_old != m, NULL); // TODO?: m arg could be optimized out by passing h = h&m
+            if( (h & m_old) != (h & m) ) { // mask changed for this hashcode, rare event
+                // condition above proves that 'h' has some other bits set beside 'm_old'
+                // find next applicable mask after m_old    //TODO: look at bsl instruction
+                for( ++m_old; !(h & m_old); m_old <<= 1 ) // at maximum few rounds depending on the first block size
+                    ;
+                m_old = (m_old<<1) - 1; // get full mask from a bit
+                __TBB_ASSERT((m_old&(m_old+1))==0 && m_old <= m, NULL);
+                // check whether it is rehashing/ed
+                if( itt_load_word_with_acquire(get_bucket(h & m_old)->node_list) != rehash_req )
+                {
+#if __TBB_STATISTICS
+                    my_info_restarts++; // race collisions
+#endif
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        //! Insert a node and check for load factor. @return segment index to enable.
+        segment_index_t insert_new_node( bucket *b, node_base *n, hashcode_t mask ) {
+            size_type sz = ++my_size; // prefix form is to enforce allocation after the first item inserted
+            add_to_bucket( b, n );
+            // check load factor
+            if( sz >= mask ) { // TODO: add custom load_factor 
+                segment_index_t new_seg = __TBB_Log2( mask+1 ); //optimized segment_index_of
+                __TBB_ASSERT( is_valid(my_table[new_seg-1]), "new allocations must not publish new mask until segment has allocated");
+                if( !itt_hide_load_word(my_table[new_seg])
+                  && __TBB_CompareAndSwapW(&my_table[new_seg], 2, 0) == 0 )
+                    return new_seg; // The value must be processed
+            }
+            return 0;
+        }
+
+        //! Prepare enough segments for number of buckets
+        void reserve(size_type buckets) {
+            if( !buckets-- ) return;
+            bool is_initial = !my_size;
+            for( size_type m = my_mask; buckets > m; m = my_mask )
+                enable_segment( segment_index_of( m+1 ), is_initial );
+        }
+        //! Swap hash_map_bases
+        void internal_swap(hash_map_base &table) {
+            std::swap(this->my_mask, table.my_mask);
+            std::swap(this->my_size, table.my_size);
+            for(size_type i = 0; i < embedded_buckets; i++)
+                std::swap(this->my_embedded_segment[i].node_list, table.my_embedded_segment[i].node_list);
+            for(size_type i = embedded_block; i < pointers_per_table; i++)
+                std::swap(this->my_table[i], table.my_table[i]);
+        }
+    };
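hash_map_base grows by whole segments: bucket index i lives in segment segment_index_of(i) = floor(log2(i|1)); segment 0 holds the two embedded buckets and, for k >= 1, segment k covers indices [2^k, 2^(k+1)). A standalone sketch of that mapping (hypothetical helpers mirroring segment_index_of/segment_base above; the real code uses __TBB_Log2):

    #include <cstdio>
    #include <cstddef>

    // Hypothetical stand-ins for hash_map_base::segment_index_of / segment_base.
    static std::size_t log2_floor(std::size_t x) {
        std::size_t k = 0;
        while (x >>= 1) ++k;
        return k;
    }
    static std::size_t seg_index_of(std::size_t index) { return log2_floor(index | 1); }
    static std::size_t seg_base(std::size_t k) { return (std::size_t(1) << k) & ~std::size_t(1); }

    int main() {
        // Buckets 0-1 sit in the embedded segment 0, 2-3 in segment 1,
        // 4-7 in segment 2, 8-15 in segment 3: segment k holds 2^k buckets for k >= 1.
        for (std::size_t i = 0; i < 16; ++i)
            std::printf("bucket %2lu -> segment %lu, offset %lu\n",
                        (unsigned long)i,
                        (unsigned long)seg_index_of(i),
                        (unsigned long)(i - seg_base(seg_index_of(i))));
        return 0;
    }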
+
+    template<typename Iterator>
+    class hash_map_range;
+
+    //! Meets requirements of a forward iterator for STL
+    /** Value is either the T or const T type of the container.
+        @ingroup containers */ 
+    template<typename Container, typename Value>
+    class hash_map_iterator
+        : public std::iterator<std::forward_iterator_tag,Value>
+    {
+        typedef Container map_type;
+        typedef typename Container::node node;
+        typedef hash_map_base::node_base node_base;
+        typedef hash_map_base::bucket bucket;
+
+        template<typename C, typename T, typename U>
+        friend bool operator==( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j );
+
+        template<typename C, typename T, typename U>
+        friend bool operator!=( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j );
+
+        template<typename C, typename T, typename U>
+        friend ptrdiff_t operator-( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j );
+    
+        template<typename C, typename U>
+        friend class hash_map_iterator;
+
+        template<typename I>
+        friend class hash_map_range;
+
+        void advance_to_next_bucket() { // TODO?: refactor to iterator_base class
+            size_t k = my_index+1;
+            while( my_bucket && k <= my_map->my_mask ) {
+                // Following test uses 2's-complement wizardry
+                if( k& (k-2) ) // not the beginning of a segment
+                    ++my_bucket;
+                else my_bucket = my_map->get_bucket( k );
+                my_node = static_cast<node*>( my_bucket->node_list );
+                if( hash_map_base::is_valid(my_node) ) {
+                    my_index = k; return;
+                }
+                ++k;
+            }
+            my_bucket = 0; my_node = 0; my_index = k; // the end
+        }
+#if !defined(_MSC_VER) || defined(__INTEL_COMPILER)
+        template<typename Key, typename T, typename HashCompare, typename A>
+        friend class interface5::concurrent_hash_map;
+#else
+    public: // workaround
+#endif
+        //! concurrent_hash_map over which we are iterating.
+        const Container *my_map;
+
+        //! Index in hash table for current item
+        size_t my_index;
+
+        //! Pointer to bucket
+        const bucket *my_bucket;
+
+        //! Pointer to node that has current item
+        node *my_node;
+
+        hash_map_iterator( const Container &map, size_t index, const bucket *b, node_base *n );
+
+    public:
+        //! Construct undefined iterator
+        hash_map_iterator() {}
+        hash_map_iterator( const hash_map_iterator<Container,typename Container::value_type> &other ) :
+            my_map(other.my_map),
+            my_index(other.my_index),
+            my_bucket(other.my_bucket),
+            my_node(other.my_node)
+        {}
+        Value& operator*() const {
+            __TBB_ASSERT( hash_map_base::is_valid(my_node), "iterator uninitialized or at end of container?" );
+            return my_node->item;
+        }
+        Value* operator->() const {return &operator*();}
+        hash_map_iterator& operator++();
+        
+        //! Post increment
+        hash_map_iterator operator++(int) {
+            hash_map_iterator old(*this);
+            operator++();
+            return old;
+        }
+    };
+
+    template<typename Container, typename Value>
+    hash_map_iterator<Container,Value>::hash_map_iterator( const Container &map, size_t index, const bucket *b, node_base *n ) :
+        my_map(&map),
+        my_index(index),
+        my_bucket(b),
+        my_node( static_cast<node*>(n) )
+    {
+        if( b && !hash_map_base::is_valid(n) )
+            advance_to_next_bucket();
+    }
+
+    template<typename Container, typename Value>
+    hash_map_iterator<Container,Value>& hash_map_iterator<Container,Value>::operator++() {
+        my_node = static_cast<node*>( my_node->next );
+        if( !my_node ) advance_to_next_bucket();
+        return *this;
+    }
+
+    template<typename Container, typename T, typename U>
+    bool operator==( const hash_map_iterator<Container,T>& i, const hash_map_iterator<Container,U>& j ) {
+        return i.my_node == j.my_node && i.my_map == j.my_map;
+    }
+
+    template<typename Container, typename T, typename U>
+    bool operator!=( const hash_map_iterator<Container,T>& i, const hash_map_iterator<Container,U>& j ) {
+        return i.my_node != j.my_node || i.my_map != j.my_map;
+    }
+
+    //! Range class used with concurrent_hash_map
+    /** @ingroup containers */ 
+    template<typename Iterator>
+    class hash_map_range {
+        typedef typename Iterator::map_type map_type;
+        Iterator my_begin;
+        Iterator my_end;
+        mutable Iterator my_midpoint;
+        size_t my_grainsize;
+        //! Set my_midpoint to point approximately half way between my_begin and my_end.
+        void set_midpoint() const;
+        template<typename U> friend class hash_map_range;
+    public:
+        //! Type for size of a range
+        typedef std::size_t size_type;
+        typedef typename Iterator::value_type value_type;
+        typedef typename Iterator::reference reference;
+        typedef typename Iterator::difference_type difference_type;
+        typedef Iterator iterator;
+
+        //! True if range is empty.
+        bool empty() const {return my_begin==my_end;}
+
+        //! True if range can be partitioned into two subranges.
+        bool is_divisible() const {
+            return my_midpoint!=my_end;
+        }
+        //! Split range.
+        hash_map_range( hash_map_range& r, split ) : 
+            my_end(r.my_end),
+            my_grainsize(r.my_grainsize)
+        {
+            r.my_end = my_begin = r.my_midpoint;
+            __TBB_ASSERT( !empty(), "Splitting a range that is not divisible" );
+            __TBB_ASSERT( !r.empty(), "Splitting a range that is not divisible" );
+            set_midpoint();
+            r.set_midpoint();
+        }
+        //! type conversion
+        template<typename U>
+        hash_map_range( hash_map_range<U>& r) : 
+            my_begin(r.my_begin),
+            my_end(r.my_end),
+            my_midpoint(r.my_midpoint),
+            my_grainsize(r.my_grainsize)
+        {}
+#if TBB_DEPRECATED
+        //! Init range with iterators and grainsize specified
+        hash_map_range( const Iterator& begin_, const Iterator& end_, size_type grainsize_ = 1 ) : 
+            my_begin(begin_), 
+            my_end(end_),
+            my_grainsize(grainsize_)
+        {
+            if(!my_end.my_index && !my_end.my_bucket) // end
+                my_end.my_index = my_end.my_map->my_mask + 1;
+            set_midpoint();
+            __TBB_ASSERT( grainsize_>0, "grainsize must be positive" );
+        }
+#endif
+        //! Init range with container and grainsize specified
+        hash_map_range( const map_type &map, size_type grainsize_ = 1 ) : 
+            my_begin( Iterator( map, 0, map.my_embedded_segment, map.my_embedded_segment->node_list ) ),
+            my_end( Iterator( map, map.my_mask + 1, 0, 0 ) ),
+            my_grainsize( grainsize_ )
+        {
+            __TBB_ASSERT( grainsize_>0, "grainsize must be positive" );
+            set_midpoint();
+        }
+        const Iterator& begin() const {return my_begin;}
+        const Iterator& end() const {return my_end;}
+        //! The grain size for this range.
+        size_type grainsize() const {return my_grainsize;}
+    };
+
+    template<typename Iterator>
+    void hash_map_range<Iterator>::set_midpoint() const {
+        // Split by groups of nodes
+        size_t m = my_end.my_index-my_begin.my_index;
+        if( m > my_grainsize ) {
+            m = my_begin.my_index + m/2u;
+            hash_map_base::bucket *b = my_begin.my_map->get_bucket(m);
+            my_midpoint = Iterator(*my_begin.my_map,m,b,b->node_list);
+        } else {
+            my_midpoint = my_end;
+        }
+        __TBB_ASSERT( my_begin.my_index <= my_midpoint.my_index,
+            "my_begin is after my_midpoint" );
+        __TBB_ASSERT( my_midpoint.my_index <= my_end.my_index,
+            "my_midpoint is after my_end" );
+        __TBB_ASSERT( my_begin != my_midpoint || my_begin == my_end,
+            "[my_begin, my_midpoint) range should not be empty" );
+    }
+
+    } // internal
+//! @endcond
+
+//! Unordered map from Key to T.
+/** concurrent_hash_map is an associative container that permits concurrent access.
+
+@par Compatibility
+    The class meets all Container Requirements from C++ Standard (See ISO/IEC 14882:2003(E), clause 23.1).
+
+@par Exception Safety
+    - The hash function must not throw an exception. The destructors of the user-defined types Key and T must not throw exceptions.
+    - If an exception occurs during an insert() operation, the operation has no effect (unless the exception is raised by the HashCompare::hash() function during segment growth).
+    - If an exception occurs during operator=(), the container may hold only part of the source items, and size() and empty() may return incorrect results.
+
+@par Changes since TBB 2.1
+    - Replaced internal algorithm and data structure. Patent is pending.
+    - Added a bucket-count argument to the constructor
+
+@par Changes since TBB 2.0
+    - Fixed exception-safety
+    - Added template argument for allocator
+    - Added allocator argument in constructors
+    - Added constructor from a range of iterators
+    - Added several new overloaded insert() methods
+    - Added get_allocator()
+    - Added swap()
+    - Added count()
+    - Added overloaded erase(accessor &) and erase(const_accessor&)
+    - Added equal_range() [const]
+    - Added [const_]pointer, [const_]reference, and allocator_type types
+    - Added global functions: operator==(), operator!=(), and swap() 
+
+    @ingroup containers */
+template<typename Key, typename T, typename HashCompare, typename Allocator>
+class concurrent_hash_map : protected internal::hash_map_base {
+    template<typename Container, typename Value>
+    friend class internal::hash_map_iterator;
+
+    template<typename I>
+    friend class internal::hash_map_range;
+
+public:
+    typedef Key key_type;
+    typedef T mapped_type;
+    typedef std::pair<const Key,T> value_type;
+    typedef hash_map_base::size_type size_type;
+    typedef ptrdiff_t difference_type;
+    typedef value_type *pointer;
+    typedef const value_type *const_pointer;
+    typedef value_type &reference;
+    typedef const value_type &const_reference;
+    typedef internal::hash_map_iterator<concurrent_hash_map,value_type> iterator;
+    typedef internal::hash_map_iterator<concurrent_hash_map,const value_type> const_iterator;
+    typedef internal::hash_map_range<iterator> range_type;
+    typedef internal::hash_map_range<const_iterator> const_range_type;
+    typedef Allocator allocator_type;
+
+protected:
+    friend class const_accessor;
+    struct node;
+    typedef typename Allocator::template rebind<node>::other node_allocator_type;
+    node_allocator_type my_allocator;
+    HashCompare my_hash_compare;
+
+    struct node : public node_base {
+        value_type item;
+        node( const Key &key ) : item(key, T()) {}
+        node( const Key &key, const T &t ) : item(key, t) {}
+        // exception-safe allocation, see C++ Standard 2003, clause 5.3.4p17
+        void *operator new( size_t /*size*/, node_allocator_type &a ) {
+            void *ptr = a.allocate(1);
+            if(!ptr) 
+                tbb::internal::throw_exception(tbb::internal::eid_bad_alloc);
+            return ptr;
+        }
+        // match placement-new form above to be called if exception thrown in constructor
+        void operator delete( void *ptr, node_allocator_type &a ) {return a.deallocate(static_cast<node*>(ptr),1); }
+    };
+
+    void delete_node( node_base *n ) {
+        my_allocator.destroy( static_cast<node*>(n) );
+        my_allocator.deallocate( static_cast<node*>(n), 1);
+    }
+
+    node *search_bucket( const key_type &key, bucket *b ) const {
+        node *n = static_cast<node*>( b->node_list );
+        while( is_valid(n) && !my_hash_compare.equal(key, n->item.first) )
+            n = static_cast<node*>( n->next );
+        __TBB_ASSERT(n != internal::rehash_req, "Search can be executed only for rehashed bucket");
+        return n;
+    }
+
+    //! A bucket accessor finds a bucket by masked hash code, rehashes it if needed, acquires its lock, and provides access to it
+    class bucket_accessor : public bucket::scoped_t {
+        bucket *my_b;
+    public:
+        bucket_accessor( concurrent_hash_map *base, const hashcode_t h, bool writer = false ) { acquire( base, h, writer ); }
+        //! find a bucket by masked hashcode, optionally rehash, and acquire the lock
+        inline void acquire( concurrent_hash_map *base, const hashcode_t h, bool writer = false ) {
+            my_b = base->get_bucket( h );
+            // TODO: actually, notification is unnecessary here, just hiding double-check
+            if( itt_load_word_with_acquire(my_b->node_list) == internal::rehash_req
+                && try_acquire( my_b->mutex, /*write=*/true ) )
+            {
+                if( my_b->node_list == internal::rehash_req ) base->rehash_bucket( my_b, h ); //recursive rehashing
+            }
+            else bucket::scoped_t::acquire( my_b->mutex, writer );
+            __TBB_ASSERT( my_b->node_list != internal::rehash_req, NULL);
+        }
+        //! check whether bucket is locked for write
+        bool is_writer() { return bucket::scoped_t::is_writer; }
+        //! get bucket pointer
+        bucket *operator() () { return my_b; }
+    };
+
+    // TODO refactor to hash_base
+    void rehash_bucket( bucket *b_new, const hashcode_t h ) {
+        __TBB_ASSERT( *(intptr_t*)(&b_new->mutex), "b_new must be locked (for write)");
+        __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" );
+        __TBB_store_with_release(b_new->node_list, internal::empty_rehashed); // mark rehashed
+        hashcode_t mask = ( 1u<<__TBB_Log2( h ) ) - 1; // get parent mask from the topmost bit
+#if __TBB_STATISTICS
+        my_info_rehashes++; // invocations of rehash_bucket
+#endif
+
+        bucket_accessor b_old( this, h & mask );
+
+        mask = (mask<<1) | 1; // get full mask for new bucket
+        __TBB_ASSERT( (mask&(mask+1))==0 && (h & mask) == h, NULL );
+    restart:
+        for( node_base **p = &b_old()->node_list, *n = __TBB_load_with_acquire(*p); is_valid(n); n = *p ) {
+            hashcode_t c = my_hash_compare.hash( static_cast<node*>(n)->item.first );
+#if TBB_USE_ASSERT
+            hashcode_t bmask = h & (mask>>1);
+            bmask = bmask==0? 1 : ( 1u<<(__TBB_Log2( bmask )+1 ) ) - 1; // minimal mask of parent bucket
+            __TBB_ASSERT( (c & bmask) == (h & bmask), "hash() function changed for key in table" );
+#endif
+            if( (c & mask) == h ) {
+                if( !b_old.is_writer() )
+                    if( !b_old.upgrade_to_writer() ) {
+                        goto restart; // node ptr can be invalid due to concurrent erase
+                    }
+                *p = n->next; // exclude from b_old
+                add_to_bucket( b_new, n );
+            } else p = &n->next; // iterate to next item
+        }
+    }
+
+public:
+    
+    class accessor;
+    //! Combines data access, locking, and garbage collection.
+    class const_accessor : private node::scoped_t /*which derived from no_copy*/ {
+        friend class concurrent_hash_map<Key,T,HashCompare,Allocator>;
+        friend class accessor;
+    public:
+        //! Type of value
+        typedef const typename concurrent_hash_map::value_type value_type;
+
+        //! True if result is empty.
+        bool empty() const {return !my_node;}
+
+        //! Set to null
+        void release() {
+            if( my_node ) {
+                node::scoped_t::release();
+                my_node = 0;
+            }
+        }
+
+        //! Return reference to associated value in hash table.
+        const_reference operator*() const {
+            __TBB_ASSERT( my_node, "attempt to dereference empty accessor" );
+            return my_node->item;
+        }
+
+        //! Return pointer to associated value in hash table.
+        const_pointer operator->() const {
+            return &operator*();
+        }
+
+        //! Create empty result
+        const_accessor() : my_node(NULL) {}
+
+        //! Destroy result after releasing the underlying reference.
+        ~const_accessor() {
+            my_node = NULL; // scoped lock's release() is called in its destructor
+        }
+    protected:
+        bool is_writer() { return node::scoped_t::is_writer; }
+        node *my_node;
+        hashcode_t my_hash;
+    };
+
+    //! Allows write access to elements and combines data access, locking, and garbage collection.
+    class accessor: public const_accessor {
+    public:
+        //! Type of value
+        typedef typename concurrent_hash_map::value_type value_type;
+
+        //! Return reference to associated value in hash table.
+        reference operator*() const {
+            __TBB_ASSERT( this->my_node, "attempt to dereference empty accessor" );
+            return this->my_node->item;
+        }
+
+        //! Return pointer to associated value in hash table.
+        pointer operator->() const {
+            return &operator*();
+        }
+    };
+
+    //! Construct empty table.
+    concurrent_hash_map(const allocator_type &a = allocator_type())
+        : internal::hash_map_base(), my_allocator(a)
+    {}
+
+    //! Construct empty table with n preallocated buckets. This number also serves as the initial concurrency level.
+    concurrent_hash_map(size_type n, const allocator_type &a = allocator_type())
+        : my_allocator(a)
+    {
+        reserve( n );
+    }
+
+    //! Copy constructor
+    concurrent_hash_map( const concurrent_hash_map& table, const allocator_type &a = allocator_type())
+        : internal::hash_map_base(), my_allocator(a)
+    {
+        internal_copy(table);
+    }
+
+    //! Construction with copying iteration range and given allocator instance
+    template<typename I>
+    concurrent_hash_map(I first, I last, const allocator_type &a = allocator_type())
+        : my_allocator(a)
+    {
+        reserve( std::distance(first, last) ); // TODO: load_factor?
+        internal_copy(first, last);
+    }
+
+    //! Assignment
+    concurrent_hash_map& operator=( const concurrent_hash_map& table ) {
+        if( this!=&table ) {
+            clear();
+            internal_copy(table);
+        } 
+        return *this;
+    }
+
+
+    //! Rehashes and optionally resizes the whole table.
+    /** Useful to optimize performance before or after concurrent operations.
+        Also enables the use of the concurrent methods find() and count() in a serial context. */
+    void rehash(size_type n = 0);
+    
+    //! Clear table
+    void clear();
+
+    //! Clear table and destroy it.  
+    ~concurrent_hash_map() { clear(); }
+
+    //------------------------------------------------------------------------
+    // Parallel algorithm support
+    //------------------------------------------------------------------------
+    range_type range( size_type grainsize=1 ) {
+        return range_type( *this, grainsize );
+    }
+    const_range_type range( size_type grainsize=1 ) const {
+        return const_range_type( *this, grainsize );
+    }
+
+    //------------------------------------------------------------------------
+    // STL support - not thread-safe methods
+    //------------------------------------------------------------------------
+    iterator begin() {return iterator(*this,0,my_embedded_segment,my_embedded_segment->node_list);}
+    iterator end() {return iterator(*this,0,0,0);}
+    const_iterator begin() const {return const_iterator(*this,0,my_embedded_segment,my_embedded_segment->node_list);}
+    const_iterator end() const {return const_iterator(*this,0,0,0);}
+    std::pair<iterator, iterator> equal_range( const Key& key ) { return internal_equal_range(key, end()); }
+    std::pair<const_iterator, const_iterator> equal_range( const Key& key ) const { return internal_equal_range(key, end()); }
+    
+    //! Number of items in table.
+    size_type size() const { return my_size; }
+
+    //! True if size()==0.
+    bool empty() const { return my_size == 0; }
+
+    //! Upper bound on size.
+    size_type max_size() const {return (~size_type(0))/sizeof(node);}
+
+    //! Returns the current number of buckets
+    size_type bucket_count() const { return my_mask+1; }
+
+    //! return allocator object
+    allocator_type get_allocator() const { return this->my_allocator; }
+
+    //! swap two instances. Iterators are invalidated
+    void swap(concurrent_hash_map &table);
+
+    //------------------------------------------------------------------------
+    // concurrent map operations
+    //------------------------------------------------------------------------
+
+    //! Return count of items (0 or 1)
+    size_type count( const Key &key ) const {
+        return const_cast<concurrent_hash_map*>(this)->lookup(/*insert*/false, key, NULL, NULL, /*write=*/false );
+    }
+
+    //! Find item and acquire a read lock on the item.
+    /** Return true if item is found, false otherwise. */
+    bool find( const_accessor &result, const Key &key ) const {
+        result.release();
+        return const_cast<concurrent_hash_map*>(this)->lookup(/*insert*/false, key, NULL, &result, /*write=*/false );
+    }
+
+    //! Find item and acquire a write lock on the item.
+    /** Return true if item is found, false otherwise. */
+    bool find( accessor &result, const Key &key ) {
+        result.release();
+        return lookup(/*insert*/false, key, NULL, &result, /*write=*/true );
+    }
+        
+    //! Insert item (if not already present) and acquire a read lock on the item.
+    /** Returns true if item is new. */
+    bool insert( const_accessor &result, const Key &key ) {
+        result.release();
+        return lookup(/*insert*/true, key, NULL, &result, /*write=*/false );
+    }
+
+    //! Insert item (if not already present) and acquire a write lock on the item.
+    /** Returns true if item is new. */
+    bool insert( accessor &result, const Key &key ) {
+        result.release();
+        return lookup(/*insert*/true, key, NULL, &result, /*write=*/true );
+    }
+
+    //! Insert item by copying if the key is not already present, and acquire a read lock on the item.
+    /** Returns true if item is new. */
+    bool insert( const_accessor &result, const value_type &value ) {
+        result.release();
+        return lookup(/*insert*/true, value.first, &value.second, &result, /*write=*/false );
+    }
+
+    //! Insert item by copying if the key is not already present, and acquire a write lock on the item.
+    /** Returns true if item is new. */
+    bool insert( accessor &result, const value_type &value ) {
+        result.release();
+        return lookup(/*insert*/true, value.first, &value.second, &result, /*write=*/true );
+    }
+
+    //! Insert item by copying if the key is not already present
+    /** Returns true if item is inserted. */
+    bool insert( const value_type &value ) {
+        return lookup(/*insert*/true, value.first, &value.second, NULL, /*write=*/false );
+    }
+
+    //! Insert range [first, last)
+    template<typename I>
+    void insert(I first, I last) {
+        for(; first != last; ++first)
+            insert( *first );
+    }
+
+    //! Erase item.
+    /** Return true if the item was erased by this particular call. */
+    bool erase( const Key& key );
+
+    //! Erase item by const_accessor.
+    /** Return true if the item was erased by this particular call. */
+    bool erase( const_accessor& item_accessor ) {
+        return exclude( item_accessor );
+    }
+
+    //! Erase item by accessor.
+    /** Return true if the item was erased by this particular call. */
+    bool erase( accessor& item_accessor ) {
+        return exclude( item_accessor );
+    }
+
+protected:
+    //! Insert or find item and optionally acquire a lock on the item.
+    bool lookup( bool op_insert, const Key &key, const T *t, const_accessor *result, bool write );
+
+    //! delete item by accessor
+    bool exclude( const_accessor &item_accessor );
+
+    //! Returns a pair of iterators delimiting the items that match the key (at most one), or a pair of end iterators if no such item exists
+    template<typename I>
+    std::pair<I, I> internal_equal_range( const Key& key, I end ) const;
+
+    //! Copy "source" to *this, where *this must start out empty.
+    void internal_copy( const concurrent_hash_map& source );
+
+    template<typename I>
+    void internal_copy(I first, I last);
+
+    //! Fast find when no concurrent erasure is used. For internal use inside TBB only!
+    /** Return pointer to item with given key, or NULL if no such item exists.
+        Must not be called concurrently with erasure operations. */
+    const_pointer internal_fast_find( const Key& key ) const {
+        hashcode_t h = my_hash_compare.hash( key );
+        hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask );
+        node *n;
+    restart:
+        __TBB_ASSERT((m&(m+1))==0, NULL);
+        bucket *b = get_bucket( h & m );
+        // TODO: actually, notification is unnecessary here, just hiding double-check
+        if( itt_load_word_with_acquire(b->node_list) == internal::rehash_req )
+        {
+            bucket::scoped_t lock;
+            if( lock.try_acquire( b->mutex, /*write=*/true ) ) {
+                if( b->node_list == internal::rehash_req)
+                    const_cast<concurrent_hash_map*>(this)->rehash_bucket( b, h & m ); //recursive rehashing
+            }
+            else lock.acquire( b->mutex, /*write=*/false );
+            __TBB_ASSERT(b->node_list!=internal::rehash_req,NULL);
+        }
+        n = search_bucket( key, b );
+        if( n )
+            return &n->item;
+        else if( check_mask_race( h, m ) )
+            goto restart;
+        return 0;
+    }
+};
+
+#if _MSC_VER && !defined(__INTEL_COMPILER)
+    // Suppress "conditional expression is constant" warning.
+    #pragma warning( push )
+    #pragma warning( disable: 4127 )
+#endif
+
+template<typename Key, typename T, typename HashCompare, typename A>
+bool concurrent_hash_map<Key,T,HashCompare,A>::lookup( bool op_insert, const Key &key, const T *t, const_accessor *result, bool write ) {
+    __TBB_ASSERT( !result || !result->my_node, NULL );
+    bool return_value;
+    hashcode_t const h = my_hash_compare.hash( key );
+    hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask );
+    segment_index_t grow_segment = 0;
+    node *n, *tmp_n = 0;
+    restart:
+    {//lock scope
+        __TBB_ASSERT((m&(m+1))==0, NULL);
+        return_value = false;
+        // get bucket
+        bucket_accessor b( this, h & m );
+
+        // find a node
+        n = search_bucket( key, b() );
+        if( op_insert ) {
+            // [opt] insert a key
+            if( !n ) {
+                if( !tmp_n ) {
+                    if(t) tmp_n = new( my_allocator ) node(key, *t);
+                    else  tmp_n = new( my_allocator ) node(key);
+                }
+                if( !b.is_writer() && !b.upgrade_to_writer() ) { // TODO: improved insertion
+                    // Rerun search_bucket, in case another thread inserted the item during the upgrade.
+                    n = search_bucket( key, b() );
+                    if( is_valid(n) ) { // unfortunately, it did
+                        b.downgrade_to_reader();
+                        goto exists;
+                    }
+                }
+                if( check_mask_race(h, m) )
+                    goto restart; // b.release() is done in ~b().
+                // insert and set flag to grow the container
+                grow_segment = insert_new_node( b(), n = tmp_n, m );
+                tmp_n = 0;
+                return_value = true;
+            }
+        } else { // find or count
+            if( !n ) {
+                if( check_mask_race( h, m ) )
+                    goto restart; // b.release() is done in ~b(). TODO: replace by continue
+                return false;
+            }
+            return_value = true;
+        }
+    exists:
+        if( !result ) goto check_growth;
+        // TODO: the following seems as generic/regular operation
+        // acquire the item
+        if( !result->try_acquire( n->mutex, write ) ) {
+            // we are unlucky, prepare for longer wait
+            tbb::internal::atomic_backoff trials;
+            do {
+                if( !trials.bounded_pause() ) {
+                    // the wait takes really long, restart the operation
+                    b.release();
+                    __TBB_ASSERT( !op_insert || !return_value, "Can't acquire new item in locked bucket?" );
+                    __TBB_Yield();
+                    m = (hashcode_t) itt_load_word_with_acquire( my_mask );
+                    goto restart;
+                }
+            } while( !result->try_acquire( n->mutex, write ) );
+        }
+    }//lock scope
+    result->my_node = n;
+    result->my_hash = h;
+check_growth:
+    // [opt] grow the container
+    if( grow_segment ) {
+#if __TBB_STATISTICS
+        my_info_resizes++; // concurrent ones
+#endif
+        enable_segment( grow_segment );
+    }
+    if( tmp_n ) // if op_insert only
+        delete_node( tmp_n );
+    return return_value;
+}
+
+template<typename Key, typename T, typename HashCompare, typename A>
+template<typename I>
+std::pair<I, I> concurrent_hash_map<Key,T,HashCompare,A>::internal_equal_range( const Key& key, I end_ ) const {
+    hashcode_t h = my_hash_compare.hash( key );
+    hashcode_t m = my_mask;
+    __TBB_ASSERT((m&(m+1))==0, NULL);
+    h &= m;
+    bucket *b = get_bucket( h );
+    while( b->node_list == internal::rehash_req ) {
+        m = ( 1u<<__TBB_Log2( h ) ) - 1; // get parent mask from the topmost bit
+        b = get_bucket( h &= m );
+    }
+    node *n = search_bucket( key, b );
+    if( !n )
+        return std::make_pair(end_, end_);
+    iterator lower(*this, h, b, n), upper(lower);
+    return std::make_pair(lower, ++upper);
+}
+
+template<typename Key, typename T, typename HashCompare, typename A>
+bool concurrent_hash_map<Key,T,HashCompare,A>::exclude( const_accessor &item_accessor ) {
+    __TBB_ASSERT( item_accessor.my_node, NULL );
+    node_base *const n = item_accessor.my_node;
+    hashcode_t const h = item_accessor.my_hash;
+    hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask );
+    do {
+        // get bucket
+        bucket_accessor b( this, h & m, /*writer=*/true );
+        node_base **p = &b()->node_list;
+        while( *p && *p != n )
+            p = &(*p)->next;
+        if( !*p ) { // someone else was the first
+            if( check_mask_race( h, m ) )
+                continue;
+            item_accessor.release();
+            return false;
+        }
+        __TBB_ASSERT( *p == n, NULL );
+        *p = n->next; // remove from container
+        my_size--;
+        break;
+    } while(true);
+    if( !item_accessor.is_writer() ) // need to get exclusive lock
+        item_accessor.upgrade_to_writer(); // return value means nothing here
+    item_accessor.release();
+    delete_node( n ); // Only one thread can delete it
+    return true;
+}
+
+template<typename Key, typename T, typename HashCompare, typename A>
+bool concurrent_hash_map<Key,T,HashCompare,A>::erase( const Key &key ) {
+    node_base *n;
+    hashcode_t const h = my_hash_compare.hash( key );
+    hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask );
+restart:
+    {//lock scope
+        // get bucket
+        bucket_accessor b( this, h & m );
+    search:
+        node_base **p = &b()->node_list;
+        n = *p;
+        while( is_valid(n) && !my_hash_compare.equal(key, static_cast<node*>(n)->item.first ) ) {
+            p = &n->next;
+            n = *p;
+        }
+        if( !n ) { // not found, but mask could be changed
+            if( check_mask_race( h, m ) )
+                goto restart;
+            return false;
+        }
+        else if( !b.is_writer() && !b.upgrade_to_writer() ) {
+            if( check_mask_race( h, m ) ) // contended upgrade, check mask
+                goto restart;
+            goto search;
+        }
+        *p = n->next;
+        my_size--;
+    }
+    {
+        typename node::scoped_t item_locker( n->mutex, /*write=*/true );
+    }
+    // note: there should be no threads attempting to acquire this mutex again, do not try to upgrade const_accessor!
+    delete_node( n ); // Only one thread can delete it due to write lock on the bucket
+    return true;
+}
+
+template<typename Key, typename T, typename HashCompare, typename A>
+void concurrent_hash_map<Key,T,HashCompare,A>::swap(concurrent_hash_map<Key,T,HashCompare,A> &table) {
+    std::swap(this->my_allocator, table.my_allocator);
+    std::swap(this->my_hash_compare, table.my_hash_compare);
+    internal_swap(table);
+}
+
+template<typename Key, typename T, typename HashCompare, typename A>
+void concurrent_hash_map<Key,T,HashCompare,A>::rehash(size_type sz) {
+    reserve( sz ); // TODO: add reduction of number of buckets as well
+    hashcode_t mask = my_mask;
+    hashcode_t b = (mask+1)>>1; // size or first index of the last segment
+    __TBB_ASSERT((b&(b-1))==0, NULL);
+    bucket *bp = get_bucket( b ); // only the last segment should be scanned for rehashing
+    for(; b <= mask; b++, bp++ ) {
+        node_base *n = bp->node_list;
+        __TBB_ASSERT( is_valid(n) || n == internal::empty_rehashed || n == internal::rehash_req, "Broken internal structure" );
+        __TBB_ASSERT( *reinterpret_cast<intptr_t*>(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during rehash() execution" );
+        if( n == internal::rehash_req ) { // rehash bucket, conditional because rehashing of a previous bucket may affect this one
+            hashcode_t h = b; bucket *b_old = bp;
+            do {
+                __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" );
+                hashcode_t m = ( 1u<<__TBB_Log2( h ) ) - 1; // get parent mask from the topmost bit
+                b_old = get_bucket( h &= m );
+            } while( b_old->node_list == internal::rehash_req );
+            // now h is the index of the root rehashed bucket b_old
+            mark_rehashed_levels( h ); // mark all non-rehashed children recursively across all segments
+            for( node_base **p = &b_old->node_list, *q = *p; is_valid(q); q = *p ) {
+                hashcode_t c = my_hash_compare.hash( static_cast<node*>(q)->item.first );
+                if( (c & mask) != h ) { // should be rehashed
+                    *p = q->next; // exclude from b_old
+                    bucket *b_new = get_bucket( c & mask );
+                    __TBB_ASSERT( b_new->node_list != internal::rehash_req, "hash() function changed for key in table or internal error" );
+                    add_to_bucket( b_new, q );
+                } else p = &q->next; // iterate to next item
+            }
+        }
+    }
+#if TBB_USE_PERFORMANCE_WARNINGS
+    int current_size = int(my_size), buckets = int(mask)+1, empty_buckets = 0, overpopulated_buckets = 0; // usage statistics
+    static bool reported = false;
+#endif
+#if TBB_USE_ASSERT || TBB_USE_PERFORMANCE_WARNINGS
+    for( b = 0; b <= mask; b++ ) {// check all buckets for consistency and gather usage statistics
+        if( b & (b-2) ) ++bp; // not the beginning of a segment
+        else bp = get_bucket( b );
+        node_base *n = bp->node_list;
+        __TBB_ASSERT( *reinterpret_cast<intptr_t*>(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during rehash() execution" );
+        __TBB_ASSERT( is_valid(n) || n == internal::empty_rehashed, "Broken internal structure" );
+#if TBB_USE_PERFORMANCE_WARNINGS
+        if( n == internal::empty_rehashed ) empty_buckets++;
+        else if( n->next ) overpopulated_buckets++;
+#endif
+#if TBB_USE_ASSERT
+        for( ; is_valid(n); n = n->next ) {
+            hashcode_t h = my_hash_compare.hash( static_cast<node*>(n)->item.first ) & mask;
+            __TBB_ASSERT( h == b, "hash() function changed for key in table or internal error" );
+        }
+#endif
+    }
+#endif // TBB_USE_ASSERT || TBB_USE_PERFORMANCE_WARNINGS
+#if TBB_USE_PERFORMANCE_WARNINGS
+    if( buckets > current_size) empty_buckets -= buckets - current_size;
+    else overpopulated_buckets -= current_size - buckets; // TODO: load_factor?
+    if( !reported && buckets >= 512 && ( 2*empty_buckets > current_size || 2*overpopulated_buckets > current_size ) ) {
+        tbb::internal::runtime_warning(
+            "Performance is not optimal because the hash function produces bad randomness in lower bits in %s.\nSize: %d  Empties: %d  Overlaps: %d",
+            typeid(*this).name(), current_size, empty_buckets, overpopulated_buckets );
+        reported = true;
+    }
+#endif
+}
+
+template<typename Key, typename T, typename HashCompare, typename A>
+void concurrent_hash_map<Key,T,HashCompare,A>::clear() {
+    hashcode_t m = my_mask;
+    __TBB_ASSERT((m&(m+1))==0, NULL);
+#if TBB_USE_ASSERT || TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS
+#if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS
+    int current_size = int(my_size), buckets = int(m)+1, empty_buckets = 0, overpopulated_buckets = 0; // usage statistics
+    static bool reported = false;
+#endif
+    bucket *bp = 0;
+    // check consistency
+    for( segment_index_t b = 0; b <= m; b++ ) {
+        if( b & (b-2) ) ++bp; // not the beginning of a segment
+        else bp = get_bucket( b );
+        node_base *n = bp->node_list;
+        __TBB_ASSERT( is_valid(n) || n == internal::empty_rehashed || n == internal::rehash_req, "Broken internal structure" );
+        __TBB_ASSERT( *reinterpret_cast<intptr_t*>(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during clear() execution" );
+#if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS
+        if( n == internal::empty_rehashed ) empty_buckets++;
+        else if( n == internal::rehash_req ) buckets--;
+        else if( n->next ) overpopulated_buckets++;
+#endif
+#if __TBB_EXTRA_DEBUG
+        for(; is_valid(n); n = n->next ) {
+            hashcode_t h = my_hash_compare.hash( static_cast<node*>(n)->item.first );
+            h &= m;
+            __TBB_ASSERT( h == b || get_bucket(h)->node_list == internal::rehash_req, "hash() function changed for key in table or internal error" );
+        }
+#endif
+    }
+#if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS
+#if __TBB_STATISTICS
+    printf( "items=%d buckets: capacity=%d rehashed=%d empty=%d overpopulated=%d"
+        " concurrent: resizes=%u rehashes=%u restarts=%u\n",
+        current_size, int(m+1), buckets, empty_buckets, overpopulated_buckets,
+        unsigned(my_info_resizes), unsigned(my_info_rehashes), unsigned(my_info_restarts) );
+    my_info_resizes = 0; // concurrent ones
+    my_info_restarts = 0; // race collisions
+    my_info_rehashes = 0;  // invocations of rehash_bucket
+#endif
+    if( buckets > current_size) empty_buckets -= buckets - current_size;
+    else overpopulated_buckets -= current_size - buckets; // TODO: load_factor?
+    if( !reported && buckets >= 512 && ( 2*empty_buckets > current_size || 2*overpopulated_buckets > current_size ) ) {
+        tbb::internal::runtime_warning(
+            "Performance is not optimal because the hash function produces bad randomness in lower bits in %s.\nSize: %d  Empties: %d  Overlaps: %d",
+            typeid(*this).name(), current_size, empty_buckets, overpopulated_buckets );
+        reported = true;
+    }
+#endif
+#endif//TBB_USE_ASSERT || TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS
+    my_size = 0;
+    segment_index_t s = segment_index_of( m );
+    __TBB_ASSERT( s+1 == pointers_per_table || !my_table[s+1], "wrong mask or concurrent grow" );
+    cache_aligned_allocator<bucket> alloc;
+    do {
+        __TBB_ASSERT( is_valid( my_table[s] ), "wrong mask or concurrent grow" );
+        segment_ptr_t buckets_ptr = my_table[s];
+        size_type sz = segment_size( s ? s : 1 );
+        for( segment_index_t i = 0; i < sz; i++ )
+            for( node_base *n = buckets_ptr[i].node_list; is_valid(n); n = buckets_ptr[i].node_list ) {
+                buckets_ptr[i].node_list = n->next;
+                delete_node( n );
+            }
+        if( s >= first_block) // the first segment or the next
+            alloc.deallocate( buckets_ptr, sz );
+        else if( s == embedded_block && embedded_block != first_block )
+            alloc.deallocate( buckets_ptr, segment_size(first_block)-embedded_buckets );
+        if( s >= embedded_block ) my_table[s] = 0;
+    } while(s-- > 0);
+    my_mask = embedded_buckets - 1;
+}
+
+template<typename Key, typename T, typename HashCompare, typename A>
+void concurrent_hash_map<Key,T,HashCompare,A>::internal_copy( const concurrent_hash_map& source ) {
+    reserve( source.my_size ); // TODO: load_factor?
+    hashcode_t mask = source.my_mask;
+    if( my_mask == mask ) { // optimized version
+        bucket *dst = 0, *src = 0;
+        bool rehash_required = false;
+        for( hashcode_t k = 0; k <= mask; k++ ) {
+            if( k & (k-2) ) ++dst,src++; // not the beginning of a segment
+            else { dst = get_bucket( k ); src = source.get_bucket( k ); }
+            __TBB_ASSERT( dst->node_list != internal::rehash_req, "Invalid bucket in destination table");
+            node *n = static_cast<node*>( src->node_list );
+            if( n == internal::rehash_req ) { // source is not rehashed, items are in previous buckets
+                rehash_required = true;
+                dst->node_list = internal::rehash_req;
+            } else for(; n; n = static_cast<node*>( n->next ) ) {
+                add_to_bucket( dst, new( my_allocator ) node(n->item.first, n->item.second) );
+                ++my_size; // TODO: replace by non-atomic op
+            }
+        }
+        if( rehash_required ) rehash();
+    } else internal_copy( source.begin(), source.end() );
+}
+
+template<typename Key, typename T, typename HashCompare, typename A>
+template<typename I>
+void concurrent_hash_map<Key,T,HashCompare,A>::internal_copy(I first, I last) {
+    hashcode_t m = my_mask;
+    for(; first != last; ++first) {
+        hashcode_t h = my_hash_compare.hash( first->first );
+        bucket *b = get_bucket( h & m );
+        __TBB_ASSERT( b->node_list != internal::rehash_req, "Invalid bucket in destination table");
+        node *n = new( my_allocator ) node(first->first, first->second);
+        add_to_bucket( b, n );
+        ++my_size; // TODO: replace by non-atomic op
+    }
+}
+
+} // namespace interface5
+
+using interface5::concurrent_hash_map;
+
+
+template<typename Key, typename T, typename HashCompare, typename A1, typename A2>
+inline bool operator==(const concurrent_hash_map<Key, T, HashCompare, A1> &a, const concurrent_hash_map<Key, T, HashCompare, A2> &b) {
+    if(a.size() != b.size()) return false;
+    typename concurrent_hash_map<Key, T, HashCompare, A1>::const_iterator i(a.begin()), i_end(a.end());
+    typename concurrent_hash_map<Key, T, HashCompare, A2>::const_iterator j, j_end(b.end());
+    for(; i != i_end; ++i) {
+        j = b.equal_range(i->first).first;
+        if( j == j_end || !(i->second == j->second) ) return false;
+    }
+    return true;
+}
+
+template<typename Key, typename T, typename HashCompare, typename A1, typename A2>
+inline bool operator!=(const concurrent_hash_map<Key, T, HashCompare, A1> &a, const concurrent_hash_map<Key, T, HashCompare, A2> &b)
+{    return !(a == b); }
+
+template<typename Key, typename T, typename HashCompare, typename A>
+inline void swap(concurrent_hash_map<Key, T, HashCompare, A> &a, concurrent_hash_map<Key, T, HashCompare, A> &b)
+{    a.swap( b ); }
+
+#if _MSC_VER && !defined(__INTEL_COMPILER)
+    #pragma warning( pop )
+#endif // warning 4127 is back
+
+} // namespace tbb
+
+#endif /* __TBB_concurrent_hash_map_H */
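
A minimal usage sketch for the concurrent_hash_map header above, following the word-counting pattern commonly used to demonstrate this container; it is not part of this commit. The names MyHashCompare, Tally, TallyBody and count_words() are illustrative assumptions, while the container API (accessor, insert(), range()) comes from the header itself.

    #include <string>
    #include "tbb/concurrent_hash_map.h"
    #include "tbb/blocked_range.h"
    #include "tbb/parallel_for.h"

    // Hashing/equality policy supplied for the HashCompare template parameter.
    struct MyHashCompare {
        static size_t hash( const std::string& s ) {
            size_t h = 0;
            for( std::string::const_iterator i = s.begin(); i != s.end(); ++i )
                h = ( h * 17 ) ^ static_cast<unsigned char>( *i );
            return h;
        }
        static bool equal( const std::string& a, const std::string& b ) { return a == b; }
    };

    typedef tbb::concurrent_hash_map<std::string, int, MyHashCompare> Tally;

    // parallel_for body: each thread inserts and updates entries concurrently.
    struct TallyBody {
        Tally& table;
        const std::string* words;
        TallyBody( Tally& t, const std::string* w ) : table( t ), words( w ) {}
        void operator()( const tbb::blocked_range<size_t>& r ) const {
            for( size_t i = r.begin(); i != r.end(); ++i ) {
                Tally::accessor a;            // accessor holds a write lock on the element
                table.insert( a, words[i] );  // inserts {word, 0} if the key is absent
                a->second += 1;               // update is safe while the lock is held
            }                                 // lock is released when 'a' goes out of scope
        }
    };

    void count_words( const std::string* words, size_t n, Tally& table ) {
        tbb::parallel_for( tbb::blocked_range<size_t>( 0, n ), TallyBody( table, words ) );
    }

Because each accessor holds a per-element lock, the increment is race-free even when several threads hit the same key. Once concurrent updates have finished, the whole table can also be traversed in parallel via table.range() and tbb::parallel_for, using the hash_map_range defined above.
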
diff --git a/tbb/include/tbb/concurrent_priority_queue.h b/tbb/include/tbb/concurrent_priority_queue.h
new file mode 100644 (file)
index 0000000..8a9f252
--- /dev/null
@@ -0,0 +1,364 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_concurrent_priority_queue_H
+#define __TBB_concurrent_priority_queue_H
+
+#if !TBB_PREVIEW_CONCURRENT_PRIORITY_QUEUE
+#error Set TBB_PREVIEW_CONCURRENT_PRIORITY_QUEUE to include concurrent_priority_queue.h
+#endif
+
+#include "atomic.h"
+#include "cache_aligned_allocator.h"
+#include "tbb_exception.h"
+#include "tbb_stddef.h"
+#include "tbb_profiling.h"
+#include "_aggregator_internal.h"
+#include <vector>
+#include <iterator>
+#include <functional>
+
+namespace tbb {
+namespace interface5 {
+
+using namespace tbb::internal;
+
+//! Concurrent priority queue
+template <typename T, typename Compare=std::less<T>, typename A=cache_aligned_allocator<T> >
+class concurrent_priority_queue {
+ public:
+    //! Element type in the queue.
+    typedef T value_type;
+
+    //! Reference type
+    typedef T& reference;
+
+    //! Const reference type
+    typedef const T& const_reference;
+
+    //! Integral type for representing size of the queue.
+    typedef size_t size_type;
+
+    //! Difference type for iterator
+    typedef ptrdiff_t difference_type;
+
+    //! Allocator type
+    typedef A allocator_type;
+
+    //! Constructs a new concurrent_priority_queue with default capacity
+    explicit concurrent_priority_queue(const allocator_type& a = allocator_type()) : mark(0), data(a) {
+        my_aggregator.initialize_handler(my_functor_t(this));
+    }
+
+    //! Constructs a new concurrent_priority_queue with capacity reserved for init_capacity elements
+    explicit concurrent_priority_queue(size_type init_capacity, const allocator_type& a = allocator_type()) : mark(0), data(a) {
+        data.reserve(init_capacity);
+        my_aggregator.initialize_handler(my_functor_t(this));
+    }
+
+    //! [begin,end) constructor
+    template<typename InputIterator>
+    concurrent_priority_queue(InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) : data(begin, end, a)
+    {
+        mark = 0;
+        my_aggregator.initialize_handler(my_functor_t(this));
+        heapify();
+    }
+
+    //! Copy constructor
+    /** State of this queue may not reflect results of pending
+       operations on the copied queue. */
+    explicit concurrent_priority_queue(const concurrent_priority_queue& src) : mark(src.mark), data(src.data.begin(), src.data.end(), src.data.get_allocator())
+    {
+        my_aggregator.initialize_handler(my_functor_t(this));
+        heapify();
+    }
+
+    concurrent_priority_queue(const concurrent_priority_queue& src, const allocator_type& a) : mark(src.mark), data(src.data.begin(), src.data.end(), a)
+    {
+        my_aggregator.initialize_handler(my_functor_t(this));
+        heapify();
+    }
+
+    //! Assignment operator
+    /** State of this queue may not reflect results of pending
+       operations on the copied queue. */
+    concurrent_priority_queue& operator=(const concurrent_priority_queue& src) {
+        if (this != &src) {
+            std::vector<value_type, allocator_type>(src.data.begin(), src.data.end(), src.data.get_allocator()).swap(data);
+            mark = src.mark;
+        }
+        return *this;
+    }
+
+    //! Returns true if empty, false otherwise
+    /** Returned value may not reflect results of pending operations. */
+    bool empty() const { return data.empty(); }
+
+    //! Returns the current number of elements contained in the queue
+    /** Returned value may not reflect results of pending operations. */
+    size_type size() const { return data.size(); }
+
+    //! Returns the current capacity (i.e. allocated storage) of the queue
+    /** Returned value may not reflect results of pending operations. */
+    size_type capacity() const { return data.capacity(); }
+
+    //! Pushes elem onto the queue, increasing capacity of queue if necessary
+    void push(const_reference elem) {
+        cpq_operation op_data(elem, PUSH_OP);
+        my_aggregator.execute(&op_data);
+        if (op_data.status == FAILED) // exception thrown
+            throw_exception(eid_bad_alloc);
+    }
+
+    //! Copies and removes the highest priority element
+    /** If the queue is not empty, copies the highest priority element into elem,
+        removes it from the queue, and returns true; otherwise returns false. */
+    bool try_pop(reference elem) {
+        cpq_operation op_data(POP_OP);
+        op_data.elem = &elem;
+        my_aggregator.execute(&op_data);
+        return op_data.status==SUCCEEDED;
+    }
+
+    //! If current capacity is less than new_cap, increases capacity to new_cap
+    void reserve(size_type new_cap) {
+        cpq_operation op_data(RESERVE_OP);
+        op_data.sz = new_cap;
+        my_aggregator.execute(&op_data);
+        if (op_data.status == FAILED) // exception thrown
+            throw_exception(eid_bad_alloc);
+    }
+
+    //! Clear the queue; not thread-safe
+    /** Resets size, effectively emptying queue; does not free space.
+        May not clear elements added in pending operations. */
+    void clear() {
+        data.clear();
+        mark = 0;
+    }
+
+    //! Shrink queue capacity to current contents; not thread-safe
+    void shrink_to_fit() {
+        std::vector<value_type, allocator_type>(data.begin(), data.end(), data.get_allocator()).swap(data);
+    }
+
+    //! Swap this queue with another; not thread-safe
+    void swap(concurrent_priority_queue& q) {
+        data.swap(q.data);
+        std::swap(mark, q.mark);
+    }
+
+    //! Return allocator object
+    allocator_type get_allocator() const { return data.get_allocator(); }
+
+ private:
+    enum operation_type {INVALID_OP, PUSH_OP, POP_OP, RESERVE_OP};
+    enum operation_status { WAIT=0, SUCCEEDED, FAILED };
+
+    class cpq_operation : public aggregated_operation<cpq_operation> {
+     public:
+        operation_type type;
+        union {
+            value_type *elem;
+            size_type sz;
+        };
+        cpq_operation(const_reference e, operation_type t) :
+            type(t), elem(const_cast<value_type*>(&e)) {}
+        cpq_operation(operation_type t) : type(t) {}
+    };
+
+    class my_functor_t {
+        concurrent_priority_queue<T, Compare, A> *cpq;
+     public:
+        my_functor_t() {}
+        my_functor_t(concurrent_priority_queue<T, Compare, A> *cpq_) : cpq(cpq_) {}
+        void operator()(cpq_operation* op_list) {
+            cpq->handle_operations(op_list);
+        }
+    };
+
+    aggregator< my_functor_t, cpq_operation> my_aggregator;
+    //! Padding added to avoid false sharing
+    char padding1[NFS_MaxLineSize - sizeof(aggregator< my_functor_t, cpq_operation >)];
+    //! The point at which unsorted elements begin
+    size_type mark;
+    Compare compare;
+    //! Padding added to avoid false sharing
+    char padding2[NFS_MaxLineSize - sizeof(size_type) - sizeof(Compare)];
+    //! Storage for the heap of elements in queue, plus unheapified elements
+    /** data has the following structure:
+
+         binary unheapified
+          heap   elements
+        ____|_______|____
+        |       |       |
+        v       v       v
+        [_|...|_|_|...|_| |...| ]
+         0       ^       ^       ^
+                 |       |       |__capacity
+                 |       |__size
+                 |__mark
+                 
+
+        Thus, data stores the binary heap starting at position 0 through
+        mark-1 (it may be empty).  Then there are 0 or more elements 
+        that have not yet been inserted into the heap, in positions 
+        mark through size-1. */
+    std::vector<value_type, allocator_type> data;
+
+    void handle_operations(cpq_operation *op_list) {
+        cpq_operation *tmp, *pop_list=NULL;
+
+        __TBB_ASSERT(mark == data.size(), NULL);
+
+        // First pass processes all constant-time operations: pushes, reserves,
+        // and those pops that can take the most recently pushed element directly.
+        while (op_list) {
+            // ITT note: &(op_list->status) tag is used to cover accesses to op_list
+            // node. This thread is going to handle the operation, and so will acquire it
+            // and perform the associated operation w/o triggering a race condition; the
+            // thread that created the operation is waiting on the status field, so when
+            // this thread is done with the operation, it will perform a
+            // store_with_release to give control back to the waiting thread in
+            // aggregator::insert_operation.
+            call_itt_notify(acquired, &(op_list->status));
+            __TBB_ASSERT(op_list->type != INVALID_OP, NULL);
+            tmp = op_list;
+            op_list = itt_hide_load_word(op_list->next);
+            if (tmp->type == PUSH_OP) {
+                __TBB_TRY {
+                    data.push_back(*(tmp->elem));
+                    itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED));
+                } __TBB_CATCH(...) {
+                    itt_store_word_with_release(tmp->status, uintptr_t(FAILED));
+                }
+            }
+            else if (tmp->type == POP_OP) {
+                if (mark < data.size() &&
+                    compare(data[0], data[data.size()-1])) {
+                    // there are newly pushed elems and the last one
+                    // is higher than top
+                    *(tmp->elem) = data[data.size()-1]; // copy the data
+                    itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED));
+                    data.pop_back();
+                    __TBB_ASSERT(mark<=data.size(), NULL);
+                }
+                else { // no convenient item to pop; postpone
+                    itt_hide_store_word(tmp->next, pop_list);
+                    pop_list = tmp;
+                }
+            }
+            else {
+                __TBB_ASSERT(tmp->type == RESERVE_OP, NULL);
+                __TBB_TRY {
+                    data.reserve(tmp->sz);
+                    itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED));
+                } __TBB_CATCH(...) {
+                    itt_store_word_with_release(tmp->status, uintptr_t(FAILED));
+                }
+            }
+        }
+
+        // second pass processes pop operations
+        while (pop_list) {
+            tmp = pop_list;
+            pop_list = itt_hide_load_word(pop_list->next);
+            __TBB_ASSERT(tmp->type == POP_OP, NULL);
+            if (data.empty()) {
+                itt_store_word_with_release(tmp->status, uintptr_t(FAILED));
+            }
+            else {
+                __TBB_ASSERT(mark<=data.size(), NULL);
+                if (mark < data.size() &&
+                    compare(data[0], data[data.size()-1])) {
+                    // there are newly pushed elems and the last one is
+                    // higher than top
+                    *(tmp->elem) = data[data.size()-1]; // copy the data
+                    itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED));
+                    data.pop_back();
+                }
+                else { // extract top and push last element down heap
+                    *(tmp->elem) = data[0]; // copy the data
+                    itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED));
+                    reheap();
+                }
+            }
+        }
+
+        // heapify any leftover pushed elements before doing the next
+        // batch of operations
+        if (mark<data.size()) heapify();
+        __TBB_ASSERT(mark == data.size(), NULL);
+    }
+
+    //! Merge unsorted elements into heap
+    void heapify() {
+        if (!mark) mark = 1;
+        for (; mark<data.size(); ++mark) {
+            // for each unheapified element under size
+            size_type cur_pos = mark;
+            value_type to_place = data[mark];
+            do { // push to_place up the heap
+                size_type parent = (cur_pos-1)>>1;
+                if (!compare(data[parent], to_place)) break;
+                data[cur_pos] = data[parent];
+                cur_pos = parent;
+            } while( cur_pos );
+            data[cur_pos] = to_place;
+        }
+    }
+
+    //! Re-heapify after an extraction
+    /** Re-heapify by pushing last element down the heap from the root. */
+    void reheap() {
+        size_type cur_pos=0, child=1;
+
+        while (child < mark) {
+            size_type target = child;
+            if (child+1 < mark && compare(data[child], data[child+1]))
+                ++target;
+            // target now has the higher priority child
+            if (compare(data[target], data[data.size()-1])) break;
+            data[cur_pos] = data[target];
+            cur_pos = target;
+            child = (cur_pos<<1)+1;
+        }
+        data[cur_pos] = data[data.size()-1];
+        data.pop_back();
+        if (mark > data.size()) mark = data.size();
+    }
+};
+
+} // namespace interface5
+
+using interface5::concurrent_priority_queue;
+
+} // namespace tbb
+
+#endif /* __TBB_concurrent_priority_queue_H */
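
A brief usage sketch for the preview concurrent_priority_queue above; it is not part of this commit. The Job type, the JobLess comparator and the produce()/consume() helpers are illustrative assumptions; the preview macro, push() and try_pop() come from the header itself.

    #define TBB_PREVIEW_CONCURRENT_PRIORITY_QUEUE 1   // required by the header guard above
    #include "tbb/concurrent_priority_queue.h"

    struct Job {
        int priority;
        int id;
    };

    // Ordering functor: jobs with larger 'priority' values are popped first.
    struct JobLess {
        bool operator()( const Job& a, const Job& b ) const {
            return a.priority < b.priority;
        }
    };

    typedef tbb::concurrent_priority_queue<Job, JobLess> JobQueue;

    // Safe to call from many threads at once; capacity grows as needed.
    void produce( JobQueue& q, int count ) {
        for( int i = 0; i < count; ++i ) {
            Job j = { i % 10, i };
            q.push( j );
        }
    }

    // Drains the queue; try_pop() returns false once no element is available.
    int consume( JobQueue& q ) {
        Job j;
        int handled = 0;
        while( q.try_pop( j ) )
            ++handled;
        return handled;
    }

Pending operations are funneled through the aggregator shown above, so pushes and pops issued by many threads are batched and applied by a single handler at a time; push() and reserve() signal allocation failure by throwing, as indicated by the FAILED status handling.
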
diff --git a/tbb/include/tbb/concurrent_queue.h b/tbb/include/tbb/concurrent_queue.h
new file mode 100644 (file)
index 0000000..5ea6909
--- /dev/null
@@ -0,0 +1,413 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_concurrent_queue_H
+#define __TBB_concurrent_queue_H
+
+#include "_concurrent_queue_internal.h"
+
+namespace tbb {
+
+namespace strict_ppl {
+
+//! A high-performance thread-safe non-blocking concurrent queue.
+/** Multiple threads may each push and pop concurrently.
+    Assignment construction is not allowed.
+    @ingroup containers */
+template<typename T, typename A = cache_aligned_allocator<T> > 
+class concurrent_queue: public internal::concurrent_queue_base_v3<T> {
+    template<typename Container, typename Value> friend class internal::concurrent_queue_iterator;
+
+    //! Allocator type
+    typedef typename A::template rebind<char>::other page_allocator_type;
+    page_allocator_type my_allocator;
+
+    //! Allocates a block of size n (bytes)
+    /*override*/ virtual void *allocate_block( size_t n ) {
+        void *b = reinterpret_cast<void*>(my_allocator.allocate( n ));
+        if( !b )
+            internal::throw_exception(internal::eid_bad_alloc); 
+        return b;
+    }
+
+    //! Deallocates block created by allocate_block.
+    /*override*/ virtual void deallocate_block( void *b, size_t n ) {
+        my_allocator.deallocate( reinterpret_cast<char*>(b), n );
+    }
+
+public:
+    //! Element type in the queue.
+    typedef T value_type;
+
+    //! Reference type
+    typedef T& reference;
+
+    //! Const reference type
+    typedef const T& const_reference;
+
+    //! Integral type for representing size of the queue.
+    typedef size_t size_type;
+
+    //! Difference type for iterator
+    typedef ptrdiff_t difference_type;
+
+    //! Allocator type
+    typedef A allocator_type;
+
+    //! Construct empty queue
+    explicit concurrent_queue(const allocator_type& a = allocator_type()) : 
+        my_allocator( a )
+    {
+    }
+
+    //! [begin,end) constructor
+    template<typename InputIterator>
+    concurrent_queue( InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) :
+        my_allocator( a )
+    {
+        for( ; begin != end; ++begin )
+            this->internal_push(&*begin);
+    }
+    
+    //! Copy constructor
+    concurrent_queue( const concurrent_queue& src, const allocator_type& a = allocator_type()) : 
+        internal::concurrent_queue_base_v3<T>(), my_allocator( a )
+    {
+        this->assign( src );
+    }
+    
+    //! Destroy queue
+    ~concurrent_queue();
+
+    //! Enqueue an item at tail of queue.
+    void push( const T& source ) {
+        this->internal_push( &source );
+    }
+
+    //! Attempt to dequeue an item from head of queue.
+    /** Does not wait for item to become available.
+        Returns true if successful; false otherwise. */
+    bool try_pop( T& result ) {
+        return this->internal_try_pop( &result );
+    }
+
+    //! Return the number of items in the queue; thread unsafe
+    size_type unsafe_size() const {return this->internal_size();}
+
+    //! Equivalent to size()==0.
+    bool empty() const {return this->internal_empty();}
+
+    //! Clear the queue. Not thread-safe.
+    void clear();
+
+    //! Return allocator object
+    allocator_type get_allocator() const { return this->my_allocator; }
+
+    typedef internal::concurrent_queue_iterator<concurrent_queue,T> iterator;
+    typedef internal::concurrent_queue_iterator<concurrent_queue,const T> const_iterator;
+
+    //------------------------------------------------------------------------
+    // The iterators are intended only for debugging.  They are slow and not thread safe.
+    //------------------------------------------------------------------------
+    iterator unsafe_begin() {return iterator(*this);}
+    iterator unsafe_end() {return iterator();}
+    const_iterator unsafe_begin() const {return const_iterator(*this);}
+    const_iterator unsafe_end() const {return const_iterator();}
+} ;
+
+template<typename T, class A>
+concurrent_queue<T,A>::~concurrent_queue() {
+    clear();
+    this->internal_finish_clear();
+}
+
+template<typename T, class A>
+void concurrent_queue<T,A>::clear() {
+    while( !empty() ) {
+        T value;
+        this->internal_try_pop(&value);
+    }
+}
+
+} // namespace strict_ppl
+    
+//! A high-performance thread-safe blocking concurrent bounded queue.
+/** This is the pre-PPL TBB concurrent queue which supports boundedness and blocking semantics.
+    Note that method names agree with the PPL-style concurrent queue.
+    Multiple threads may each push and pop concurrently.
+    Assignment construction is not allowed.
+    @ingroup containers */
+template<typename T, class A = cache_aligned_allocator<T> >
+class concurrent_bounded_queue: public internal::concurrent_queue_base_v3 {
+    template<typename Container, typename Value> friend class internal::concurrent_queue_iterator;
+
+    //! Allocator type
+    typedef typename A::template rebind<char>::other page_allocator_type;
+    page_allocator_type my_allocator;
+
+    typedef typename concurrent_queue_base_v3::padded_page<T> padded_page;
+    //! Class used to ensure exception-safety of method "pop" 
+    class destroyer: internal::no_copy {
+        T& my_value;
+    public:
+        destroyer( T& value ) : my_value(value) {}
+        ~destroyer() {my_value.~T();}          
+    };
+
+    T& get_ref( page& p, size_t index ) {
+        __TBB_ASSERT( index<items_per_page, NULL );
+        return (&static_cast<padded_page*>(static_cast<void*>(&p))->last)[index];
+    }
+
+    /*override*/ virtual void copy_item( page& dst, size_t index, const void* src ) {
+        new( &get_ref(dst,index) ) T(*static_cast<const T*>(src)); 
+    }
+
+    /*override*/ virtual void copy_page_item( page& dst, size_t dindex, const page& src, size_t sindex ) {
+        new( &get_ref(dst,dindex) ) T( get_ref( const_cast<page&>(src), sindex ) );
+    }
+
+    /*override*/ virtual void assign_and_destroy_item( void* dst, page& src, size_t index ) {
+        T& from = get_ref(src,index);
+        destroyer d(from);
+        *static_cast<T*>(dst) = from;
+    }
+
+    /*override*/ virtual page *allocate_page() {
+        size_t n = sizeof(padded_page) + (items_per_page-1)*sizeof(T);
+        page *p = reinterpret_cast<page*>(my_allocator.allocate( n ));
+        if( !p )
+            internal::throw_exception(internal::eid_bad_alloc); 
+        return p;
+    }
+
+    /*override*/ virtual void deallocate_page( page *p ) {
+        size_t n = sizeof(padded_page) + items_per_page*sizeof(T);
+        my_allocator.deallocate( reinterpret_cast<char*>(p), n );
+    }
+
+public:
+    //! Element type in the queue.
+    typedef T value_type;
+
+    //! Allocator type
+    typedef A allocator_type;
+
+    //! Reference type
+    typedef T& reference;
+
+    //! Const reference type
+    typedef const T& const_reference;
+
+    //! Integral type for representing size of the queue.
+    /** Note that the size_type is a signed integral type.
+        This is because the size can be negative if there are pending pops without corresponding pushes. */
+    typedef std::ptrdiff_t size_type;
+
+    //! Difference type for iterator
+    typedef std::ptrdiff_t difference_type;
+
+    //! Construct empty queue
+    explicit concurrent_bounded_queue(const allocator_type& a = allocator_type()) : 
+        concurrent_queue_base_v3( sizeof(T) ), my_allocator( a )
+    {
+    }
+
+    //! Copy constructor
+    concurrent_bounded_queue( const concurrent_bounded_queue& src, const allocator_type& a = allocator_type()) : 
+        concurrent_queue_base_v3( sizeof(T) ), my_allocator( a )
+    {
+        assign( src );
+    }
+
+    //! [begin,end) constructor
+    template<typename InputIterator>
+    concurrent_bounded_queue( InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) :
+        concurrent_queue_base_v3( sizeof(T) ), my_allocator( a )
+    {
+        for( ; begin != end; ++begin )
+            internal_push_if_not_full(&*begin);
+    }
+
+    //! Destroy queue
+    ~concurrent_bounded_queue();
+
+    //! Enqueue an item at tail of queue.
+    void push( const T& source ) {
+        internal_push( &source );
+    }
+
+    //! Dequeue item from head of queue.
+    /** Block until an item becomes available, and then dequeue it. */
+    void pop( T& destination ) {
+        internal_pop( &destination );
+    }
+
+    //! Enqueue an item at tail of queue if queue is not already full.
+    /** Does not wait for queue to become not full.
+        Returns true if item is pushed; false if queue was already full. */
+    bool try_push( const T& source ) {
+        return internal_push_if_not_full( &source );
+    }
+
+    //! Attempt to dequeue an item from head of queue.
+    /** Does not wait for item to become available.
+        Returns true if successful; false otherwise. */
+    bool try_pop( T& destination ) {
+        return internal_pop_if_present( &destination );
+    }
+
+    //! Return number of pushes minus number of pops.
+    /** Note that the result can be negative if there are pops waiting for the 
+        corresponding pushes.  The result can also exceed capacity() if there 
+        are push operations in flight. */
+    size_type size() const {return internal_size();}
+
+    //! Equivalent to size()<=0.
+    bool empty() const {return internal_empty();}
+
+    //! Maximum number of allowed elements
+    size_type capacity() const {
+        return my_capacity;
+    }
+
+    //! Set the capacity
+    /** Setting the capacity to 0 causes subsequent try_push operations to always fail,
+        and subsequent push operations to block forever. */
+    void set_capacity( size_type new_capacity ) {
+        internal_set_capacity( new_capacity, sizeof(T) );
+    }
+
+    //! Return allocator object
+    allocator_type get_allocator() const { return this->my_allocator; }
+
+    //! Clear the queue. Not thread-safe.
+    void clear();
+
+    typedef internal::concurrent_queue_iterator<concurrent_bounded_queue,T> iterator;
+    typedef internal::concurrent_queue_iterator<concurrent_bounded_queue,const T> const_iterator;
+
+    //------------------------------------------------------------------------
+    // The iterators are intended only for debugging.  They are slow and not thread safe.
+    //------------------------------------------------------------------------
+    iterator unsafe_begin() {return iterator(*this);}
+    iterator unsafe_end() {return iterator();}
+    const_iterator unsafe_begin() const {return const_iterator(*this);}
+    const_iterator unsafe_end() const {return const_iterator();}
+
+}; 
+
+template<typename T, class A>
+concurrent_bounded_queue<T,A>::~concurrent_bounded_queue() {
+    clear();
+    internal_finish_clear();
+}
+
+template<typename T, class A>
+void concurrent_bounded_queue<T,A>::clear() {
+    while( !empty() ) {
+        T value;
+        internal_pop_if_present(&value);
+    }
+}
+
+namespace deprecated {
+
+//! A high-performance thread-safe blocking concurrent bounded queue.
+/** This is the pre-PPL TBB concurrent queue which supports boundedness and blocking semantics.
+    Note that method names agree with the PPL-style concurrent queue.
+    Multiple threads may each push and pop concurrently.
+    Assignment construction is not allowed.
+    @ingroup containers */
+template<typename T, class A = cache_aligned_allocator<T> > 
+class concurrent_queue: public concurrent_bounded_queue<T,A> {
+#if !__TBB_TEMPLATE_FRIENDS_BROKEN
+    template<typename Container, typename Value> friend class internal::concurrent_queue_iterator;
+#endif 
+
+public:
+    //! Construct empty queue
+    explicit concurrent_queue(const A& a = A()) : 
+        concurrent_bounded_queue<T,A>( a )
+    {
+    }
+
+    //! Copy constructor
+    concurrent_queue( const concurrent_queue& src, const A& a = A()) : 
+        concurrent_bounded_queue<T,A>( src, a )
+    {
+    }
+
+    //! [begin,end) constructor
+    template<typename InputIterator>
+    concurrent_queue( InputIterator b /*begin*/, InputIterator e /*end*/, const A& a = A()) :
+        concurrent_bounded_queue<T,A>( b, e, a )
+    {
+    }
+
+    //! Enqueue an item at tail of queue if queue is not already full.
+    /** Does not wait for queue to become not full.
+        Returns true if item is pushed; false if queue was already full. */
+    bool push_if_not_full( const T& source ) {
+        return this->try_push( source );
+    }
+
+    //! Attempt to dequeue an item from head of queue.
+    /** Does not wait for item to become available.
+        Returns true if successful; false otherwise. 
+        @deprecated Use try_pop()
+        */
+    bool pop_if_present( T& destination ) {
+        return this->try_pop( destination );
+    }
+
+    typedef typename concurrent_bounded_queue<T,A>::iterator iterator;
+    typedef typename concurrent_bounded_queue<T,A>::const_iterator const_iterator;
+    //
+    //------------------------------------------------------------------------
+    // The iterators are intended only for debugging.  They are slow and not thread safe.
+    //------------------------------------------------------------------------
+    iterator begin() {return this->unsafe_begin();}
+    iterator end() {return this->unsafe_end();}
+    const_iterator begin() const {return this->unsafe_begin();}
+    const_iterator end() const {return this->unsafe_end();}
+}; 
+
+} // namespace deprecated
+    
+
+#if TBB_DEPRECATED
+using deprecated::concurrent_queue;
+#else
+using strict_ppl::concurrent_queue;    
+#endif
+
+} // namespace tbb
+
+#endif /* __TBB_concurrent_queue_H */
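A small illustrative sketch (not part of the diff) of the two queue flavours this header adds, using only methods visible above: the unbounded, non-blocking strict_ppl::concurrent_queue (exported as tbb::concurrent_queue when TBB_DEPRECATED is off) and the blocking concurrent_bounded_queue.

    #include "tbb/concurrent_queue.h"
    #include <iostream>

    int main() {
        // Unbounded queue: push never blocks, try_pop never waits.
        tbb::concurrent_queue<int> q;
        q.push(1);
        q.push(2);
        int v;
        while (q.try_pop(v))
            std::cout << v << ' ';             // FIFO order: 1 2

        // Bounded queue: push blocks when full, pop blocks when empty.
        tbb::concurrent_bounded_queue<int> bq;
        bq.set_capacity(2);
        bq.push(10);
        bq.push(20);
        if (!bq.try_push(30))                  // fails instead of blocking: queue is full
            std::cout << "\nqueue full\n";
        bq.pop(v);                             // blocking pop; v == 10
        std::cout << v << std::endl;
        return 0;
    }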
diff --git a/tbb/include/tbb/concurrent_unordered_map.h b/tbb/include/tbb/concurrent_unordered_map.h
new file mode 100644 (file)
index 0000000..ff13c2a
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+/* Container implementations in this header are based on PPL implementations
+   provided by Microsoft. */
+
+#ifndef __TBB_concurrent_unordered_map_H
+#define __TBB_concurrent_unordered_map_H
+
+#include "_concurrent_unordered_internal.h"
+
+namespace tbb
+{
+
+// Template class for hash compare
+template<typename Key>
+class tbb_hash
+{
+public:
+    tbb_hash() {}
+
+    size_t operator()(const Key& key) const
+    {
+        return tbb_hasher(key);
+    }
+};
+
+namespace interface5 {
+
+// Template class for hash map traits
+template<typename Key, typename T, typename Hash_compare, typename Allocator, bool Allow_multimapping>
+class concurrent_unordered_map_traits
+{
+protected:
+    typedef std::pair<const Key, T> value_type;
+    typedef Key key_type;
+    typedef Hash_compare hash_compare;
+    typedef typename Allocator::template rebind<value_type>::other allocator_type;
+    enum { allow_multimapping = Allow_multimapping };
+
+    concurrent_unordered_map_traits() : my_hash_compare() {}
+    concurrent_unordered_map_traits(const hash_compare& hc) : my_hash_compare(hc) {}
+
+    class value_compare : public std::binary_function<value_type, value_type, bool>
+    {
+        friend class concurrent_unordered_map_traits<Key, T, Hash_compare, Allocator, Allow_multimapping>;
+
+    public:
+        bool operator()(const value_type& left, const value_type& right) const
+        {
+            return (my_hash_compare(left.first, right.first));
+        }
+
+        value_compare(const hash_compare& comparator) : my_hash_compare(comparator) {}
+
+    protected:
+        hash_compare my_hash_compare;    // the comparator predicate for keys
+    };
+
+    template<class Type1, class Type2>
+    static const Key& get_key(const std::pair<Type1, Type2>& value) {
+        return (value.first);
+    }
+
+    hash_compare my_hash_compare; // the comparator predicate for keys
+};
+
+template <typename Key, typename T, typename Hasher = tbb_hash<Key>, typename Key_equality = std::equal_to<Key>, typename Allocator = tbb::tbb_allocator<std::pair<const Key, T> > >
+class concurrent_unordered_map : public internal::concurrent_unordered_base< concurrent_unordered_map_traits<Key, T, internal::hash_compare<Key, Hasher, Key_equality>, Allocator, false> >
+{
+    // Base type definitions
+    typedef internal::hash_compare<Key, Hasher, Key_equality> hash_compare;
+    typedef internal::concurrent_unordered_base< concurrent_unordered_map_traits<Key, T, hash_compare, Allocator, false> > base_type;
+    typedef concurrent_unordered_map_traits<Key, T, internal::hash_compare<Key, Hasher, Key_equality>, Allocator, false> traits_type;
+    using traits_type::my_hash_compare;
+#if __TBB_EXTRA_DEBUG
+public:
+#endif
+    using traits_type::allow_multimapping;
+public:
+    using base_type::end;
+    using base_type::find;
+    using base_type::insert;
+
+    // Type definitions
+    typedef Key key_type;
+    typedef typename base_type::value_type value_type;
+    typedef T mapped_type;
+    typedef Hasher hasher;
+    typedef Key_equality key_equal;
+    typedef hash_compare key_compare;
+
+    typedef typename base_type::allocator_type allocator_type;
+    typedef typename base_type::pointer pointer;
+    typedef typename base_type::const_pointer const_pointer;
+    typedef typename base_type::reference reference;
+    typedef typename base_type::const_reference const_reference;
+
+    typedef typename base_type::size_type size_type;
+    typedef typename base_type::difference_type difference_type;
+
+    typedef typename base_type::iterator iterator;
+    typedef typename base_type::const_iterator const_iterator;
+    typedef typename base_type::iterator local_iterator;
+    typedef typename base_type::const_iterator const_local_iterator;
+
+    // Construction/destruction/copying
+    explicit concurrent_unordered_map(size_type n_of_buckets = 8, const hasher& a_hasher = hasher(),
+        const key_equal& a_keyeq = key_equal(), const allocator_type& a = allocator_type())
+        : base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
+    {
+    }
+
+    concurrent_unordered_map(const Allocator& a) : base_type(8, key_compare(), a)
+    {
+    }
+
+    template <typename Iterator>
+    concurrent_unordered_map(Iterator first, Iterator last, size_type n_of_buckets = 8, const hasher& a_hasher = hasher(),
+        const key_equal& a_keyeq = key_equal(), const allocator_type& a = allocator_type())
+        : base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
+    {
+        for (; first != last; ++first)
+            base_type::insert(*first);
+    }
+
+    concurrent_unordered_map(const concurrent_unordered_map& table) : base_type(table)
+    {
+    }
+
+    concurrent_unordered_map(const concurrent_unordered_map& table, const Allocator& a)
+        : base_type(table, a)
+    {
+    }
+
+    concurrent_unordered_map& operator=(const concurrent_unordered_map& table)
+    {
+        base_type::operator=(table);
+        return (*this);
+    }
+
+    iterator unsafe_erase(const_iterator where)
+    {
+        return base_type::unsafe_erase(where);
+    }
+
+    size_type unsafe_erase(const key_type& key)
+    {
+        return base_type::unsafe_erase(key);
+    }
+
+    iterator unsafe_erase(const_iterator first, const_iterator last)
+    {
+        return base_type::unsafe_erase(first, last);
+    }
+
+    void swap(concurrent_unordered_map& table)
+    {
+        base_type::swap(table);
+    }
+
+    // Observers
+    hasher hash_function() const
+    {
+        return my_hash_compare.my_hash_object;
+    }
+
+    key_equal key_eq() const
+    {
+        return my_hash_compare.my_key_compare_object;
+    }
+
+    mapped_type& operator[](const key_type& key)
+    {
+        iterator where = find(key);
+
+        if (where == end())
+        {
+            where = insert(std::pair<key_type, mapped_type>(key, mapped_type())).first;
+        }
+
+        return ((*where).second);
+    }
+
+    mapped_type& at(const key_type& key)
+    {
+        iterator where = find(key);
+
+        if (where == end())
+        {
+            tbb::internal::throw_exception(tbb::internal::eid_invalid_key);
+        }
+
+        return ((*where).second);
+    }
+
+    const mapped_type& at(const key_type& key) const
+    {
+        const_iterator where = find(key);
+
+        if (where == end())
+        {
+            tbb::internal::throw_exception(tbb::internal::eid_invalid_key);
+        }
+
+        return ((*where).second);
+    }
+};
+
+} // namespace interface5
+
+using interface5::concurrent_unordered_map;
+
+} // namespace tbb
+
+#endif // __TBB_concurrent_unordered_map_H
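An illustrative sketch (not part of the diff) of the map interface added above, using only members that appear in this header: operator[], insert(), find(), at() and unsafe_erase(). Concurrent insertion and lookup are safe; erasure is not, hence the unsafe_ prefix.

    #include "tbb/concurrent_unordered_map.h"
    #include <iostream>
    #include <string>
    #include <utility>

    int main() {
        tbb::concurrent_unordered_map<std::string, int> counts;

        // operator[] default-constructs a missing value; insert() takes a key/value pair.
        counts["apples"] = 3;
        counts.insert(std::make_pair(std::string("pears"), 5));

        // find() returns end() for a missing key; at() throws instead.
        tbb::concurrent_unordered_map<std::string, int>::iterator it = counts.find("apples");
        if (it != counts.end())
            std::cout << it->first << " -> " << it->second << std::endl;
        std::cout << counts.at("pears") << std::endl;

        // Erasure is not safe to run concurrently with other operations.
        counts.unsafe_erase("apples");
        return 0;
    }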
diff --git a/tbb/include/tbb/concurrent_vector.h b/tbb/include/tbb/concurrent_vector.h
new file mode 100644 (file)
index 0000000..abcc645
--- /dev/null
@@ -0,0 +1,1061 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_concurrent_vector_H
+#define __TBB_concurrent_vector_H
+
+#include "tbb_stddef.h"
+#include "tbb_exception.h"
+#include "atomic.h"
+#include "cache_aligned_allocator.h"
+#include "blocked_range.h"
+#include "tbb_machine.h"
+#include "tbb_profiling.h"
+#include <new>
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
+    #pragma warning (push)
+    #pragma warning (disable: 4530)
+#endif
+
+#include <algorithm>
+#include <iterator>
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    #pragma warning (pop)
+#endif
+
+#if _MSC_VER==1500 && !__INTEL_COMPILER
+    // VS2008/VC9 seems to have an issue: <limits> pulls in math.h
+    #pragma warning( push )
+    #pragma warning( disable: 4985 )
+#endif
+#include <limits> /* std::numeric_limits */
+#if _MSC_VER==1500 && !__INTEL_COMPILER
+    #pragma warning( pop )
+#endif
+
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && defined(_Wp64)
+    // Workaround for overzealous compiler warnings in /Wp64 mode
+    #pragma warning (push)
+    #pragma warning (disable: 4267)
+#endif
+
+namespace tbb {
+
+template<typename T, class A = cache_aligned_allocator<T> >
+class concurrent_vector;
+
+//! @cond INTERNAL
+namespace internal {
+
+    //! Bad allocation marker
+    static void *const vector_allocation_error_flag = reinterpret_cast<void*>(size_t(63));
+
+    //! Base class of concurrent vector implementation.
+    /** @ingroup containers */
+    class concurrent_vector_base_v3 {
+    protected:
+
+        // Basic types declarations
+        typedef size_t segment_index_t;
+        typedef size_t size_type;
+
+        // Using enumerations due to Mac linking problems of static const variables
+        enum {
+            // Size constants
+            default_initial_segments = 1, // 2 initial items
+            //! Number of slots for segment's pointers inside the class
+            pointers_per_short_table = 3, // to fit into 8 words of entire structure
+            pointers_per_long_table = sizeof(segment_index_t) * 8 // one segment per bit
+        };
+
+        // Segment pointer. Can be zero-initialized
+        struct segment_t {
+            void* array;
+#if TBB_USE_ASSERT
+            ~segment_t() {
+                __TBB_ASSERT( array <= internal::vector_allocation_error_flag, "should have been freed by clear" );
+            }
+#endif /* TBB_USE_ASSERT */
+        };
+        // Data fields
+
+        //! allocator function pointer
+        void* (*vector_allocator_ptr)(concurrent_vector_base_v3 &, size_t);
+
+        //! count of segments in the first block
+        atomic<size_type> my_first_block;
+
+        //! Requested size of vector
+        atomic<size_type> my_early_size;
+
+        //! Pointer to the segments table
+        atomic<segment_t*> my_segment;
+
+        //! embedded storage of segment pointers
+        segment_t my_storage[pointers_per_short_table];
+
+        // Methods
+
+        concurrent_vector_base_v3() {
+            my_early_size = 0;
+            my_first_block = 0; // deliberately not default_initial_segments
+            for( segment_index_t i = 0; i < pointers_per_short_table; i++)
+                my_storage[i].array = NULL;
+            my_segment = my_storage;
+        }
+        __TBB_EXPORTED_METHOD ~concurrent_vector_base_v3();
+
+        static segment_index_t segment_index_of( size_type index ) {
+            return segment_index_t( __TBB_Log2( index|1 ) );
+        }
+
+        static segment_index_t segment_base( segment_index_t k ) {
+            return (segment_index_t(1)<<k & ~segment_index_t(1));
+        }
+
+        static inline segment_index_t segment_base_index_of( segment_index_t &index ) {
+            segment_index_t k = segment_index_of( index );
+            index -= segment_base(k);
+            return k;
+        }
+
+        static size_type segment_size( segment_index_t k ) {
+            return segment_index_t(1)<<k; // fake value for k==0
+        }
+
+        //! An operation on an n-element array starting at begin.
+        typedef void (__TBB_EXPORTED_FUNC *internal_array_op1)(void* begin, size_type n );
+
+        //! An operation on n-element destination array and n-element source array.
+        typedef void (__TBB_EXPORTED_FUNC *internal_array_op2)(void* dst, const void* src, size_type n );
+
+        //! Internal structure for compact()
+        struct internal_segments_table {
+            segment_index_t first_block;
+            void* table[pointers_per_long_table];
+        };
+
+        void __TBB_EXPORTED_METHOD internal_reserve( size_type n, size_type element_size, size_type max_size );
+        size_type __TBB_EXPORTED_METHOD internal_capacity() const;
+        void internal_grow( size_type start, size_type finish, size_type element_size, internal_array_op2 init, const void *src );
+        size_type __TBB_EXPORTED_METHOD internal_grow_by( size_type delta, size_type element_size, internal_array_op2 init, const void *src );
+        void* __TBB_EXPORTED_METHOD internal_push_back( size_type element_size, size_type& index );
+        segment_index_t __TBB_EXPORTED_METHOD internal_clear( internal_array_op1 destroy );
+        void* __TBB_EXPORTED_METHOD internal_compact( size_type element_size, void *table, internal_array_op1 destroy, internal_array_op2 copy );
+        void __TBB_EXPORTED_METHOD internal_copy( const concurrent_vector_base_v3& src, size_type element_size, internal_array_op2 copy );
+        void __TBB_EXPORTED_METHOD internal_assign( const concurrent_vector_base_v3& src, size_type element_size,
+                              internal_array_op1 destroy, internal_array_op2 assign, internal_array_op2 copy );
+        //! Obsolete
+        void __TBB_EXPORTED_METHOD internal_throw_exception(size_type) const;
+        void __TBB_EXPORTED_METHOD internal_swap(concurrent_vector_base_v3& v);
+
+        void __TBB_EXPORTED_METHOD internal_resize( size_type n, size_type element_size, size_type max_size, const void *src,
+                                                    internal_array_op1 destroy, internal_array_op2 init );
+        size_type __TBB_EXPORTED_METHOD internal_grow_to_at_least_with_result( size_type new_size, size_type element_size, internal_array_op2 init, const void *src );
+
+        //! Deprecated entry point for backwards compatibility to TBB 2.1.
+        void __TBB_EXPORTED_METHOD internal_grow_to_at_least( size_type new_size, size_type element_size, internal_array_op2 init, const void *src );
+private:
+        //! Private functionality
+        class helper;
+        friend class helper;
+    };
+    
+    typedef concurrent_vector_base_v3 concurrent_vector_base;
+
+    //! Meets requirements of a forward iterator for STL and a Value for a blocked_range.
+    /** Value is either the T or const T type of the container.
+        @ingroup containers */
+    template<typename Container, typename Value>
+    class vector_iterator 
+    {
+        //! concurrent_vector over which we are iterating.
+        Container* my_vector;
+
+        //! Index into the vector 
+        size_t my_index;
+
+        //! Caches my_vector->internal_subscript(my_index)
+        /** NULL if cached value is not available */
+        mutable Value* my_item;
+
+        template<typename C, typename T>
+        friend vector_iterator<C,T> operator+( ptrdiff_t offset, const vector_iterator<C,T>& v );
+
+        template<typename C, typename T, typename U>
+        friend bool operator==( const vector_iterator<C,T>& i, const vector_iterator<C,U>& j );
+
+        template<typename C, typename T, typename U>
+        friend bool operator<( const vector_iterator<C,T>& i, const vector_iterator<C,U>& j );
+
+        template<typename C, typename T, typename U>
+        friend ptrdiff_t operator-( const vector_iterator<C,T>& i, const vector_iterator<C,U>& j );
+    
+        template<typename C, typename U>
+        friend class internal::vector_iterator;
+
+#if !defined(_MSC_VER) || defined(__INTEL_COMPILER)
+        template<typename T, class A>
+        friend class tbb::concurrent_vector;
+#else
+public: // workaround for MSVC
+#endif 
+
+        vector_iterator( const Container& vector, size_t index, void *ptr = 0 ) : 
+            my_vector(const_cast<Container*>(&vector)), 
+            my_index(index), 
+            my_item(static_cast<Value*>(ptr))
+        {}
+
+    public:
+        //! Default constructor
+        vector_iterator() : my_vector(NULL), my_index(~size_t(0)), my_item(NULL) {}
+
+        vector_iterator( const vector_iterator<Container,typename Container::value_type>& other ) :
+            my_vector(other.my_vector),
+            my_index(other.my_index),
+            my_item(other.my_item)
+        {}
+
+        vector_iterator operator+( ptrdiff_t offset ) const {
+            return vector_iterator( *my_vector, my_index+offset );
+        }
+        vector_iterator &operator+=( ptrdiff_t offset ) {
+            my_index+=offset;
+            my_item = NULL;
+            return *this;
+        }
+        vector_iterator operator-( ptrdiff_t offset ) const {
+            return vector_iterator( *my_vector, my_index-offset );
+        }
+        vector_iterator &operator-=( ptrdiff_t offset ) {
+            my_index-=offset;
+            my_item = NULL;
+            return *this;
+        }
+        Value& operator*() const {
+            Value* item = my_item;
+            if( !item ) {
+                item = my_item = &my_vector->internal_subscript(my_index);
+            }
+            __TBB_ASSERT( item==&my_vector->internal_subscript(my_index), "corrupt cache" );
+            return *item;
+        }
+        Value& operator[]( ptrdiff_t k ) const {
+            return my_vector->internal_subscript(my_index+k);
+        }
+        Value* operator->() const {return &operator*();}
+
+        //! Pre increment
+        vector_iterator& operator++() {
+            size_t k = ++my_index;
+            if( my_item ) {
+                // Following test uses 2's-complement wizardry
+                if( (k& (k-2))==0 ) {
+                    // k is a power of two that is at least k-2
+                    my_item= NULL;
+                } else {
+                    ++my_item;
+                }
+            }
+            return *this;
+        }
+
+        //! Pre decrement
+        vector_iterator& operator--() {
+            __TBB_ASSERT( my_index>0, "operator--() applied to iterator already at beginning of concurrent_vector" ); 
+            size_t k = my_index--;
+            if( my_item ) {
+                // Following test uses 2's-complement wizardry
+                if( (k& (k-2))==0 ) {
+                    // k is a power of two that is at least k-2  
+                    my_item= NULL;
+                } else {
+                    --my_item;
+                }
+            }
+            return *this;
+        }
+
+        //! Post increment
+        vector_iterator operator++(int) {
+            vector_iterator result = *this;
+            operator++();
+            return result;
+        }
+
+        //! Post decrement
+        vector_iterator operator--(int) {
+            vector_iterator result = *this;
+            operator--();
+            return result;
+        }
+
+        // STL support
+
+        typedef ptrdiff_t difference_type;
+        typedef Value value_type;
+        typedef Value* pointer;
+        typedef Value& reference;
+        typedef std::random_access_iterator_tag iterator_category;
+    };
+
+    template<typename Container, typename T>
+    vector_iterator<Container,T> operator+( ptrdiff_t offset, const vector_iterator<Container,T>& v ) {
+        return vector_iterator<Container,T>( *v.my_vector, v.my_index+offset );
+    }
+
+    template<typename Container, typename T, typename U>
+    bool operator==( const vector_iterator<Container,T>& i, const vector_iterator<Container,U>& j ) {
+        return i.my_index==j.my_index && i.my_vector == j.my_vector;
+    }
+
+    template<typename Container, typename T, typename U>
+    bool operator!=( const vector_iterator<Container,T>& i, const vector_iterator<Container,U>& j ) {
+        return !(i==j);
+    }
+
+    template<typename Container, typename T, typename U>
+    bool operator<( const vector_iterator<Container,T>& i, const vector_iterator<Container,U>& j ) {
+        return i.my_index<j.my_index;
+    }
+
+    template<typename Container, typename T, typename U>
+    bool operator>( const vector_iterator<Container,T>& i, const vector_iterator<Container,U>& j ) {
+        return j<i;
+    }
+
+    template<typename Container, typename T, typename U>
+    bool operator>=( const vector_iterator<Container,T>& i, const vector_iterator<Container,U>& j ) {
+        return !(i<j);
+    }
+
+    template<typename Container, typename T, typename U>
+    bool operator<=( const vector_iterator<Container,T>& i, const vector_iterator<Container,U>& j ) {
+        return !(j<i);
+    }
+
+    template<typename Container, typename T, typename U>
+    ptrdiff_t operator-( const vector_iterator<Container,T>& i, const vector_iterator<Container,U>& j ) {
+        return ptrdiff_t(i.my_index)-ptrdiff_t(j.my_index);
+    }
+
+    template<typename T, class A>
+    class allocator_base {
+    public:
+        typedef typename A::template
+            rebind<T>::other allocator_type;
+        allocator_type my_allocator;
+
+        allocator_base(const allocator_type &a = allocator_type() ) : my_allocator(a) {}
+    };
+
+} // namespace internal
+//! @endcond
+
+//! Concurrent vector container
+/** concurrent_vector is a container having the following main properties:
+    - It provides random indexed access to its elements. The index of the first element is 0.
+    - It ensures safe concurrent growth of its size (different threads can safely append new elements).
+    - Adding new elements does not invalidate existing iterators and does not change indices of existing items.
+
+@par Compatibility
+    The class meets all Container Requirements and Reversible Container Requirements of the
+    C++ Standard (see ISO/IEC 14882:2003(E), clause 23.1). However, it does not meet the
+    Sequence Requirements due to the absence of insert() and erase() methods.
+
+@par Exception Safety
+    Methods that allocate memory and/or construct new elements can throw an exception if the
+    allocator fails to allocate memory or an element's constructor throws one.
+    A concurrent vector's element type T must conform to the following requirements:
+    - The destructor of T must not throw an exception.
+    - The default constructor of T must not throw an exception OR its non-virtual destructor must work safely when the object's memory is zero-initialized.
+    .
+    Otherwise, the program's behavior is undefined.
+@par
+    If an exception happens inside a growth or assignment operation, the vector instance becomes invalid unless stated otherwise in the method documentation.
+    Invalid state means:
+    - There is no guarantee that all items were initialized by a constructor. The remaining items are zero-filled, including the item where the exception happened.
+    - An invalid vector instance cannot be repaired; it is unable to grow any further.
+    - The size and capacity reported by the vector are incorrect, calculated as if the failed operation had succeeded.
+    - An attempt to access unallocated elements using operator[] or iterators results in an access violation or segmentation fault; the at() method throws a C++ exception instead.
+    .
+    If a concurrent grow operation completes successfully, all the elements it has added to the vector remain valid and accessible even if a subsequent grow operation fails.
+
+@par Fragmentation
+    Unlike an STL vector, a concurrent_vector does not move existing elements if it needs
+    to allocate more memory. The container is divided into a series of contiguous arrays of
+    elements. The first reservation, growth, or assignment operation determines the size of
+    the first array. Using a small number of elements as the initial size incurs fragmentation that
+    may increase element access time. The internal layout can be optimized by the compact() method,
+    which merges several smaller arrays into one contiguous array.
+
+@par Changes since TBB 2.1
+    - Fixed guarantees of concurrent_vector::size() and grow_to_at_least() methods to ensure elements are allocated.
+    - Methods end()/rbegin()/back() are partly thread-safe since they use size() to get the end of the vector
+    - Added resize() methods (not thread-safe)
+    - Added cbegin/cend/crbegin/crend methods
+    - Changed return type of methods grow* and push_back to iterator
+
+@par Changes since TBB 2.0
+    - Implemented exception-safety guarantees
+    - Added template argument for allocator
+    - Added allocator argument in constructors
+    - Faster index calculation
+    - First growth call specifies a number of segments to be merged in the first allocation.
+    - Fixed memory blow up for swarm of vector's instances of small size
+    - Added grow_by(size_type n, const_reference t) growth using the copying constructor to initialize new items.
+    - Added STL-like constructors.
+    - Added operators ==, < and derivatives
+    - Added at() method, approved for using after an exception was thrown inside the vector
+    - Added get_allocator() method.
+    - Added assign() methods
+    - Added compact() method to defragment first segments
+    - Added swap() method
+    - range() defaults to grainsize = 1, supporting automatic grainsize algorithms.
+
+    @ingroup containers */
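Before the class definition that follows, an illustrative sketch (not part of the diff) of the concurrent growth API it exposes further down in this header: push_back(), grow_by(), operator[] and size(). It assumes TBB_DEPRECATED is not defined, so grow_by() and push_back() return iterators rather than the old size.

    #include "tbb/concurrent_vector.h"
    #include <iostream>

    int main() {
        tbb::concurrent_vector<int> v;

        // push_back and grow_by are safe to call concurrently from multiple threads;
        // existing elements are never moved, so indices and iterators stay valid.
        v.push_back(42);
        tbb::concurrent_vector<int>::iterator it = v.grow_by(3);  // appends 3 default-constructed ints
        for (; it != v.end(); ++it)
            *it = 7;

        // operator[] is safe for concurrent reads of elements known to be constructed.
        for (size_t i = 0; i < v.size(); ++i)
            std::cout << v[i] << ' ';   // 42 7 7 7
        std::cout << std::endl;
        return 0;
    }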
+template<typename T, class A>
+class concurrent_vector: protected internal::allocator_base<T, A>,
+                         private internal::concurrent_vector_base {
+private:
+    template<typename I>
+    class generic_range_type: public blocked_range<I> {
+    public:
+        typedef T value_type;
+        typedef T& reference;
+        typedef const T& const_reference;
+        typedef I iterator;
+        typedef ptrdiff_t difference_type;
+        generic_range_type( I begin_, I end_, size_t grainsize_ = 1) : blocked_range<I>(begin_,end_,grainsize_) {} 
+        template<typename U>
+        generic_range_type( const generic_range_type<U>& r) : blocked_range<I>(r.begin(),r.end(),r.grainsize()) {} 
+        generic_range_type( generic_range_type& r, split ) : blocked_range<I>(r,split()) {}
+    };
+
+    template<typename C, typename U>
+    friend class internal::vector_iterator;
+public:
+    //------------------------------------------------------------------------
+    // STL compatible types
+    //------------------------------------------------------------------------
+    typedef internal::concurrent_vector_base_v3::size_type size_type;
+    typedef typename internal::allocator_base<T, A>::allocator_type allocator_type;
+
+    typedef T value_type;
+    typedef ptrdiff_t difference_type;
+    typedef T& reference;
+    typedef const T& const_reference;
+    typedef T *pointer;
+    typedef const T *const_pointer;
+
+    typedef internal::vector_iterator<concurrent_vector,T> iterator;
+    typedef internal::vector_iterator<concurrent_vector,const T> const_iterator;
+
+#if !defined(_MSC_VER) || _CPPLIB_VER>=300 
+    // Assume ISO standard definition of std::reverse_iterator
+    typedef std::reverse_iterator<iterator> reverse_iterator;
+    typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+#else
+    // Use non-standard std::reverse_iterator
+    typedef std::reverse_iterator<iterator,T,T&,T*> reverse_iterator;
+    typedef std::reverse_iterator<const_iterator,T,const T&,const T*> const_reverse_iterator;
+#endif /* !defined(_MSC_VER) || _CPPLIB_VER>=300 */
+
+    //------------------------------------------------------------------------
+    // Parallel algorithm support
+    //------------------------------------------------------------------------
+    typedef generic_range_type<iterator> range_type;
+    typedef generic_range_type<const_iterator> const_range_type;
+
+    //------------------------------------------------------------------------
+    // STL compatible constructors & destructors
+    //------------------------------------------------------------------------
+
+    //! Construct empty vector.
+    explicit concurrent_vector(const allocator_type &a = allocator_type())
+        : internal::allocator_base<T, A>(a), internal::concurrent_vector_base()
+    {
+        vector_allocator_ptr = &internal_allocator;
+    }
+
+    //! Copying constructor
+    concurrent_vector( const concurrent_vector& vector, const allocator_type& a = allocator_type() )
+        : internal::allocator_base<T, A>(a), internal::concurrent_vector_base()
+    {
+        vector_allocator_ptr = &internal_allocator;
+        __TBB_TRY {
+            internal_copy(vector, sizeof(T), &copy_array);
+        } __TBB_CATCH(...) {
+            segment_t *table = my_segment;
+            internal_free_segments( reinterpret_cast<void**>(table), internal_clear(&destroy_array), my_first_block );
+            __TBB_RETHROW();
+        }
+    }
+
+    //! Copying constructor for vector with different allocator type
+    template<class M>
+    concurrent_vector( const concurrent_vector<T, M>& vector, const allocator_type& a = allocator_type() )
+        : internal::allocator_base<T, A>(a), internal::concurrent_vector_base()
+    {
+        vector_allocator_ptr = &internal_allocator;
+        __TBB_TRY {
+            internal_copy(vector.internal_vector_base(), sizeof(T), &copy_array);
+        } __TBB_CATCH(...) {
+            segment_t *table = my_segment;
+            internal_free_segments( reinterpret_cast<void**>(table), internal_clear(&destroy_array), my_first_block );
+            __TBB_RETHROW();
+        }
+    }
+
+    //! Construction with initial size specified by argument n
+    explicit concurrent_vector(size_type n)
+    {
+        vector_allocator_ptr = &internal_allocator;
+        __TBB_TRY {
+            internal_resize( n, sizeof(T), max_size(), NULL, &destroy_array, &initialize_array );
+        } __TBB_CATCH(...) {
+            segment_t *table = my_segment;
+            internal_free_segments( reinterpret_cast<void**>(table), internal_clear(&destroy_array), my_first_block );
+            __TBB_RETHROW();
+        }
+    }
+
+    //! Construction with initial size specified by argument n, initialization by copying of t, and given allocator instance
+    concurrent_vector(size_type n, const_reference t, const allocator_type& a = allocator_type())
+        : internal::allocator_base<T, A>(a)
+    {
+        vector_allocator_ptr = &internal_allocator;
+        __TBB_TRY {
+            internal_resize( n, sizeof(T), max_size(), static_cast<const void*>(&t), &destroy_array, &initialize_array_by );
+        } __TBB_CATCH(...) {
+            segment_t *table = my_segment;
+            internal_free_segments( reinterpret_cast<void**>(table), internal_clear(&destroy_array), my_first_block );
+            __TBB_RETHROW();
+        }
+    }
+
+    //! Construction with copying iteration range and given allocator instance
+    template<class I>
+    concurrent_vector(I first, I last, const allocator_type &a = allocator_type())
+        : internal::allocator_base<T, A>(a)
+    {
+        vector_allocator_ptr = &internal_allocator;
+        __TBB_TRY {
+            internal_assign_range(first, last, static_cast<is_integer_tag<std::numeric_limits<I>::is_integer> *>(0) );
+        } __TBB_CATCH(...) {
+            segment_t *table = my_segment;
+            internal_free_segments( reinterpret_cast<void**>(table), internal_clear(&destroy_array), my_first_block );
+            __TBB_RETHROW();
+        }
+    }
+
+    //! Assignment
+    concurrent_vector& operator=( const concurrent_vector& vector ) {
+        if( this != &vector )
+            internal_assign(vector, sizeof(T), &destroy_array, &assign_array, &copy_array);
+        return *this;
+    }
+
+    //! Assignment for vector with different allocator type
+    template<class M>
+    concurrent_vector& operator=( const concurrent_vector<T, M>& vector ) {
+        if( static_cast<void*>( this ) != static_cast<const void*>( &vector ) )
+            internal_assign(vector.internal_vector_base(),
+                sizeof(T), &destroy_array, &assign_array, &copy_array);
+        return *this;
+    }
+
+    //------------------------------------------------------------------------
+    // Concurrent operations
+    //------------------------------------------------------------------------
+    //! Grow by "delta" elements.
+#if TBB_DEPRECATED
+    /** Returns old size. */
+    size_type grow_by( size_type delta ) {
+        return delta ? internal_grow_by( delta, sizeof(T), &initialize_array, NULL ) : my_early_size;
+    }
+#else
+    /** Returns iterator pointing to the first new element. */
+    iterator grow_by( size_type delta ) {
+        return iterator(*this, delta ? internal_grow_by( delta, sizeof(T), &initialize_array, NULL ) : my_early_size);
+    }
+#endif
+
+    //! Grow by "delta" elements using copying constuctor.
+#if TBB_DEPRECATED
+    /** Returns old size. */
+    size_type grow_by( size_type delta, const_reference t ) {
+        return delta ? internal_grow_by( delta, sizeof(T), &initialize_array_by, static_cast<const void*>(&t) ) : my_early_size;
+    }
+#else
+    /** Returns iterator pointing to the first new element. */
+    iterator grow_by( size_type delta, const_reference t ) {
+        return iterator(*this, delta ? internal_grow_by( delta, sizeof(T), &initialize_array_by, static_cast<const void*>(&t) ) : my_early_size);
+    }
+#endif
+
+    //! Append minimal sequence of elements such that size()>=n.  
+#if TBB_DEPRECATED
+    /** The new elements are default constructed.  Blocks until all elements in range [0..n) are allocated.
+        May return while other elements are being constructed by other threads. */
+    void grow_to_at_least( size_type n ) {
+        if( n ) internal_grow_to_at_least_with_result( n, sizeof(T), &initialize_array, NULL );
+    };
+#else
+    /** The new elements are default constructed.  Blocks until all elements in range [0..n) are allocated.
+        May return while other elements are being constructed by other threads.
+        Returns iterator that points to beginning of appended sequence.
+        If no elements were appended, returns iterator pointing to nth element. */
+    iterator grow_to_at_least( size_type n ) {
+        size_type m=0;
+        if( n ) {
+            m = internal_grow_to_at_least_with_result( n, sizeof(T), &initialize_array, NULL );
+            if( m>n ) m=n;
+        }
+        return iterator(*this, m);
+    };
+#endif
+
+    //! Push item 
+#if TBB_DEPRECATED
+    size_type push_back( const_reference item )
+#else
+    /** Returns iterator pointing to the new element. */
+    iterator push_back( const_reference item )
+#endif
+    {
+        size_type k;
+        void *ptr = internal_push_back(sizeof(T),k);
+        internal_loop_guide loop(1, ptr);
+        loop.init(&item);
+#if TBB_DEPRECATED
+        return k;
+#else
+        return iterator(*this, k, ptr);
+#endif
+    }
+
+    //! Get reference to element at given index.
+    /** This method is thread-safe for concurrent reads, and also while growing the vector,
+        as long as the calling thread has checked that index<size(). */
+    reference operator[]( size_type index ) {
+        return internal_subscript(index);
+    }
+
+    //! Get const reference to element at given index.
+    const_reference operator[]( size_type index ) const {
+        return internal_subscript(index);
+    }
+
+    //! Get reference to element at given index. Throws exceptions on errors.
+    reference at( size_type index ) {
+        return internal_subscript_with_exceptions(index);
+    }
+
+    //! Get const reference to element at given index. Throws exceptions on errors.
+    const_reference at( size_type index ) const {
+        return internal_subscript_with_exceptions(index);
+    }
+
+    //! Get range for iterating with parallel algorithms
+    range_type range( size_t grainsize = 1) {
+        return range_type( begin(), end(), grainsize );
+    }
+
+    //! Get const range for iterating with parallel algorithms
+    const_range_type range( size_t grainsize = 1 ) const {
+        return const_range_type( begin(), end(), grainsize );
+    }
+    //------------------------------------------------------------------------
+    // Capacity
+    //------------------------------------------------------------------------
+    //! Return size of vector. It may include elements under construction
+    size_type size() const {
+        size_type sz = my_early_size, cp = internal_capacity();
+        return cp < sz ? cp : sz;
+    }
+
+    //! Return true if the vector is empty; false if it has elements, including elements still under construction.
+    bool empty() const {return !my_early_size;}
+
+    //! Maximum size to which array can grow without allocating more memory. Concurrent allocations are not included in the value.
+    size_type capacity() const {return internal_capacity();}
+
+    //! Allocate enough space to grow to size n without having to allocate more memory later.
+    /** Like most of the methods provided for STL compatibility, this method is *not* thread safe. 
+        The capacity afterwards may be bigger than the requested reservation. */
+    void reserve( size_type n ) {
+        if( n )
+            internal_reserve(n, sizeof(T), max_size());
+    }
+
+    //! Resize the vector. Not thread-safe.
+    void resize( size_type n ) {
+        internal_resize( n, sizeof(T), max_size(), NULL, &destroy_array, &initialize_array );
+    }
+    
+    //! Resize the vector, copy t for new elements. Not thread-safe.
+    void resize( size_type n, const_reference t ) {
+        internal_resize( n, sizeof(T), max_size(), static_cast<const void*>(&t), &destroy_array, &initialize_array_by );
+    }
+   
+#if TBB_DEPRECATED 
+    //! An alias for shrink_to_fit()
+    void compact() {shrink_to_fit();}
+#endif /* TBB_DEPRECATED */
+
+    //! Optimize memory usage and fragmentation.
+    void shrink_to_fit();
+
+    //! Upper bound on argument to reserve.
+    size_type max_size() const {return (~size_type(0))/sizeof(T);}
+
+    //------------------------------------------------------------------------
+    // STL support
+    //------------------------------------------------------------------------
+
+    //! start iterator
+    iterator begin() {return iterator(*this,0);}
+    //! end iterator
+    iterator end() {return iterator(*this,size());}
+    //! start const iterator
+    const_iterator begin() const {return const_iterator(*this,0);}
+    //! end const iterator
+    const_iterator end() const {return const_iterator(*this,size());}
+    //! start const iterator
+    const_iterator cbegin() const {return const_iterator(*this,0);}
+    //! end const iterator
+    const_iterator cend() const {return const_iterator(*this,size());}
+    //! reverse start iterator
+    reverse_iterator rbegin() {return reverse_iterator(end());}
+    //! reverse end iterator
+    reverse_iterator rend() {return reverse_iterator(begin());}
+    //! reverse start const iterator
+    const_reverse_iterator rbegin() const {return const_reverse_iterator(end());}
+    //! reverse end const iterator
+    const_reverse_iterator rend() const {return const_reverse_iterator(begin());}
+    //! reverse start const iterator
+    const_reverse_iterator crbegin() const {return const_reverse_iterator(end());}
+    //! reverse end const iterator
+    const_reverse_iterator crend() const {return const_reverse_iterator(begin());}
+    //! the first item
+    reference front() {
+        __TBB_ASSERT( size()>0, NULL);
+        return static_cast<T*>(my_segment[0].array)[0];
+    }
+    //! the first item const
+    const_reference front() const {
+        __TBB_ASSERT( size()>0, NULL);
+        return static_cast<const T*>(my_segment[0].array)[0];
+    }
+    //! the last item
+    reference back() {
+        __TBB_ASSERT( size()>0, NULL);
+        return internal_subscript( size()-1 );
+    }
+    //! the last item const
+    const_reference back() const {
+        __TBB_ASSERT( size()>0, NULL);
+        return internal_subscript( size()-1 );
+    }
+    //! return allocator object
+    allocator_type get_allocator() const { return this->my_allocator; }
+
+    //! assign n items by copying t item
+    void assign(size_type n, const_reference t) {
+        clear();
+        internal_resize( n, sizeof(T), max_size(), static_cast<const void*>(&t), &destroy_array, &initialize_array_by );
+    }
+
+    //! assign range [first, last)
+    template<class I>
+    void assign(I first, I last) {
+        clear(); internal_assign_range( first, last, static_cast<is_integer_tag<std::numeric_limits<I>::is_integer> *>(0) );
+    }
+
+    //! swap two instances
+    void swap(concurrent_vector &vector) {
+        if( this != &vector ) {
+            concurrent_vector_base_v3::internal_swap(static_cast<concurrent_vector_base_v3&>(vector));
+            std::swap(this->my_allocator, vector.my_allocator);
+        }
+    }
+
+    //! Clear container while keeping memory allocated.
+    /** To free up the memory, use in conjunction with method shrink_to_fit() (its deprecated alias is compact()). Not thread-safe. */
+    void clear() {
+        internal_clear(&destroy_array);
+    }
+
+    //! Clear and destroy vector.
+    ~concurrent_vector() {
+        segment_t *table = my_segment;
+        internal_free_segments( reinterpret_cast<void**>(table), internal_clear(&destroy_array), my_first_block );
+        // the base class destructor runs after this
+    }
+
+    const internal::concurrent_vector_base_v3 &internal_vector_base() const { return *this; }
+private:
+    //! Allocate k items
+    static void *internal_allocator(internal::concurrent_vector_base_v3 &vb, size_t k) {
+        return static_cast<concurrent_vector<T, A>&>(vb).my_allocator.allocate(k);
+    }
+    //! Free k segments from table
+    void internal_free_segments(void *table[], segment_index_t k, segment_index_t first_block);
+
+    //! Get reference to element at given index.
+    T& internal_subscript( size_type index ) const;
+
+    //! Get reference to element at given index, with error checking
+    T& internal_subscript_with_exceptions( size_type index ) const;
+
+    //! assign n items by copying t
+    void internal_assign_n(size_type n, const_pointer p) {
+        internal_resize( n, sizeof(T), max_size(), static_cast<const void*>(p), &destroy_array, p? &initialize_array_by : &initialize_array );
+    }
+
+    //! helper class
+    template<bool B> class is_integer_tag;
+
+    //! assign integer items by copying when arguments are treated as iterators. See C++ Standard 2003 23.1.1p9
+    template<class I>
+    void internal_assign_range(I first, I last, is_integer_tag<true> *) {
+        internal_assign_n(static_cast<size_type>(first), &static_cast<T&>(last));
+    }
+    //! inline proxy assign by iterators
+    template<class I>
+    void internal_assign_range(I first, I last, is_integer_tag<false> *) {
+        internal_assign_iterators(first, last);
+    }
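
    // Editorial illustration (not part of the header): the two overloads above
    // implement the C++03 rule cited at internal_assign_range(is_integer_tag<true>).
    // For example, v.assign(5, 10) with two int arguments selects the integer
    // overload and produces five copies of the value 10, instead of treating
    // the arguments as an iterator range.
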
+    //! assign by iterators
+    template<class I>
+    void internal_assign_iterators(I first, I last);
+
+    //! Construct n instances of T, starting at "begin".
+    static void __TBB_EXPORTED_FUNC initialize_array( void* begin, const void*, size_type n );
+
+    //! Construct n instances of T, starting at "begin".
+    static void __TBB_EXPORTED_FUNC initialize_array_by( void* begin, const void* src, size_type n );
+
+    //! Construct n instances of T, starting at "begin".
+    static void __TBB_EXPORTED_FUNC copy_array( void* dst, const void* src, size_type n );
+
+    //! Assign n instances of T, starting at "begin".
+    static void __TBB_EXPORTED_FUNC assign_array( void* dst, const void* src, size_type n );
+
+    //! Destroy n instances of T, starting at "begin".
+    static void __TBB_EXPORTED_FUNC destroy_array( void* begin, size_type n );
+
+    //! Exception-aware helper class for filling a segment via potentially throwing operations of the user's class
+    class internal_loop_guide : internal::no_copy {
+    public:
+        const pointer array;
+        const size_type n;
+        size_type i;
+        internal_loop_guide(size_type ntrials, void *ptr)
+            : array(static_cast<pointer>(ptr)), n(ntrials), i(0) {}
+        void init() {   for(; i < n; ++i) new( &array[i] ) T(); }
+        void init(const void *src) { for(; i < n; ++i) new( &array[i] ) T(*static_cast<const T*>(src)); }
+        void copy(const void *src) { for(; i < n; ++i) new( &array[i] ) T(static_cast<const T*>(src)[i]); }
+        void assign(const void *src) { for(; i < n; ++i) array[i] = static_cast<const T*>(src)[i]; }
+        template<class I> void iterate(I &src) { for(; i < n; ++i, ++src) new( &array[i] ) T( *src ); }
+        ~internal_loop_guide() {
+            if(i < n) // if an exception was raised, zero the remaining items
+                std::memset(array+i, 0, (n-i)*sizeof(value_type));
+        }
+    };
+};
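
To make the STL-compatible surface above concrete, here is a minimal serial usage sketch. It is editorial, not part of the commit, and assumes push_back() declared earlier in this header:

    #include "tbb/concurrent_vector.h"
    #include <cstdio>

    int main() {
        tbb::concurrent_vector<int> v;
        v.reserve( 8 );                 // not thread-safe; call before concurrent growth
        for( int i = 0; i < 8; ++i )
            v.push_back( i );           // push_back is declared earlier in this header
        long sum = 0;
        for( tbb::concurrent_vector<int>::const_iterator it = v.begin(); it != v.end(); ++it )
            sum += *it;
        v.shrink_to_fit();              // serial-only: join segments and release spare memory
        std::printf( "size=%u sum=%ld\n", unsigned(v.size()), sum );
        return 0;
    }
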
+
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) 
+#pragma warning (push)
+#pragma warning (disable: 4701) // potentially uninitialized local variable "old"
+#endif
+template<typename T, class A>
+void concurrent_vector<T, A>::shrink_to_fit() {
+    internal_segments_table old;
+    __TBB_TRY {
+        if( internal_compact( sizeof(T), &old, &destroy_array, &copy_array ) )
+            internal_free_segments( old.table, pointers_per_long_table, old.first_block ); // free joined and unnecessary segments
+    } __TBB_CATCH(...) {
+        if( old.first_block ) // free the segment allocated for compacting; only needed to support exceptions thrown by the constructor of the user's type T
+            internal_free_segments( old.table, 1, old.first_block );
+        __TBB_RETHROW();
+    }
+}
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) 
+#pragma warning (pop)
+#endif // warning 4701 is back 
+
+template<typename T, class A>
+void concurrent_vector<T, A>::internal_free_segments(void *table[], segment_index_t k, segment_index_t first_block) {
+    // Free the arrays
+    while( k > first_block ) {
+        --k;
+        T* array = static_cast<T*>(table[k]);
+        table[k] = NULL;
+        if( array > internal::vector_allocation_error_flag ) // check for correct segment pointer
+            this->my_allocator.deallocate( array, segment_size(k) );
+    }
+    T* array = static_cast<T*>(table[0]);
+    if( array > internal::vector_allocation_error_flag ) {
+        __TBB_ASSERT( first_block > 0, NULL );
+        while(k > 0) table[--k] = NULL;
+        this->my_allocator.deallocate( array, segment_size(first_block) );
+    }
+}
+
+template<typename T, class A>
+T& concurrent_vector<T, A>::internal_subscript( size_type index ) const {
+    __TBB_ASSERT( index < my_early_size, "index out of bounds" );
+    size_type j = index;
+    segment_index_t k = segment_base_index_of( j );
+    __TBB_ASSERT( (segment_t*)my_segment != my_storage || k < pointers_per_short_table, "index is being allocated" );
+    // no need in __TBB_load_with_acquire since thread works in own space or gets 
+    T* array = static_cast<T*>( tbb::internal::itt_hide_load_word(my_segment[k].array));
+    __TBB_ASSERT( array != internal::vector_allocation_error_flag, "the instance is broken by bad allocation. Use at() instead" );
+    __TBB_ASSERT( array, "index is being allocated" );
+    return array[j];
+}
+
+template<typename T, class A>
+T& concurrent_vector<T, A>::internal_subscript_with_exceptions( size_type index ) const {
+    if( index >= my_early_size )
+        internal::throw_exception(internal::eid_out_of_range); // throw std::out_of_range
+    size_type j = index;
+    segment_index_t k = segment_base_index_of( j );
+    if( (segment_t*)my_segment == my_storage && k >= pointers_per_short_table )
+        internal::throw_exception(internal::eid_segment_range_error); // throw std::range_error
+    void *array = my_segment[k].array; // no need in __TBB_load_with_acquire
+    if( array <= internal::vector_allocation_error_flag ) // check for correct segment pointer
+        internal::throw_exception(internal::eid_index_range_error); // throw std::range_error
+    return static_cast<T*>(array)[j];
+}
+
+template<typename T, class A> template<class I>
+void concurrent_vector<T, A>::internal_assign_iterators(I first, I last) {
+    __TBB_ASSERT(my_early_size == 0, NULL);
+    size_type n = std::distance(first, last);
+    if( !n ) return;
+    internal_reserve(n, sizeof(T), max_size());
+    my_early_size = n;
+    segment_index_t k = 0;
+    size_type sz = segment_size( my_first_block );
+    while( sz < n ) {
+        internal_loop_guide loop(sz, my_segment[k].array);
+        loop.iterate(first);
+        n -= sz;
+        if( !k ) k = my_first_block;
+        else { ++k; sz <<= 1; }
+    }
+    internal_loop_guide loop(n, my_segment[k].array);
+    loop.iterate(first);
+}
+
+template<typename T, class A>
+void concurrent_vector<T, A>::initialize_array( void* begin, const void *, size_type n ) {
+    internal_loop_guide loop(n, begin); loop.init();
+}
+
+template<typename T, class A>
+void concurrent_vector<T, A>::initialize_array_by( void* begin, const void *src, size_type n ) {
+    internal_loop_guide loop(n, begin); loop.init(src);
+}
+
+template<typename T, class A>
+void concurrent_vector<T, A>::copy_array( void* dst, const void* src, size_type n ) {
+    internal_loop_guide loop(n, dst); loop.copy(src);
+}
+
+template<typename T, class A>
+void concurrent_vector<T, A>::assign_array( void* dst, const void* src, size_type n ) {
+    internal_loop_guide loop(n, dst); loop.assign(src);
+}
+
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) 
+    // Workaround for overzealous compiler warning
+    #pragma warning (push)
+    #pragma warning (disable: 4189)
+#endif
+template<typename T, class A>
+void concurrent_vector<T, A>::destroy_array( void* begin, size_type n ) {
+    T* array = static_cast<T*>(begin);
+    for( size_type j=n; j>0; --j )
+        array[j-1].~T(); // destructors are not supposed to throw any exceptions
+}
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) 
+    #pragma warning (pop)
+#endif // warning 4189 is back 
+
+// concurrent_vector's template functions
+template<typename T, class A1, class A2>
+inline bool operator==(const concurrent_vector<T, A1> &a, const concurrent_vector<T, A2> &b) {
+    // Simply:    return a.size() == b.size() && std::equal(a.begin(), a.end(), b.begin());
+    if(a.size() != b.size()) return false;
+    typename concurrent_vector<T, A1>::const_iterator i(a.begin());
+    typename concurrent_vector<T, A2>::const_iterator j(b.begin());
+    for(; i != a.end(); ++i, ++j)
+        if( !(*i == *j) ) return false;
+    return true;
+}
+
+template<typename T, class A1, class A2>
+inline bool operator!=(const concurrent_vector<T, A1> &a, const concurrent_vector<T, A2> &b)
+{    return !(a == b); }
+
+template<typename T, class A1, class A2>
+inline bool operator<(const concurrent_vector<T, A1> &a, const concurrent_vector<T, A2> &b)
+{    return (std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end())); }
+
+template<typename T, class A1, class A2>
+inline bool operator>(const concurrent_vector<T, A1> &a, const concurrent_vector<T, A2> &b)
+{    return b < a; }
+
+template<typename T, class A1, class A2>
+inline bool operator<=(const concurrent_vector<T, A1> &a, const concurrent_vector<T, A2> &b)
+{    return !(b < a); }
+
+template<typename T, class A1, class A2>
+inline bool operator>=(const concurrent_vector<T, A1> &a, const concurrent_vector<T, A2> &b)
+{    return !(a < b); }
+
+template<typename T, class A>
+inline void swap(concurrent_vector<T, A> &a, concurrent_vector<T, A> &b)
+{    a.swap( b ); }
+
+} // namespace tbb
+
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && defined(_Wp64)
+    #pragma warning (pop)
+#endif // warning 4267 is back
+
+#endif /* __TBB_concurrent_vector_H */
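
As a hedged illustration of the relational operators and the non-member swap defined at the end of this header (editorial sketch, not part of the commit):

    #include "tbb/concurrent_vector.h"
    #include <cassert>

    void compare_demo() {
        tbb::concurrent_vector<int> a, b;
        a.assign( 3, 7 );               // a = {7, 7, 7}; assign() is not thread-safe
        b.assign( 3, 7 );
        assert( a == b && !(a < b) );   // element-wise equality, lexicographic order
        b.push_back( 1 );               // b = {7, 7, 7, 1}
        assert( a != b && a < b );
        swap( a, b );                   // non-member swap above; forwards to a.swap(b)
        assert( b.size() == 3 );
    }
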
diff --git a/tbb/include/tbb/critical_section.h b/tbb/include/tbb/critical_section.h
new file mode 100644 (file)
index 0000000..ea712d1
--- /dev/null
@@ -0,0 +1,141 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef _TBB_CRITICAL_SECTION_H_
+#define _TBB_CRITICAL_SECTION_H_
+
+#if _WIN32||_WIN64
+#include "machine/windows_api.h"
+#else
+#include <pthread.h>
+#include <errno.h>
+#endif  // _WIN32||WIN64
+
+#include "tbb_stddef.h"
+#include "tbb_thread.h"
+#include "tbb_exception.h"
+
+#include "tbb_profiling.h"
+
+namespace tbb {
+
+    namespace internal {
+class critical_section_v4 : internal::no_copy {
+#if _WIN32||_WIN64
+    CRITICAL_SECTION my_impl;
+#else
+    pthread_mutex_t my_impl;
+#endif
+    tbb_thread::id my_tid;
+public:
+
+    void __TBB_EXPORTED_METHOD internal_construct();
+
+    critical_section_v4() { 
+#if _WIN32||_WIN64
+        InitializeCriticalSection(&my_impl);
+#else
+        pthread_mutex_init(&my_impl, NULL);
+#endif
+        internal_construct();
+    }
+
+    ~critical_section_v4() {
+        __TBB_ASSERT(my_tid == tbb_thread::id(), "Destroying a still-held critical section");
+#if _WIN32||_WIN64
+        DeleteCriticalSection(&my_impl); 
+#else
+        pthread_mutex_destroy(&my_impl);
+#endif
+    }
+
+    class scoped_lock : internal::no_copy {
+    private:
+        critical_section_v4 &my_crit;
+    public:
+        scoped_lock( critical_section_v4& lock_me) :my_crit(lock_me) {
+            my_crit.lock();
+        }
+
+        ~scoped_lock() {
+            my_crit.unlock();
+        }
+    };
+
+    void lock() { 
+        tbb_thread::id local_tid = this_tbb_thread::get_id();
+        if(local_tid == my_tid) throw_exception( eid_improper_lock );
+#if _WIN32||_WIN64
+        EnterCriticalSection( &my_impl );
+#else
+        int rval = pthread_mutex_lock(&my_impl);
+        __TBB_ASSERT_EX(!rval, "critical_section::lock: pthread_mutex_lock failed");
+#endif
+        __TBB_ASSERT(my_tid == tbb_thread::id(), NULL);
+        my_tid = local_tid;
+    }
+
+    bool try_lock() {
+        bool gotlock;
+        tbb_thread::id local_tid = this_tbb_thread::get_id();
+        if(local_tid == my_tid) return false;
+#if _WIN32||_WIN64
+        gotlock = TryEnterCriticalSection( &my_impl ) != 0;
+#else
+        int rval = pthread_mutex_trylock(&my_impl);
+        // valid returns are 0 (locked) and [EBUSY]
+        __TBB_ASSERT(rval == 0 || rval == EBUSY, "critical_section::trylock: pthread_mutex_trylock failed");
+        gotlock = rval == 0;
+#endif
+        if(gotlock)  {
+            my_tid = local_tid;
+        }
+        return gotlock;
+    }
+
+    void unlock() {
+        __TBB_ASSERT(this_tbb_thread::get_id() == my_tid, "thread unlocking critical_section is not thread that locked it");
+        my_tid = tbb_thread::id();
+#if _WIN32||_WIN64
+        LeaveCriticalSection( &my_impl );
+#else
+        int rval = pthread_mutex_unlock(&my_impl);
+        __TBB_ASSERT_EX(!rval, "critical_section::unlock: pthread_mutex_unlock failed");
+#endif
+    }
+
+    static const bool is_rw_mutex = false;
+    static const bool is_recursive_mutex = false;
+    static const bool is_fair_mutex = true;
+}; // critical_section_v4
+} // namespace internal
+typedef internal::critical_section_v4 critical_section;
+
+__TBB_DEFINE_PROFILING_SET_NAME(critical_section)
+} // namespace tbb
+#endif  // _TBB_CRITICAL_SECTION_H_
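
A minimal usage sketch for the critical_section defined above (editorial, not part of the commit). The mutex is fair and non-recursive: lock() throws eid_improper_lock if the owning thread tries to relock, so the RAII scoped_lock is the preferred interface:

    #include "tbb/critical_section.h"

    static tbb::critical_section cs;    // fair, non-recursive mutex
    static long shared_counter = 0;

    void bump() {
        // scoped_lock acquires in its constructor and releases in its destructor,
        // making the protected region exception-safe.
        tbb::critical_section::scoped_lock guard( cs );
        ++shared_counter;
    }
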
diff --git a/tbb/include/tbb/enumerable_thread_specific.h b/tbb/include/tbb/enumerable_thread_specific.h
new file mode 100644 (file)
index 0000000..c45a428
--- /dev/null
@@ -0,0 +1,998 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_enumerable_thread_specific_H
+#define __TBB_enumerable_thread_specific_H
+
+#include "concurrent_vector.h"
+#include "tbb_thread.h"
+#include "cache_aligned_allocator.h"
+#include "aligned_space.h"
+#include <string.h>  // for memcpy
+
+#if _WIN32||_WIN64
+#include "machine/windows_api.h"
+#else
+#include <pthread.h>
+#endif
+
+namespace tbb {
+
+//! enum for selecting between single key and key-per-instance versions
+enum ets_key_usage_type { ets_key_per_instance, ets_no_key };
+
+namespace interface6 {
+    //! @cond
+    namespace internal { 
+
+        template<ets_key_usage_type ETS_key_type>
+        class ets_base: tbb::internal::no_copy {
+        protected:
+#if _WIN32||_WIN64
+            typedef DWORD key_type;
+#else
+            typedef pthread_t key_type;
+#endif
+#if __TBB_GCC_3_3_PROTECTED_BROKEN
+        public:
+#endif
+            struct slot;
+
+            struct array {
+                array* next;
+                size_t lg_size;
+                slot& at( size_t k ) {
+                    return ((slot*)(void*)(this+1))[k];
+                }
+                size_t size() const {return (size_t)1<<lg_size;}
+                size_t mask() const {return size()-1;}
+                size_t start( size_t h ) const {
+                    return h>>(8*sizeof(size_t)-lg_size);
+                }
+            };
+            struct slot {
+                key_type key;
+                void* ptr;
+                bool empty() const {return !key;}
+                bool match( key_type k ) const {return key==k;}
+                bool claim( key_type k ) {
+                    __TBB_ASSERT(sizeof(tbb::atomic<key_type>)==sizeof(key_type), NULL);
+                    return tbb::internal::punned_cast<tbb::atomic<key_type>*>(&key)->compare_and_swap(k,0)==0;
+                }
+            };
+#if __TBB_GCC_3_3_PROTECTED_BROKEN
+        protected:
+#endif
+        
+            static key_type key_of_current_thread() {
+               tbb::tbb_thread::id id = tbb::this_tbb_thread::get_id();
+               key_type k;
+               memcpy( &k, &id, sizeof(k) );
+               return k;
+            }
+
+            //! Root of linked list of arrays of decreasing size.
+            /** NULL if and only if my_count==0.  
+                Each array in the list is half the size of its predecessor. */
+            atomic<array*> my_root;
+            atomic<size_t> my_count;
+            virtual void* create_local() = 0;
+            virtual void* create_array(size_t _size) = 0;  // _size in bytes
+            virtual void free_array(void* ptr, size_t _size) = 0; // _size in bytes
+            array* allocate( size_t lg_size ) {
+                size_t n = 1<<lg_size;  
+                array* a = static_cast<array*>(create_array( sizeof(array)+n*sizeof(slot) ));
+                a->lg_size = lg_size;
+                std::memset( a+1, 0, n*sizeof(slot) );
+                return a;
+            }
+            void free(array* a) {
+                size_t n = 1<<(a->lg_size);  
+                free_array( (void *)a, size_t(sizeof(array)+n*sizeof(slot)) );
+            }
+            static size_t hash( key_type k ) {
+                // Multiplicative hashing.  Client should use *upper* bits.
+                // casts required for Mac gcc4.* compiler
+#if __TBB_WORDSIZE == 4
+                return uintptr_t(k)*0x9E3779B9;
+#else
+                return uintptr_t(k)*0x9E3779B97F4A7C15;
+#endif 
+            } 
+        
+            ets_base() {my_root=NULL; my_count=0;}
+            virtual ~ets_base();  // g++ complains if this is not virtual...
+            void* table_lookup( bool& exists );
+            void table_clear();
+            slot& table_find( key_type k ) {
+                size_t h = hash(k);
+                array* r = my_root;
+                size_t mask = r->mask();
+                for(size_t i = r->start(h);;i=(i+1)&mask) {
+                    slot& s = r->at(i);
+                    if( s.empty() || s.match(k) )
+                        return s;
+                }
+            }
+            void table_reserve_for_copy( const ets_base& other ) {
+                __TBB_ASSERT(!my_root,NULL);
+                __TBB_ASSERT(!my_count,NULL);
+                if( other.my_root ) {
+                    array* a = allocate(other.my_root->lg_size);
+                    a->next = NULL;
+                    my_root = a;
+                    my_count = other.my_count;
+                }
+            }
+        };
+
+        template<ets_key_usage_type ETS_key_type>
+        ets_base<ETS_key_type>::~ets_base() {
+            __TBB_ASSERT(!my_root, NULL);
+        }
+
+        template<ets_key_usage_type ETS_key_type>
+        void ets_base<ETS_key_type>::table_clear() {
+            while( array* r = my_root ) {
+                my_root = r->next;
+                free(r);
+            }
+            my_count = 0;
+        }
+                
+        template<ets_key_usage_type ETS_key_type>
+        void* ets_base<ETS_key_type>::table_lookup( bool& exists ) {
+            const key_type k = key_of_current_thread(); 
+
+            __TBB_ASSERT(k!=0,NULL);
+            void* found;
+            size_t h = hash(k);
+            for( array* r=my_root; r; r=r->next ) {
+                size_t mask=r->mask();
+                for(size_t i = r->start(h); ;i=(i+1)&mask) {
+                    slot& s = r->at(i);
+                    if( s.empty() ) break;
+                    if( s.match(k) ) {
+                        if( r==my_root ) {
+                            // Success at top level
+                            exists = true;
+                            return s.ptr;
+                        } else {
+                            // Success at some other level.  Need to insert at top level.
+                            exists = true;
+                            found = s.ptr;
+                            goto insert;
+                        }
+                    }
+                }
+            }
+            // Key does not yet exist
+            exists = false;
+            found = create_local();
+            {
+                size_t c = ++my_count;
+                array* r = my_root;
+                if( !r || c>r->size()/2 ) {
+                    size_t s = r ? r->lg_size : 2;
+                    while( c>size_t(1)<<(s-1) ) ++s;
+                    array* a = allocate(s);
+                    for(;;) {
+                        a->next = my_root;
+                        array* new_r = my_root.compare_and_swap(a,r);
+                        if( new_r==r ) break;
+                        if( new_r->lg_size>=s ) {
+                            // Another thread inserted an equal or  bigger array, so our array is superfluous.
+                            free(a);
+                            break;
+                        }
+                        r = new_r;
+                    }
+                }
+            }
+        insert:
+            // There is guaranteed to be room for it and it is not present, so search for an empty slot and grab it.
+            array* ir = my_root;
+            size_t mask = ir->mask();
+            for(size_t i = ir->start(h);;i=(i+1)&mask) {
+                slot& s = ir->at(i);
+                if( s.empty() ) {
+                    if( s.claim(k) ) {
+                        s.ptr = found;
+                        return found;
+                    }
+                }
+            }
+        }
+
+        //! Specialization that exploits native TLS 
+        template <>
+        class ets_base<ets_key_per_instance>: protected ets_base<ets_no_key> {
+            typedef ets_base<ets_no_key> super;
+#if _WIN32||_WIN64
+            typedef DWORD tls_key_t;
+            void create_key() { my_key = TlsAlloc(); }
+            void destroy_key() { TlsFree(my_key); }
+            void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value); }
+            void* get_tls() { return (void *)TlsGetValue(my_key); }
+#else
+            typedef pthread_key_t tls_key_t;
+            void create_key() { pthread_key_create(&my_key, NULL); }
+            void destroy_key() { pthread_key_delete(my_key); }
+            void set_tls( void * value ) const { pthread_setspecific(my_key, value); }
+            void* get_tls() const { return pthread_getspecific(my_key); }
+#endif
+            tls_key_t my_key;
+            virtual void* create_local() = 0;
+            virtual void* create_array(size_t _size) = 0;  // _size in bytes
+            virtual void free_array(void* ptr, size_t _size) = 0; // size in bytes
+        public:
+            ets_base() {create_key();}
+            ~ets_base() {destroy_key();}
+            void* table_lookup( bool& exists ) {
+                void* found = get_tls();
+                if( found ) {
+                    exists=true;
+                } else {
+                    found = super::table_lookup(exists);
+                    set_tls(found);
+                }
+                return found; 
+            }
+            void table_clear() {
+                destroy_key();
+                create_key(); 
+                super::table_clear();
+            }
+        };
+
+        //! Random access iterator for traversing the thread local copies.
+        template< typename Container, typename Value >
+        class enumerable_thread_specific_iterator 
+#if defined(_WIN64) && defined(_MSC_VER) 
+            // Ensure that Microsoft's internal template function _Val_type works correctly.
+            : public std::iterator<std::random_access_iterator_tag,Value>
+#endif /* defined(_WIN64) && defined(_MSC_VER) */
+        {
+            //! current position in the concurrent_vector 
+        
+            Container *my_container;
+            typename Container::size_type my_index;
+            mutable Value *my_value;
+        
+            template<typename C, typename T>
+            friend enumerable_thread_specific_iterator<C,T> operator+( ptrdiff_t offset, 
+                                                                       const enumerable_thread_specific_iterator<C,T>& v );
+        
+            template<typename C, typename T, typename U>
+            friend bool operator==( const enumerable_thread_specific_iterator<C,T>& i, 
+                                    const enumerable_thread_specific_iterator<C,U>& j );
+        
+            template<typename C, typename T, typename U>
+            friend bool operator<( const enumerable_thread_specific_iterator<C,T>& i, 
+                                   const enumerable_thread_specific_iterator<C,U>& j );
+        
+            template<typename C, typename T, typename U>
+            friend ptrdiff_t operator-( const enumerable_thread_specific_iterator<C,T>& i, const enumerable_thread_specific_iterator<C,U>& j );
+            
+            template<typename C, typename U> 
+            friend class enumerable_thread_specific_iterator;
+        
+            public:
+        
+            enumerable_thread_specific_iterator( const Container &container, typename Container::size_type index ) : 
+                my_container(&const_cast<Container &>(container)), my_index(index), my_value(NULL) {}
+        
+            //! Default constructor
+            enumerable_thread_specific_iterator() : my_container(NULL), my_index(0), my_value(NULL) {}
+        
+            template<typename U>
+            enumerable_thread_specific_iterator( const enumerable_thread_specific_iterator<Container, U>& other ) :
+                    my_container( other.my_container ), my_index( other.my_index), my_value( const_cast<Value *>(other.my_value) ) {}
+        
+            enumerable_thread_specific_iterator operator+( ptrdiff_t offset ) const {
+                return enumerable_thread_specific_iterator(*my_container, my_index + offset);
+            }
+        
+            enumerable_thread_specific_iterator &operator+=( ptrdiff_t offset ) {
+                my_index += offset;
+                my_value = NULL;
+                return *this;
+            }
+        
+            enumerable_thread_specific_iterator operator-( ptrdiff_t offset ) const {
+                return enumerable_thread_specific_iterator( *my_container, my_index-offset );
+            }
+        
+            enumerable_thread_specific_iterator &operator-=( ptrdiff_t offset ) {
+                my_index -= offset;
+                my_value = NULL;
+                return *this;
+            }
+        
+            Value& operator*() const {
+                Value* value = my_value;
+                if( !value ) {
+                    value = my_value = reinterpret_cast<Value *>(&(*my_container)[my_index].value);
+                }
+                __TBB_ASSERT( value==reinterpret_cast<Value *>(&(*my_container)[my_index].value), "corrupt cache" );
+                return *value;
+            }
+        
+            Value& operator[]( ptrdiff_t k ) const {
+               return (*my_container)[my_index + k].value;
+            }
+        
+            Value* operator->() const {return &operator*();}
+        
+            enumerable_thread_specific_iterator& operator++() {
+                ++my_index;
+                my_value = NULL;
+                return *this;
+            }
+        
+            enumerable_thread_specific_iterator& operator--() {
+                --my_index;
+                my_value = NULL;
+                return *this;
+            }
+        
+            //! Post increment
+            enumerable_thread_specific_iterator operator++(int) {
+                enumerable_thread_specific_iterator result = *this;
+                ++my_index;
+                my_value = NULL;
+                return result;
+            }
+        
+            //! Post decrement
+            enumerable_thread_specific_iterator operator--(int) {
+                enumerable_thread_specific_iterator result = *this;
+                --my_index;
+                my_value = NULL;
+                return result;
+            }
+        
+            // STL support
+            typedef ptrdiff_t difference_type;
+            typedef Value value_type;
+            typedef Value* pointer;
+            typedef Value& reference;
+            typedef std::random_access_iterator_tag iterator_category;
+        };
+        
+        template<typename Container, typename T>
+        enumerable_thread_specific_iterator<Container,T> operator+( ptrdiff_t offset, 
+                                                                    const enumerable_thread_specific_iterator<Container,T>& v ) {
+            return enumerable_thread_specific_iterator<Container,T>( v.my_container, v.my_index + offset );
+        }
+        
+        template<typename Container, typename T, typename U>
+        bool operator==( const enumerable_thread_specific_iterator<Container,T>& i, 
+                         const enumerable_thread_specific_iterator<Container,U>& j ) {
+            return i.my_index==j.my_index && i.my_container == j.my_container;
+        }
+        
+        template<typename Container, typename T, typename U>
+        bool operator!=( const enumerable_thread_specific_iterator<Container,T>& i, 
+                         const enumerable_thread_specific_iterator<Container,U>& j ) {
+            return !(i==j);
+        }
+        
+        template<typename Container, typename T, typename U>
+        bool operator<( const enumerable_thread_specific_iterator<Container,T>& i, 
+                        const enumerable_thread_specific_iterator<Container,U>& j ) {
+            return i.my_index<j.my_index;
+        }
+        
+        template<typename Container, typename T, typename U>
+        bool operator>( const enumerable_thread_specific_iterator<Container,T>& i, 
+                        const enumerable_thread_specific_iterator<Container,U>& j ) {
+            return j<i;
+        }
+        
+        template<typename Container, typename T, typename U>
+        bool operator>=( const enumerable_thread_specific_iterator<Container,T>& i, 
+                         const enumerable_thread_specific_iterator<Container,U>& j ) {
+            return !(i<j);
+        }
+        
+        template<typename Container, typename T, typename U>
+        bool operator<=( const enumerable_thread_specific_iterator<Container,T>& i, 
+                         const enumerable_thread_specific_iterator<Container,U>& j ) {
+            return !(j<i);
+        }
+        
+        template<typename Container, typename T, typename U>
+        ptrdiff_t operator-( const enumerable_thread_specific_iterator<Container,T>& i, 
+                             const enumerable_thread_specific_iterator<Container,U>& j ) {
+            return i.my_index-j.my_index;
+        }
+
+    template<typename SegmentedContainer, typename Value >
+        class segmented_iterator
+#if defined(_WIN64) && defined(_MSC_VER)
+        : public std::iterator<std::input_iterator_tag, Value>
+#endif
+        {
+            template<typename C, typename T, typename U>
+            friend bool operator==(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j);
+
+            template<typename C, typename T, typename U>
+            friend bool operator!=(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j);
+            
+            template<typename C, typename U> 
+            friend class segmented_iterator;
+
+            public:
+
+                segmented_iterator() {my_segcont = NULL;}
+
+                segmented_iterator( const SegmentedContainer& _segmented_container ) : 
+                    my_segcont(const_cast<SegmentedContainer*>(&_segmented_container)),
+                    outer_iter(my_segcont->end()) { }
+
+                ~segmented_iterator() {}
+
+                typedef typename SegmentedContainer::iterator outer_iterator;
+                typedef typename SegmentedContainer::value_type InnerContainer;
+                typedef typename InnerContainer::iterator inner_iterator;
+
+                // STL support
+                typedef ptrdiff_t difference_type;
+                typedef Value value_type;
+                typedef typename SegmentedContainer::size_type size_type;
+                typedef Value* pointer;
+                typedef Value& reference;
+                typedef std::input_iterator_tag iterator_category;
+
+                // Copy Constructor
+                template<typename U>
+                segmented_iterator(const segmented_iterator<SegmentedContainer, U>& other) :
+                    my_segcont(other.my_segcont),
+                    outer_iter(other.outer_iter),
+                    // can we assign a default-constructed iterator to inner if we're at the end?
+                    inner_iter(other.inner_iter)
+                {}
+
+                // assignment
+                template<typename U>
+                segmented_iterator& operator=( const segmented_iterator<SegmentedContainer, U>& other) {
+                    if(this != &other) {
+                        my_segcont = other.my_segcont;
+                        outer_iter = other.outer_iter;
+                        if(outer_iter != my_segcont->end()) inner_iter = other.inner_iter;
+                    }
+                    return *this;
+                }
+
+                // allow assignment of outer iterator to segmented iterator.  Once it is
+                // assigned, move forward until a non-empty inner container is found or
+                // the end of the outer container is reached.
+                segmented_iterator& operator=(const outer_iterator& new_outer_iter) {
+                    __TBB_ASSERT(my_segcont != NULL, NULL);
+                    // check that this iterator points to something inside the segmented container
+                    for(outer_iter = new_outer_iter ;outer_iter!=my_segcont->end(); ++outer_iter) {
+                        if( !outer_iter->empty() ) {
+                            inner_iter = outer_iter->begin();
+                            break;
+                        }
+                    }
+                    return *this;
+                }
+
+                // pre-increment
+                segmented_iterator& operator++() {
+                    advance_me();
+                    return *this;
+                }
+
+                // post-increment
+                segmented_iterator operator++(int) {
+                    segmented_iterator tmp = *this;
+                    operator++();
+                    return tmp;
+                }
+
+                bool operator==(const outer_iterator& other_outer) const {
+                    __TBB_ASSERT(my_segcont != NULL, NULL);
+                    return (outer_iter == other_outer &&
+                            (outer_iter == my_segcont->end() || inner_iter == outer_iter->begin()));
+                }
+
+                bool operator!=(const outer_iterator& other_outer) const {
+                    return !operator==(other_outer);
+                }
+
+                // (i)* RHS
+                reference operator*() const {
+                    __TBB_ASSERT(my_segcont != NULL, NULL);
+                    __TBB_ASSERT(outer_iter != my_segcont->end(), "Dereferencing a pointer at end of container");
+                    __TBB_ASSERT(inner_iter != outer_iter->end(), NULL); // should never happen
+                    return *inner_iter;
+                }
+
+                // i->
+                pointer operator->() const { return &operator*();}
+
+            private:
+                SegmentedContainer*             my_segcont;
+                outer_iterator outer_iter;
+                inner_iterator inner_iter;
+
+                void advance_me() {
+                    __TBB_ASSERT(my_segcont != NULL, NULL);
+                    __TBB_ASSERT(outer_iter != my_segcont->end(), NULL); // not true if there are no inner containers
+                    __TBB_ASSERT(inner_iter != outer_iter->end(), NULL); // not true if the inner containers are all empty.
+                    ++inner_iter;
+                    while(inner_iter == outer_iter->end() && ++outer_iter != my_segcont->end()) {
+                        inner_iter = outer_iter->begin();
+                    }
+                }
+        };    // segmented_iterator
+
+        template<typename SegmentedContainer, typename T, typename U>
+        bool operator==( const segmented_iterator<SegmentedContainer,T>& i, 
+                         const segmented_iterator<SegmentedContainer,U>& j ) {
+            if(i.my_segcont != j.my_segcont) return false;
+            if(i.my_segcont == NULL) return true;
+            if(i.outer_iter != j.outer_iter) return false;
+            if(i.outer_iter == i.my_segcont->end()) return true;
+            return i.inner_iter == j.inner_iter;
+        }
+
+        // !=
+        template<typename SegmentedContainer, typename T, typename U>
+        bool operator!=( const segmented_iterator<SegmentedContainer,T>& i, 
+                         const segmented_iterator<SegmentedContainer,U>& j ) {
+            return !(i==j);
+        }
+
+        template<typename T>
+        struct destruct_only: tbb::internal::no_copy {
+            tbb::aligned_space<T,1> value;
+            ~destruct_only() {value.begin()[0].~T();}
+        };
+
+        template<typename T>
+        struct construct_by_default: tbb::internal::no_assign {
+            void construct(void*where) {new(where) T();} // C++ note: the () in T() ensures zero initialization.
+            construct_by_default( int ) {}
+        };
+
+        template<typename T>
+        struct construct_by_exemplar: tbb::internal::no_assign {
+            const T exemplar;
+            void construct(void*where) {new(where) T(exemplar);}
+            construct_by_exemplar( const T& t ) : exemplar(t) {}
+        };
+
+        template<typename T, typename Finit>
+        struct construct_by_finit: tbb::internal::no_assign {
+            Finit f;
+            void construct(void* where) {new(where) T(f());}
+            construct_by_finit( const Finit& f_ ) : f(f_) {}
+        };
+
+        // storage for initialization function pointer
+        template<typename T>
+        class callback_base {
+        public:
+            // Clone *this
+            virtual callback_base* clone() = 0;
+            // Destruct and free *this
+            virtual void destroy() = 0;
+            // Need virtual destructor to satisfy GCC compiler warning
+            virtual ~callback_base() { }
+            // Construct T at where
+            virtual void construct(void* where) = 0;
+        };
+
+        template <typename T, typename Constructor>
+        class callback_leaf: public callback_base<T>, Constructor {
+            template<typename X> callback_leaf( const X& x ) : Constructor(x) {}
+
+            typedef typename tbb::tbb_allocator<callback_leaf> my_allocator_type;
+
+            /*override*/ callback_base<T>* clone() {
+                void* where = my_allocator_type().allocate(1);
+                return new(where) callback_leaf(*this);
+            }
+
+            /*override*/ void destroy() {
+                my_allocator_type().destroy(this);
+                my_allocator_type().deallocate(this,1);
+            }
+
+            /*override*/ void construct(void* where) {
+                Constructor::construct(where);
+            }  
+        public:
+            template<typename X>
+            static callback_base<T>* make( const X& x ) {
+                void* where = my_allocator_type().allocate(1);
+                return new(where) callback_leaf(x);
+            }
+        };
+
+        //! Template for adding padding in order to avoid false sharing
+        /** ModularSize should be sizeof(U) modulo the cache line size.
+            All maintenance of the space will be done explicitly on push_back,
+            and all thread local copies must be destroyed before the concurrent
+            vector is deleted.
+        */
+        template<typename U, size_t ModularSize>
+        struct ets_element {
+            char value[ModularSize==0 ? sizeof(U) : sizeof(U)+(tbb::internal::NFS_MaxLineSize-ModularSize)];
+            void unconstruct() {
+                tbb::internal::punned_cast<U*>(&value)->~U();
+            }
+        };
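
        // Editorial note on the padding arithmetic above, assuming the cache
        // line constant NFS_MaxLineSize is, say, 128 bytes: with sizeof(U) == 24,
        // ModularSize == 24, so value[] occupies 24 + (128 - 24) == 128 bytes and
        // adjacent elements land on distinct cache lines; when sizeof(U) is a
        // multiple of the line size, ModularSize == 0 and no padding is added.
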
+
+    } // namespace internal
+    //! @endcond
+
+    //! The enumerable_thread_specific container
+    /** enumerable_thread_specific has the following properties:
+        - thread-local copies are lazily created, with default, exemplar or function initialization.
+        - thread-local copies do not move (during lifetime, and excepting clear()) so the address of a copy is invariant.
+        - the contained objects need not have operator=() defined if combine is not used.
+        - enumerable_thread_specific containers may be copy-constructed or assigned.
+        - thread-local copies can be managed by hash-table, or can be accessed via TLS storage for speed.
+        - outside of parallel contexts, the contents of all thread-local copies are accessible by iterator or using combine or combine_each methods
+        
+    @par Segmented iterator
+        When the thread-local objects are containers with input_iterators defined, a segmented iterator may
+        be used to iterate over all the elements of all thread-local copies.
+
+    @par combine and combine_each
+        - Both methods are defined for enumerable_thread_specific. 
+        - combine() requires that the type T have operator=() defined.
+        - neither method modifies the contents of the object (though there is no guarantee that the applied methods do not modify the object).
+        - Both are evaluated in serial context (the methods are assumed to be non-benign).
+        
+    @ingroup containers */
+    template <typename T, 
+              typename Allocator=cache_aligned_allocator<T>, 
+              ets_key_usage_type ETS_key_type=ets_no_key > 
+    class enumerable_thread_specific: internal::ets_base<ETS_key_type> { 
+
+        template<typename U, typename A, ets_key_usage_type C> friend class enumerable_thread_specific;
+    
+        typedef internal::ets_element<T,sizeof(T)%tbb::internal::NFS_MaxLineSize> padded_element;
+
+        //! A generic range, used to create range objects from the iterators
+        template<typename I>
+        class generic_range_type: public blocked_range<I> {
+        public:
+            typedef T value_type;
+            typedef T& reference;
+            typedef const T& const_reference;
+            typedef I iterator;
+            typedef ptrdiff_t difference_type;
+            generic_range_type( I begin_, I end_, size_t grainsize_ = 1) : blocked_range<I>(begin_,end_,grainsize_) {} 
+            template<typename U>
+            generic_range_type( const generic_range_type<U>& r) : blocked_range<I>(r.begin(),r.end(),r.grainsize()) {} 
+            generic_range_type( generic_range_type& r, split ) : blocked_range<I>(r,split()) {}
+        };
+    
+        typedef typename Allocator::template rebind< padded_element >::other padded_allocator_type;
+        typedef tbb::concurrent_vector< padded_element, padded_allocator_type > internal_collection_type;
+        
+        internal::callback_base<T> *my_construct_callback;
+
+        internal_collection_type my_locals;
+   
+        /*override*/ void* create_local() {
+#if TBB_DEPRECATED
+            void* lref = &my_locals[my_locals.push_back(padded_element())];
+#else
+            void* lref = &*my_locals.push_back(padded_element());
+#endif
+            my_construct_callback->construct(lref);
+            return lref;
+        } 
+
+        void unconstruct_locals() {
+            for(typename internal_collection_type::iterator cvi = my_locals.begin(); cvi != my_locals.end(); ++cvi) {
+                cvi->unconstruct();
+            }
+        }
+
+        typedef typename Allocator::template rebind< uintptr_t >::other array_allocator_type;
+
+        // _size is in bytes
+        /*override*/ void* create_array(size_t _size) {
+            size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t);
+            return array_allocator_type().allocate(nelements);
+        }
+
+        /*override*/ void free_array( void* _ptr, size_t _size) {
+            size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t);
+            array_allocator_type().deallocate( reinterpret_cast<uintptr_t *>(_ptr),nelements);
+        }
+   
+    public:
+    
+        //! Basic types
+        typedef Allocator allocator_type;
+        typedef T value_type;
+        typedef T& reference;
+        typedef const T& const_reference;
+        typedef T* pointer;
+        typedef const T* const_pointer;
+        typedef typename internal_collection_type::size_type size_type;
+        typedef typename internal_collection_type::difference_type difference_type;
+    
+        // Iterator types
+        typedef typename internal::enumerable_thread_specific_iterator< internal_collection_type, value_type > iterator;
+        typedef typename internal::enumerable_thread_specific_iterator< internal_collection_type, const value_type > const_iterator;
+
+        // Parallel range types
+        typedef generic_range_type< iterator > range_type;
+        typedef generic_range_type< const_iterator > const_range_type;
+    
+        //! Default constructor.  Each local instance of T is default constructed.
+        enumerable_thread_specific() : 
+            my_construct_callback( internal::callback_leaf<T,internal::construct_by_default<T> >::make(/*dummy argument*/0) ) 
+        {}
+
+        //! Constructor with initializer functor.  Each local instance of T is constructed by T(finit()).
+        template <typename Finit>
+        enumerable_thread_specific( Finit finit ) : 
+            my_construct_callback( internal::callback_leaf<T,internal::construct_by_finit<T,Finit> >::make( finit ) ) 
+        {}
+    
+        //! Constructor with exemplar.  Each local instance of T is copy-constructed from the exemplar.
+        enumerable_thread_specific(const T& exemplar) : 
+            my_construct_callback( internal::callback_leaf<T,internal::construct_by_exemplar<T> >::make( exemplar ) )
+        {}
+    
+        //! Destructor
+        ~enumerable_thread_specific() { 
+            my_construct_callback->destroy();
+            this->clear();  // deallocate before destruction of this class completes,
+            // so that free(array*) can still reach the free_array() override
+        }
+      
+        //! Returns reference to the calling thread's local copy, discarding the exists flag
+        reference local() {
+            bool exists;
+            return local(exists);
+        }
+
+        //! Returns reference to calling thread's local copy, creating one if necessary
+        reference local(bool& exists)  {
+            void* ptr = this->table_lookup(exists);
+            return *(T*)ptr;
+        }
+
+        //! Get the number of local copies
+        size_type size() const { return my_locals.size(); }
+    
+        //! true if there have been no local copies created
+        bool empty() const { return my_locals.empty(); }
+    
+        //! begin iterator
+        iterator begin() { return iterator( my_locals, 0 ); }
+        //! end iterator
+        iterator end() { return iterator(my_locals, my_locals.size() ); }
+    
+        //! begin const iterator
+        const_iterator begin() const { return const_iterator(my_locals, 0); }
+    
+        //! end const iterator
+        const_iterator end() const { return const_iterator(my_locals, my_locals.size()); }
+
+        //! Get range for parallel algorithms
+        range_type range( size_t grainsize=1 ) { return range_type( begin(), end(), grainsize ); } 
+        
+        //! Get const range for parallel algorithms
+        const_range_type range( size_t grainsize=1 ) const { return const_range_type( begin(), end(), grainsize ); }
+
+        //! Destroys local copies
+        void clear() {
+            unconstruct_locals();
+            my_locals.clear();
+            this->table_clear();
+            // callback is not destroyed
+            // exemplar is not destroyed
+        }
+
+    private:
+
+        template<typename U, typename A2, ets_key_usage_type C2>
+        void internal_copy( const enumerable_thread_specific<U, A2, C2>& other);
+
+    public:
+
+        template<typename U, typename Alloc, ets_key_usage_type Cachetype>
+        enumerable_thread_specific( const enumerable_thread_specific<U, Alloc, Cachetype>& other ) : internal::ets_base<ETS_key_type> ()
+        {
+            internal_copy(other);
+        }
+
+        enumerable_thread_specific( const enumerable_thread_specific& other ) : internal::ets_base<ETS_key_type> ()
+        {
+            internal_copy(other);
+        }
+
+    private:
+
+        template<typename U, typename A2, ets_key_usage_type C2>
+        enumerable_thread_specific &
+        internal_assign(const enumerable_thread_specific<U, A2, C2>& other) {
+            if(static_cast<void *>( this ) != static_cast<const void *>( &other )) {
+                this->clear(); 
+                my_construct_callback->destroy();
+                my_construct_callback = 0;
+                internal_copy( other );
+            }
+            return *this;
+        }
+
+    public:
+
+        // assignment
+        enumerable_thread_specific& operator=(const enumerable_thread_specific& other) {
+            return internal_assign(other);
+        }
+
+        template<typename U, typename Alloc, ets_key_usage_type Cachetype>
+        enumerable_thread_specific& operator=(const enumerable_thread_specific<U, Alloc, Cachetype>& other)
+        {
+            return internal_assign(other);
+        }
+
+        // combine_func_t has signature T(T,T) or T(const T&, const T&)
+        template <typename combine_func_t>
+        T combine(combine_func_t f_combine) {
+            if(begin() == end()) {
+                internal::destruct_only<T> location;
+                my_construct_callback->construct(location.value.begin());
+                return *location.value.begin();
+            }
+            const_iterator ci = begin();
+            T my_result = *ci;
+            while(++ci != end()) 
+                my_result = f_combine( my_result, *ci );
+            return my_result;
+        }
+
+        // combine_func_t has signature void(T) or void(const T&)
+        template <typename combine_func_t>
+        void combine_each(combine_func_t f_combine) {
+            for(const_iterator ci = begin(); ci != end(); ++ci) {
+                f_combine( *ci );
+            }
+        }
+
+    }; // enumerable_thread_specific
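
For orientation, a minimal sketch of the public interface above: per-thread accumulation via local() followed by a serial combine(). This is editorial; it assumes tbb::parallel_for and tbb::blocked_range from headers added elsewhere in this commit, and the names parallel_sum and accumulate_body are illustrative only:

    #include "tbb/enumerable_thread_specific.h"
    #include "tbb/parallel_for.h"
    #include "tbb/blocked_range.h"
    #include <functional>

    typedef tbb::enumerable_thread_specific<long> counter_type;
    static counter_type per_thread_sum;      // one lazily created, zero-initialized long per thread

    struct accumulate_body {
        void operator()( const tbb::blocked_range<int>& r ) const {
            long& local = per_thread_sum.local();   // creates this thread's copy on first use
            for( int i = r.begin(); i != r.end(); ++i )
                local += i;
        }
    };

    long parallel_sum( int n ) {
        tbb::parallel_for( tbb::blocked_range<int>(0, n), accumulate_body() );
        return per_thread_sum.combine( std::plus<long>() );   // serial reduction over all copies
    }
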
+
+    template <typename T, typename Allocator, ets_key_usage_type ETS_key_type> 
+    template<typename U, typename A2, ets_key_usage_type C2>
+    void enumerable_thread_specific<T,Allocator,ETS_key_type>::internal_copy( const enumerable_thread_specific<U, A2, C2>& other) {
+        // Initialize my_construct_callback first, so that it is valid even if the rest of this routine throws an exception.
+        my_construct_callback = other.my_construct_callback->clone();
+
+        typedef internal::ets_base<ets_no_key> base;
+        __TBB_ASSERT(my_locals.size()==0,NULL);
+        this->table_reserve_for_copy( other );
+        for( base::array* r=other.my_root; r; r=r->next ) {
+            for( size_t i=0; i<r->size(); ++i ) {
+                base::slot& s1 = r->at(i);
+                if( !s1.empty() ) {
+                    base::slot& s2 = this->table_find(s1.key);
+                    if( s2.empty() ) { 
+#if TBB_DEPRECATED
+                        void* lref = &my_locals[my_locals.push_back(padded_element())];
+#else
+                        void* lref = &*my_locals.push_back(padded_element());
+#endif
+                        s2.ptr = new(lref) T(*(U*)s1.ptr);
+                        s2.key = s1.key;
+                    } else {
+                        // Skip the duplicate
+                    } 
+                }
+            }
+        }
+    }
+
+    template< typename Container >
+    class flattened2d {
+
+        // This intermediate typedef is to address issues with VC7.1 compilers
+        typedef typename Container::value_type conval_type;
+
+    public:
+
+        //! Basic types
+        typedef typename conval_type::size_type size_type;
+        typedef typename conval_type::difference_type difference_type;
+        typedef typename conval_type::allocator_type allocator_type;
+        typedef typename conval_type::value_type value_type;
+        typedef typename conval_type::reference reference;
+        typedef typename conval_type::const_reference const_reference;
+        typedef typename conval_type::pointer pointer;
+        typedef typename conval_type::const_pointer const_pointer;
+
+        typedef typename internal::segmented_iterator<Container, value_type> iterator;
+        typedef typename internal::segmented_iterator<Container, const value_type> const_iterator;
+
+        flattened2d( const Container &c, typename Container::const_iterator b, typename Container::const_iterator e ) : 
+            my_container(const_cast<Container*>(&c)), my_begin(b), my_end(e) { }
+
+        flattened2d( const Container &c ) : 
+            my_container(const_cast<Container*>(&c)), my_begin(c.begin()), my_end(c.end()) { }
+
+        iterator begin() { return iterator(*my_container) = my_begin; }
+        iterator end() { return iterator(*my_container) = my_end; }
+        const_iterator begin() const { return const_iterator(*my_container) = my_begin; }
+        const_iterator end() const { return const_iterator(*my_container) = my_end; }
+
+        size_type size() const {
+            size_type tot_size = 0;
+            for(typename Container::const_iterator i = my_begin; i != my_end; ++i) {
+                tot_size += i->size();
+            }
+            return tot_size;
+        }
+
+    private:
+
+        Container *my_container;
+        typename Container::const_iterator my_begin;
+        typename Container::const_iterator my_end;
+
+    };
+
+    template <typename Container>
+    flattened2d<Container> flatten2d(const Container &c, const typename Container::const_iterator b, const typename Container::const_iterator e) {
+        return flattened2d<Container>(c, b, e);
+    }
+
+    template <typename Container>
+    flattened2d<Container> flatten2d(const Container &c) {
+        return flattened2d<Container>(c);
+    }
+
+} // interface6
+
+namespace internal {
+using interface6::internal::segmented_iterator;
+}
+
+using interface6::enumerable_thread_specific;
+using interface6::flattened2d;
+using interface6::flatten2d;
+
+} // namespace tbb
+
+#endif
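
The combine() and combine_each() members above reduce every thread-local copy with a user-supplied functor, and flatten2d() exposes per-thread containers as one flat range. A minimal usage sketch follows; it assumes the exemplar constructor, the local() accessor and the parallel_for/blocked_range headers that ship elsewhere in this tbb/ tree (above this hunk or in other files of the commit), so treat it as an illustration rather than part of the committed sources.

    // Illustrative sketch: per-thread partial sums reduced with combine().
    #include "tbb/enumerable_thread_specific.h"
    #include "tbb/parallel_for.h"
    #include "tbb/blocked_range.h"
    #include <functional>
    #include <cstdio>

    typedef tbb::enumerable_thread_specific<long> counter_type;

    int main() {
        counter_type partial_sums( 0L );              // one zero-initialized copy per thread

        tbb::parallel_for( tbb::blocked_range<int>( 0, 1000000 ),
            [&]( const tbb::blocked_range<int> &r ) {
                long &local = partial_sums.local();   // this thread's private copy
                for ( int i = r.begin(); i != r.end(); ++i )
                    local += i;
            } );

        // combine_func_t with signature T(const T&, const T&), as documented above.
        long total = partial_sums.combine( std::plus<long>() );
        std::printf( "%ld\n", total );
        return 0;
    }
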
diff --git a/tbb/include/tbb/graph.h b/tbb/include/tbb/graph.h
new file mode 100644 (file)
index 0000000..c673e07
--- /dev/null
@@ -0,0 +1,3177 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_graph_H
+#define __TBB_graph_H
+
+#if !TBB_PREVIEW_GRAPH
+#error Set TBB_PREVIEW_GRAPH to include graph.h
+#endif
+
+#include "tbb_stddef.h"
+#include "atomic.h"
+#include "spin_mutex.h"
+#include "null_mutex.h"
+#include "spin_rw_mutex.h"
+#include "null_rw_mutex.h"
+#include "task.h"
+#include "concurrent_vector.h"
+#include "_aggregator_internal.h"
+
+// use the VC10 or gcc version of tuple if it is available.
+#if TBB_IMPLEMENT_CPP0X && (!defined(_MSC_VER) || _MSC_VER < 1600)
+#define TBB_PREVIEW_TUPLE 1
+#include "compat/tuple"
+#else
+#include <tuple>
+#endif
+
+#include<list>
+#include<queue>
+
+/** @file
+  \brief The graph related classes and functions
+
+  There are some applications that best express dependencies as messages
+  passed between nodes in a graph.  These messages may contain data or
+  simply act as signals that a predecessor has completed. The graph
+  class and its associated node classes can be used to express such
+  applications.
+*/
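
As a quick orientation for the classes declared below, here is a minimal sketch of this preview API: a function_node doing the work, a buffer_node collecting results, and an edge made with register_successor(). It only illustrates interfaces declared in this header (TBB_PREVIEW_GRAPH must be defined, and a C++0x lambda is assumed for brevity); it is not part of the committed file.

    // Illustrative sketch of the preview graph API declared below.
    #define TBB_PREVIEW_GRAPH 1
    #include "tbb/graph.h"
    #include <cstdio>

    int main() {
        tbb::graph g;
        tbb::function_node<int,int> squarer( g, tbb::graph::unlimited,
            []( int v ) { return v * v; } );
        tbb::buffer_node<int> results( g );
        squarer.register_successor( results );   // edge: squarer -> results

        for ( int i = 0; i < 10; ++i )
            squarer.try_put( i );
        g.wait_for_all();                        // all spawned bodies have finished

        int v;
        while ( results.try_get( v ) )           // items come back in arbitrary order
            std::printf( "%d\n", v );
        return 0;
    }
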
+
+namespace tbb {
+
+    //! The base of all graph nodes.  Allows them to be stored in a collection for deletion.
+    class graph_node {
+    public:
+        virtual ~graph_node() {} 
+    }; 
+
+    //! An empty class used for messages that mean "I'm done" 
+    class continue_msg {};
+
+    template< typename T > class sender;
+    template< typename T > class receiver;
+    class continue_receiver;
+
+    //! Pure virtual template class that defines a sender of messages of type T
+    template< typename T >
+    class sender {
+    public:
+        //! The output type of this sender
+        typedef T output_type;
+
+        //! The successor type for this node
+        typedef receiver<T> successor_type;
+
+        virtual ~sender() {}
+
+        //! Add a new successor to this node
+        virtual bool register_successor( successor_type &r ) = 0;
+
+        //! Removes a successor from this node
+        virtual bool remove_successor( successor_type &r ) = 0;
+
+        //! Request an item from the sender
+        virtual bool try_get( T & ) { return false; }
+
+        //! Reserves an item in the sender 
+        virtual bool try_reserve( T & ) { return false; }
+
+        //! Releases the reserved item
+        virtual bool try_release( ) { return false; }
+
+        //! Consumes the reserved item
+        virtual bool try_consume( ) { return false; }
+
+    };
+
+
+    //! Pure virtual template class that defines a receiver of messages of type T
+    template< typename T >
+    class receiver {
+    public:
+
+        //! The input type of this receiver
+        typedef T input_type;
+
+        //! The predecessor type for this node
+        typedef sender<T> predecessor_type;
+
+        //! Destructor
+        virtual ~receiver() {}
+
+        //! Put an item to the receiver
+        virtual bool try_put( T t ) = 0;
+
+        //! Add a predecessor to the node
+        virtual bool register_predecessor( predecessor_type & ) { return false; }
+
+        //! Remove a predecessor from the node
+        virtual bool remove_predecessor( predecessor_type & ) { return false; }
+
+    };
+
+    //! Base class for receivers of completion messages
+    /** These receivers automatically reset, but cannot be explicitly waited on */
+    class continue_receiver : public receiver< continue_msg > {
+    public:
+
+        //! The input type
+        typedef continue_msg input_type;
+
+        //! The predecessor type for this node
+        typedef sender< continue_msg > predecessor_type;
+
+        //! Constructor
+        continue_receiver( int number_of_predecessors = 0 ) { 
+            my_predecessor_count = number_of_predecessors;
+            my_current_count = 0;
+        }
+
+        //! Destructor
+        virtual ~continue_receiver() { }
+
+        //! Increments the trigger threshold
+        /* override */ bool register_predecessor( predecessor_type & ) {
+            spin_mutex::scoped_lock l(my_mutex);
+            ++my_predecessor_count;
+            return true;
+        }
+
+        //! Decrements the trigger threshold
+        /** Does not check to see if the removal of the predecessor now makes the current count
+            exceed the new threshold.  So removing a predecessor while the graph is active can cause
+            unexpected results. */
+        /* override */ bool remove_predecessor( predecessor_type & ) {
+            spin_mutex::scoped_lock l(my_mutex);
+            --my_predecessor_count;
+            return true;
+        }
+
+        //! Puts a continue_msg to the receiver
+        /** If the message causes the message count to reach the predecessor count, execute() is called and
+            the message count is reset to 0.  Otherwise the message count is incremented. */
+        /* override */ bool try_put( input_type ) {
+            {
+                spin_mutex::scoped_lock l(my_mutex);
+                if ( ++my_current_count < my_predecessor_count ) 
+                    return true;
+                else
+                    my_current_count = 0;
+            }
+            execute();
+            return true;
+        }
+
+    protected:
+
+        spin_mutex my_mutex;
+        int my_predecessor_count;
+        int my_current_count;
+
+        //! Does whatever should happen when the threshold is reached
+        /** This should be very fast or else spawn a task.  This is
+            called while the sender is blocked in the try_put(). */
+        virtual void execute() = 0;
+
+    };
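
A direct (if unusual) way to see the threshold behaviour described above is to subclass continue_receiver. The barrier_receiver below is hypothetical, not part of this header; it exists only to show when execute() fires.

    // Illustrative sketch: execute() runs once the message count reaches the
    // predecessor count, after which the count resets.
    #define TBB_PREVIEW_GRAPH 1
    #include "tbb/graph.h"
    #include <cstdio>

    class barrier_receiver : public tbb::continue_receiver {
    public:
        barrier_receiver() : tbb::continue_receiver( 2 ) {}   // trigger threshold = 2
    protected:
        /* override */ void execute() {
            // Runs inside the second sender's try_put(), so keep it short.
            std::printf( "both predecessors completed\n" );
        }
    };

    int main() {
        barrier_receiver b;
        b.try_put( tbb::continue_msg() );   // count 1 of 2: nothing happens yet
        b.try_put( tbb::continue_msg() );   // threshold reached: execute() runs
        return 0;
    }
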
+
+    //! @cond INTERNAL
+    namespace internal {
+
+        //! The state of an executable node
+        enum node_state { node_state_idle=0, node_state_nonidle=1, node_state_inactive=2 };
+
+
+        //! A functor that takes no input and generates a value of type Output
+        template< typename Output >
+        class source_body : no_assign   {
+        public:
+            virtual ~source_body() {}
+            virtual bool operator()(Output &output) = 0;
+        };
+
+        //! The leaf for source_body
+        template< typename Output, typename Body>
+        class source_body_leaf : public source_body<Output> {
+        public:
+            source_body_leaf( Body _body ) : body(_body) { }
+            /*override */ bool operator()(Output &output) { return body( output ); }
+        private:
+            Body body;
+        };
+
+        //! A functor that takes an Input and generates an Output
+        template< typename Input, typename Output >
+            class function_body : no_assign {
+        public:
+            virtual ~function_body() {}
+            virtual Output operator()(Input input) = 0;
+        };
+
+        //! the leaf for function_body
+        template <typename Input, typename Output, typename B>
+        class function_body_leaf : public function_body< Input, Output > {
+        public:
+            function_body_leaf( B _body ) : body(_body) { }
+            Output operator()(Input i) { return body(i); }
+
+        private:
+            B body;
+        };
+
+        //! the leaf for function_body specialized for Input and output of continue_msg
+        template <typename B>
+        class function_body_leaf< continue_msg, continue_msg, B> : public function_body< continue_msg, continue_msg > {
+        public:
+            function_body_leaf( B _body ) : body(_body) { }
+            continue_msg operator()( continue_msg i ) { 
+                body(i); 
+                return i; 
+            }
+
+        private:
+            B body;
+        };
+
+        //! the leaf for function_body specialized for Output of continue_msg
+        template <typename Input, typename B>
+        class function_body_leaf< Input, continue_msg, B> : public function_body< Input, continue_msg > {
+        public:
+            function_body_leaf( B _body ) : body(_body) { }
+            continue_msg operator()(Input i) { 
+                body(i); 
+                return continue_msg();
+            }
+
+        private:
+            B body;
+        };
+
+        //! the leaf for function_body specialized for Input of continue_msg
+        template <typename Output, typename B>
+        class function_body_leaf< continue_msg, Output, B > : public function_body< continue_msg, Output > {
+        public:
+            function_body_leaf( B _body ) : body(_body) { }
+            Output operator()(continue_msg i) { 
+                return body(i); 
+            }
+
+        private:
+            B body;
+        };
+
+        //! A task that calls a node's forward function
+        template< typename NodeType >
+        class forward_task : public task {
+
+            NodeType &my_node;
+
+        public:
+
+            forward_task( NodeType &n ) : my_node(n) {}
+
+            task *execute() {
+                my_node.forward();
+                return NULL;
+            }
+        };
+
+        //! A task that calls a node's apply_body function, passing in an input of type Input
+        template< typename NodeType, typename Input >
+        class apply_body_task : public task {
+
+            NodeType &my_node;
+            Input my_input;
+
+        public:
+
+            apply_body_task( NodeType &n, Input i ) : my_node(n), my_input(i) {}
+
+            task *execute() {
+                my_node.apply_body( my_input );
+                return NULL;
+            }
+        };
+
+        //! A task that calls a node's apply_body function with no input
+        template< typename NodeType >
+        class source_task : public task {
+
+            NodeType &my_node;
+
+        public:
+
+            source_task( NodeType &n ) : my_node(n) {}
+
+            task *execute() {
+                my_node.apply_body( );
+                return NULL;
+            }
+        };
+
+        //! An empty functor that takes an Input and returns a default constructed Output
+        template< typename Input, typename Output >
+        struct empty_body {
+           Output operator()( const Input & ) const { return Output(); } 
+        };
+
+        //! A node_cache maintains a std::queue of elements of type T.  Each operation is protected by a lock. 
+        template< typename T, typename M=spin_mutex >
+        class node_cache {
+            public:
+
+            typedef size_t size_type;
+
+            bool empty() {
+                typename my_mutex_type::scoped_lock lock( my_mutex );
+                return internal_empty();
+            }
+
+            void add( T &n ) {
+                typename my_mutex_type::scoped_lock lock( my_mutex );
+                internal_push(n);
+            }
+
+            void remove( T &n ) {
+                typename my_mutex_type::scoped_lock lock( my_mutex );
+                for ( size_t i = internal_size(); i != 0; --i ) {
+                    T &s = internal_pop();
+                    if ( &s != &n ) {
+                        internal_push(s);
+                    }
+                }
+            }
+
+        protected:
+
+            typedef M my_mutex_type;
+            my_mutex_type my_mutex;
+            std::queue< T * > my_q;
+
+            // Assumes lock is held
+            inline bool internal_empty( )  {
+                return my_q.empty();
+            }
+
+            // Assumes lock is held
+            inline size_type internal_size( )  {
+                return my_q.size(); 
+            }
+
+            // Assumes lock is held
+            inline void internal_push( T &n )  {
+                my_q.push(&n);
+            }
+
+            // Assumes lock is held
+            inline T &internal_pop() {
+                T *v = my_q.front();
+                my_q.pop();
+                return *v;
+            }
+
+        };
+
+        //! A cache of predecessors that only supports try_get
+        template< typename T, typename M=spin_mutex >
+        class predecessor_cache : public node_cache< sender<T>, M > {
+            public:
+            typedef M my_mutex_type;
+            typedef T output_type; 
+            typedef sender<output_type> predecessor_type;
+            typedef receiver<output_type> successor_type;
+
+            predecessor_cache( ) : my_owner( NULL ) { }
+
+            void set_owner( successor_type *owner ) { my_owner = owner; }
+
+            bool get_item( output_type &v ) {
+
+                bool msg = false;
+
+                do {
+                    predecessor_type *src;
+                    {
+                        typename my_mutex_type::scoped_lock lock(this->my_mutex);
+                        if ( this->internal_empty() ) {
+                            break;
+                        }
+                        src = &this->internal_pop();
+                    }
+
+                    // Try to get from this sender
+                    msg = src->try_get( v );
+
+                    if (msg == false) {
+                        // Relinquish ownership of the edge
+                        if ( my_owner) 
+                            src->register_successor( *my_owner );
+                    } else {
+                        // Retain ownership of the edge
+                        this->add(*src);
+                    }
+                } while ( msg == false );
+                return msg;
+            }
+
+        protected:
+            successor_type *my_owner;
+        };
+
+        //! A cache of predecessors that supports requests and reservations
+        template< typename T, typename M=spin_mutex >
+        class reservable_predecessor_cache : public predecessor_cache< T, M > {
+        public:
+            typedef M my_mutex_type;
+            typedef T output_type; 
+            typedef sender<T> predecessor_type;
+            typedef receiver<T> successor_type;
+
+            reservable_predecessor_cache( ) : reserved_src(NULL) { }
+
+            bool 
+            try_reserve( output_type &v ) {
+                bool msg = false;
+
+                do {
+                    {
+                        typename my_mutex_type::scoped_lock lock(this->my_mutex);
+                        if ( reserved_src || this->internal_empty() ) 
+                            return false;
+
+                        reserved_src = &this->internal_pop();
+                    }
+
+                    // Try to get from this sender
+                    msg = reserved_src->try_reserve( v );
+
+                    if (msg == false) {
+                        typename my_mutex_type::scoped_lock lock(this->my_mutex);
+                        // Relinquish ownership of the edge
+                        reserved_src->register_successor( *this->my_owner );
+                        reserved_src = NULL;
+                    } else {
+                        // Retain ownership of the edge
+                        this->add( *reserved_src );
+                    }
+                } while ( msg == false );
+
+                return msg;
+            }
+
+            bool 
+            try_release( ) {
+                reserved_src->try_release( );
+                reserved_src = NULL;
+                return true;
+            }
+
+            bool 
+            try_consume( ) {
+                reserved_src->try_consume( );
+                reserved_src = NULL;
+                return true;
+            }
+
+        private:
+            predecessor_type *reserved_src;
+        };
+
+
+        //! An abstract cache of successors
+        template<typename T, typename M=spin_rw_mutex >
+        class successor_cache : no_copy {
+        protected:
+
+            typedef M my_mutex_type;
+            my_mutex_type my_mutex;
+
+            typedef std::list< receiver<T> * > my_successors_type;
+            my_successors_type my_successors;
+
+            sender<T> *my_owner;
+
+        public:
+
+            successor_cache( ) : my_owner(NULL) {}
+
+            void set_owner( sender<T> *owner ) { my_owner = owner; }
+
+            virtual ~successor_cache() {}
+
+            void register_successor( receiver<T> &r ) {
+                typename my_mutex_type::scoped_lock l(my_mutex, true);
+                my_successors.push_back( &r ); 
+            }
+
+            void remove_successor( receiver<T> &r ) {
+                typename my_mutex_type::scoped_lock l(my_mutex, true);
+                for ( typename my_successors_type::iterator i = my_successors.begin();
+                      i != my_successors.end(); ++i ) { 
+                    if ( *i == & r ) { 
+                        my_successors.erase(i);
+                        break;
+                    }
+                }
+            }
+
+            bool empty() { 
+                typename my_mutex_type::scoped_lock l(my_mutex, false);
+                return my_successors.empty(); 
+            }
+
+            virtual bool try_put( T t ) = 0; 
+         };
+
+        //! An abstract cache of successors, specialized to continue_msg
+        template<>
+        class successor_cache< continue_msg > : no_copy {
+        protected:
+
+            typedef spin_rw_mutex my_mutex_type;
+            my_mutex_type my_mutex;
+
+            typedef std::list< receiver<continue_msg> * > my_successors_type;
+            my_successors_type my_successors;
+
+            sender<continue_msg> *my_owner;
+
+        public:
+
+            successor_cache( ) : my_owner(NULL) {}
+
+            void set_owner( sender<continue_msg> *owner ) { my_owner = owner; }
+
+            virtual ~successor_cache() {}
+
+            void register_successor( receiver<continue_msg> &r ) {
+                my_mutex_type::scoped_lock l(my_mutex, true);
+                my_successors.push_back( &r ); 
+                if ( my_owner )
+                    r.register_predecessor( *my_owner );
+            }
+
+            void remove_successor( receiver<continue_msg> &r ) {
+                my_mutex_type::scoped_lock l(my_mutex, true);
+                for ( my_successors_type::iterator i = my_successors.begin();
+                      i != my_successors.end(); ++i ) { 
+                    if ( *i == & r ) { 
+                        if ( my_owner )
+                            r.remove_predecessor( *my_owner );
+                        my_successors.erase(i);
+                        break;
+                    }
+                }
+            }
+
+            bool empty() { 
+                my_mutex_type::scoped_lock l(my_mutex, false);
+                return my_successors.empty(); 
+            }
+
+            virtual bool try_put( continue_msg t ) = 0; 
+
+         };
+
+        //! A cache of successors that are broadcast to
+        template<typename T, typename M=spin_rw_mutex>
+        class broadcast_cache : public successor_cache<T, M> {
+            typedef M my_mutex_type;
+            typedef std::list< receiver<T> * > my_successors_type;
+
+        public:
+
+            broadcast_cache( ) {}
+
+            bool try_put( T t ) {
+                bool msg = false;
+                bool upgraded = false;
+                typename my_mutex_type::scoped_lock l(this->my_mutex, false);
+                typename my_successors_type::iterator i = this->my_successors.begin();
+                while ( i != this->my_successors.end() ) {
+                   if ( (*i)->try_put( t ) == true ) {
+                       ++i;
+                       msg = true;
+                   } else {
+                      if ( (*i)->register_predecessor(*this->my_owner) ) {
+                          if (!upgraded) {
+                              l.upgrade_to_writer();
+                              upgraded = true;
+                          }
+                          i = this->my_successors.erase(i);
+                      }
+                      else {
+                          ++i;
+                      }
+                   }
+                }
+                return msg;
+            }
+        };
+
+        //! A cache of successors that are put in a round-robin fashion
+        template<typename T, typename M=spin_rw_mutex >
+        class round_robin_cache : public successor_cache<T, M> {
+            typedef size_t size_type;
+            typedef M my_mutex_type;
+            typedef std::list< receiver<T> * > my_successors_type;
+
+        public:
+
+            round_robin_cache( ) {}
+
+            size_type size() {
+                typename my_mutex_type::scoped_lock l(this->my_mutex, false);
+                return this->my_successors.size();
+            }
+
+            bool try_put( T t ) {
+                bool upgraded = false;
+                typename my_mutex_type::scoped_lock l(this->my_mutex, false);
+                typename my_successors_type::iterator i = this->my_successors.begin();
+                while ( i != this->my_successors.end() ) {
+                   if ( (*i)->try_put( t ) ) {
+                       return true;
+                   } else {
+                      if ( (*i)->register_predecessor(*this->my_owner) ) {
+                          if (!upgraded) {
+                              l.upgrade_to_writer();
+                              upgraded = true;
+                          }
+                          i = this->my_successors.erase(i);
+                      }
+                      else {
+                          ++i;
+                      }
+                   }
+                }
+                return false;
+            }
+        };
+
+        template<typename T>
+        class decrementer : public continue_receiver, internal::no_copy {
+
+            T *my_node;
+
+            void execute() {
+                my_node->decrement_counter();
+            }
+
+        public:
+           
+            typedef continue_msg input_type;
+            typedef continue_msg output_type;
+            decrementer( int number_of_predecessors = 0 ) : continue_receiver( number_of_predecessors ) { }
+            void set_owner( T *node ) { my_node = node; }
+        };
+
+    }
+    //! @endcond INTERNAL
+
+
+    //! The graph class
+    /** This class serves as a handle to the graph */
+    class graph : internal::no_copy {
+
+        template< typename Body >
+        class run_task : public task {
+        public: 
+            run_task( Body& body ) : my_body(body) {}
+            task *execute() {
+                my_body();
+                return NULL;
+            }
+        private:
+            Body my_body;
+        };
+
+        template< typename Receiver, typename Body >
+        class run_and_put_task : public task {
+        public: 
+            run_and_put_task( Receiver &r, Body& body ) : my_receiver(r), my_body(body) {}
+            task *execute() {
+                my_receiver.try_put( my_body() );
+                return NULL;
+            }
+        private:
+            Receiver &my_receiver;
+            Body my_body;
+        };
+
+    public:
+
+        //! An enumeration that provides the two most common concurrency levels: unlimited and serial
+        enum concurrency { unlimited = 0, serial = 1 };
+
+        //! Constructs a graph with no nodes.
+        graph() : my_root_task( new ( task::allocate_root( ) ) empty_task ) {
+            my_root_task->set_ref_count(1);
+        }
+
+        //! Destroys the graph.
+        /** Calls wait_for_all on the graph, deletes all of the nodes appended by calls to add, and then 
+            destroys the root task of the graph. */ 
+        ~graph() {
+            wait_for_all();
+            my_root_task->set_ref_count(0);
+            task::destroy( *my_root_task );
+        }
+
+
+        //! Used to register that an external entity may still interact with the graph.
+        /** The graph will not return from wait_for_all until a matching number of decrement_wait_count calls
+            is made. */
+        void increment_wait_count() { 
+            if (my_root_task)
+                my_root_task->increment_ref_count();
+        }
+
+        //! Deregisters an external entity that may have interacted with the graph.
+        /** The graph will not return from wait_for_all until the number of decrement_wait_count calls
+            matches the number of increment_wait_count calls. */
+        void decrement_wait_count() { 
+            if (my_root_task)
+                my_root_task->decrement_ref_count(); 
+        }
+
+        //! Spawns a task that runs a body and puts its output to a specific receiver
+        /** The task is spawned as a child of the graph. This is useful for running tasks 
+            that need to block a wait_for_all() on the graph.  For example, a one-off source. */
+        template< typename Receiver, typename Body >
+            void run( Receiver &r, Body body ) {
+           task::enqueue( * new ( task::allocate_additional_child_of( *my_root_task ) ) 
+               run_and_put_task< Receiver, Body >( r, body ) );
+        }
+
+        //! Spawns a task that runs a function object 
+        /** The task is spawned as a child of the graph. This is useful for running tasks 
+            that need to block a wait_for_all() on the graph. For example, a one-off source. */
+        template< typename Body >
+        void run( Body body ) {
+           task::enqueue( * new ( task::allocate_additional_child_of( *my_root_task ) ) 
+               run_task< Body >( body ) );
+        }
+
+        //! Waits until the graph is idle and the number of decrement_wait_count calls equals the number of increment_wait_count calls.
+        /** The waiting thread will go off and steal work while it is blocked in the wait_for_all. */
+        void wait_for_all() {
+            if (my_root_task)
+                my_root_task->wait_for_all();
+            my_root_task->set_ref_count(1);
+        }
+
+        //! Returns the root task of the graph
+        task * root_task() {
+            return my_root_task;
+        }
+
+    private:
+
+        task *my_root_task;
+
+    };
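
The run() and wait-count methods above let free-standing work and external activities participate in wait_for_all(). A small sketch, assuming lambda support; the body and timing comments are illustrative only.

    // Illustrative sketch: work enqueued through the graph is covered by wait_for_all().
    #define TBB_PREVIEW_GRAPH 1
    #include "tbb/graph.h"
    #include <cstdio>

    int main() {
        tbb::graph g;

        // run() enqueues the body as an additional child of the graph's root task.
        g.run( []() { std::printf( "hello from the graph\n" ); } );

        // An external activity can hold the graph open explicitly.
        g.increment_wait_count();
        // ... hand work to some other thread here ...
        g.decrement_wait_count();

        g.wait_for_all();   // returns once the body is done and the counts balance
        return 0;
    }
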
+
+
+    //! @cond INTERNAL
+    namespace internal {
+
+        //! Implements methods for a function node that takes a value of type Input as input
+        template< typename Input, typename Output >
+        class function_input : public receiver<Input>, no_assign {
+            typedef sender<Input> predecessor_type;
+            enum op_stat {WAIT=0, SUCCEEDED, FAILED};
+            enum op_type {reg_pred, rem_pred, app_body, tryput, try_fwd};
+            typedef function_input<Input, Output> my_class;
+
+        public:
+            //! The input type of this receiver
+            typedef Input input_type;
+            //! The output type of this receiver
+            typedef Output output_type;
+
+            //! Constructor for function_input
+            template< typename Body >
+            function_input( graph &g, size_t max_concurrency, Body& body )
+                : my_root_task(g.root_task()), my_max_concurrency(max_concurrency), my_concurrency(internal::node_state_idle),
+                  my_body( new internal::function_body_leaf< input_type, output_type, Body>(body) ),
+                forwarder_busy(false) {
+                my_predecessors.set_owner(this);
+                my_aggregator.initialize_handler(my_handler(this));
+            }
+
+            //! Destructor
+            virtual ~function_input() { delete my_body; }
+
+            //! Put to the node
+            virtual bool try_put( input_type t ) {
+               if ( my_max_concurrency == 0 ) {
+                   spawn_body_task( t );
+                   return true;
+               } else {
+                   my_operation op_data(t, tryput);
+                   my_aggregator.execute(&op_data);
+                   return op_data.status == SUCCEEDED;
+               }
+            }
+
+            //! Adds src to the list of cached predecessors.
+            /* override */ bool register_predecessor( predecessor_type &src ) {
+                my_operation op_data(reg_pred);
+                op_data.r = &src;
+                my_aggregator.execute(&op_data);
+                return true;
+            }
+
+            //! Removes src from the list of cached predecessors.
+            /* override */ bool remove_predecessor( predecessor_type &src ) {
+                my_operation op_data(rem_pred);
+                op_data.r = &src;
+                my_aggregator.execute(&op_data);
+                return true;
+            }
+
+        protected:
+            task *my_root_task;
+            const size_t my_max_concurrency;
+            size_t my_concurrency;
+            function_body<input_type, output_type> *my_body;
+            predecessor_cache<input_type, null_mutex > my_predecessors;
+
+            virtual broadcast_cache<output_type > &successors() = 0;
+
+        private:
+            friend class apply_body_task< function_input< input_type, output_type >, input_type >;
+            friend class forward_task< function_input< input_type, output_type > >;
+
+            class my_operation : public aggregated_operation< my_operation > {
+            public:
+                char type;
+                union {
+                    input_type *elem;
+                    predecessor_type *r;
+                };
+                my_operation(const input_type& e, op_type t) :
+                    type(char(t)), elem(const_cast<input_type*>(&e)) {}
+                my_operation(op_type t) : type(char(t)), r(NULL) {}
+            };
+
+            bool forwarder_busy;
+            typedef internal::aggregating_functor<my_class, my_operation> my_handler;
+            friend class internal::aggregating_functor<my_class, my_operation>;
+            aggregator< my_handler, my_operation > my_aggregator;
+
+            void handle_operations(my_operation *op_list) {
+                my_operation *tmp;
+                while (op_list) {
+                    tmp = op_list;
+                    op_list = op_list->next;
+                    switch (tmp->type) {
+                    case reg_pred:
+                        my_predecessors.add(*(tmp->r));
+                        __TBB_store_with_release(tmp->status, SUCCEEDED);
+                        if (!forwarder_busy) {
+                            forwarder_busy = true;
+                            spawn_forward_task();
+                        }
+                        break;
+                    case rem_pred:
+                        my_predecessors.remove(*(tmp->r));
+                        __TBB_store_with_release(tmp->status, SUCCEEDED);
+                        break;
+                    case app_body:
+                        __TBB_ASSERT(my_max_concurrency != 0, NULL);
+                        --my_concurrency;
+                        __TBB_store_with_release(tmp->status, SUCCEEDED);
+                        if (my_concurrency<my_max_concurrency) {
+                            input_type i;
+                            if (my_predecessors.get_item(i)) {
+                                ++my_concurrency;
+                                spawn_body_task(i);
+                            }
+                        }
+                        break;
+                    case tryput: internal_try_put(tmp);  break;
+                    case try_fwd: internal_forward(tmp);  break;
+                    }
+                }
+            }
+
+            //! Put to the node
+            void internal_try_put(my_operation *op) {
+                __TBB_ASSERT(my_max_concurrency != 0, NULL);
+                if (my_concurrency < my_max_concurrency) {
+                   ++my_concurrency;
+                   spawn_body_task(*(op->elem));
+                   __TBB_store_with_release(op->status, SUCCEEDED);
+               } else {
+                   __TBB_store_with_release(op->status, FAILED);
+               }
+            }
+
+            //! Tries to spawn bodies if available and if concurrency allows
+            void internal_forward(my_operation *op) {
+                if (my_concurrency<my_max_concurrency || !my_max_concurrency) {
+                    input_type i;
+                    if (my_predecessors.get_item(i)) {
+                        ++my_concurrency;
+                        __TBB_store_with_release(op->status, SUCCEEDED);
+                        spawn_body_task(i);
+                        return;
+                    }
+                }
+                __TBB_store_with_release(op->status, FAILED);
+                forwarder_busy = false;
+            }
+
+            //! Applies the body to the provided input
+            void apply_body( input_type &i ) {
+                successors().try_put( (*my_body)(i) );
+                if ( my_max_concurrency != 0 ) {
+                    my_operation op_data(app_body);
+                    my_aggregator.execute(&op_data);
+                }
+            }
+
+           //! Spawns a task that calls apply_body( input )
+           inline void spawn_body_task( input_type &input ) {
+               task::enqueue(*new(task::allocate_additional_child_of(*my_root_task)) apply_body_task<function_input<input_type, output_type>, input_type >(*this, input));
+           }
+
+           //! This is executed by an enqueued task, the "forwarder"
+           void forward() {
+               my_operation op_data(try_fwd);
+               do {
+                   op_data.status = WAIT;
+                   my_aggregator.execute(&op_data);
+               } while (op_data.status == SUCCEEDED);
+           }
+
+           //! Spawns a task that calls forward()
+           inline void spawn_forward_task() {
+               task::enqueue(*new(task::allocate_additional_child_of(*my_root_task)) forward_task<function_input<input_type, output_type> >(*this));
+           }
+        };
+
+        //! Implements methods for an executable node that takes continue_msg as input
+        template< typename Output >
+        class continue_input : public continue_receiver {
+        public:
+
+            //! The input type of this receiver
+            typedef continue_msg input_type;
+    
+            //! The output type of this receiver
+            typedef Output output_type;
+
+            template< typename Body >
+            continue_input( graph &g, Body& body )
+                : my_root_task(g.root_task()), 
+                 my_body( new internal::function_body_leaf< input_type, output_type, Body>(body) ) { }
+
+            template< typename Body >
+            continue_input( graph &g, int number_of_predecessors, Body& body )
+                : continue_receiver( number_of_predecessors ), my_root_task(g.root_task()), 
+                 my_body( new internal::function_body_leaf< input_type, output_type, Body>(body) ) { }
+
+        protected:
+
+            task *my_root_task;
+            function_body<input_type, output_type> *my_body;
+
+            virtual broadcast_cache<output_type > &successors() = 0; 
+
+            friend class apply_body_task< continue_input< Output >, continue_msg >;
+
+            //! Applies the body to the provided input
+            /* override */ void apply_body( input_type ) {
+                successors().try_put( (*my_body)( continue_msg() ) );
+            }
+
+            //! Spawns a task that applies the body
+            /* override */ void execute( ) {
+                task::enqueue( * new ( task::allocate_additional_child_of( *my_root_task ) ) 
+                   apply_body_task< continue_input< Output >, continue_msg >( *this, continue_msg() ) ); 
+            }
+        };
+
+        //! Implements methods for both executable and function nodes that put Output to their successors
+        template< typename Output >
+        class function_output : public sender<Output> {
+        public:
+
+            typedef Output output_type;
+
+            function_output() { }
+
+            //! Adds a new successor to this node
+            /* override */ bool register_successor( receiver<output_type> &r ) {
+                successors().register_successor( r );
+                return true;
+            }
+
+            //! Removes a successor from this node
+            /* override */ bool remove_successor( receiver<output_type> &r ) {
+                successors().remove_successor( r );
+                return true;
+            }
+  
+        protected:
+
+            virtual broadcast_cache<output_type > &successors() = 0; 
+
+        };
+
+    }
+    //! @endcond INTERNAL
+
+    //! An executable node that acts as a source, i.e. it has no predecessors
+    template < typename Output >
+    class source_node : public graph_node, public sender< Output > {
+    public:
+
+        //! The type of the output message, which is complete
+        typedef Output output_type;           
+
+        //! The type of successors of this node
+        typedef receiver< Output > successor_type;
+
+        //! Constructor for a node with a successor
+        template< typename Body >
+        source_node( graph &g, Body body, bool is_active = true )
+             : my_root_task(g.root_task()), my_state( is_active ? internal::node_state_idle : internal::node_state_inactive ),
+              my_body( new internal::source_body_leaf< output_type, Body>(body) ),
+              my_reserved(false), my_has_cached_item(false) { 
+            my_successors.set_owner(this);
+        }
+
+        //! The destructor
+        ~source_node() { delete my_body; }
+
+        //! Add a new successor to this node
+        /* override */ bool register_successor( receiver<output_type> &r ) {
+            spin_mutex::scoped_lock lock(my_mutex);
+            my_successors.register_successor(r);
+            if ( my_state != internal::node_state_inactive )
+                spawn_put();
+            return true;
+        }
+
+        //! Removes a successor from this node
+        /* override */ bool remove_successor( receiver<output_type> &r ) {
+            spin_mutex::scoped_lock lock(my_mutex);
+            my_successors.remove_successor(r);
+            return true;
+        }
+
+        //! Request an item from the node
+        /*override */ bool try_get( output_type &v ) {
+            spin_mutex::scoped_lock lock(my_mutex);
+            if ( my_reserved )  
+                return false;
+
+            if ( my_has_cached_item ) {
+                v = my_cached_item;
+                my_has_cached_item = false;
+            } else if ( (*my_body)(v) == false ) {
+                return false;
+            }
+            return true;
+        }
+
+        //! Reserves an item.
+        /* override */ bool try_reserve( output_type &v ) {
+            spin_mutex::scoped_lock lock(my_mutex);
+            if ( my_reserved ) {
+                return false;
+            }
+
+            if ( !my_has_cached_item && (*my_body)(my_cached_item) )  
+                my_has_cached_item = true;
+
+            if ( my_has_cached_item ) {
+                v = my_cached_item;
+                my_reserved = true;
+                return true;
+            } else {
+                return false;
+            }
+        }
+
+        //! Release a reserved item.  
+        /**  true = item has been released and so remains in sender, dest must request or reserve future items */
+        /* override */ bool try_release( ) {
+            spin_mutex::scoped_lock lock(my_mutex);
+            __TBB_ASSERT( my_reserved && my_has_cached_item, "releasing non-existent reservation" );
+            my_reserved = false;
+            spawn_put();
+            return true;
+        }
+
+        //! Consumes a reserved item
+        /* override */ bool try_consume( ) {
+            spin_mutex::scoped_lock lock(my_mutex);
+            __TBB_ASSERT( my_reserved && my_has_cached_item, "consuming non-existent reservation" );
+            my_reserved = false;
+            my_has_cached_item = false;
+            if ( !my_successors.empty() ) {
+                spawn_put();
+            }
+            return true;
+        }
+
+        //! Activates a node that was created in the inactive state
+        void activate() {
+            spin_mutex::scoped_lock lock(my_mutex);
+            my_state = internal::node_state_idle;
+            if ( !my_successors.empty() )
+                spawn_put();
+        }
+
+    private:
+
+        task *my_root_task;
+        spin_mutex my_mutex;
+        internal::node_state my_state;
+        internal::source_body<output_type> *my_body;
+        internal::broadcast_cache< output_type > my_successors;
+        bool my_reserved;
+        bool my_has_cached_item;
+        output_type my_cached_item;
+
+        friend class internal::source_task< source_node< output_type > >;
+
+        //! Applies the body
+        /* override */ void apply_body( ) {
+            output_type v;
+            if ( try_reserve(v) == false )
+                return;
+
+            if ( my_successors.try_put( v ) ) 
+                try_consume();
+            else
+                try_release();
+        }
+
+        //! Spawns a task that applies the body
+        /* override */ void spawn_put( ) {
+            task::enqueue( * new ( task::allocate_additional_child_of( *my_root_task ) ) 
+               internal::source_task< source_node< output_type > >( *this ) ); 
+        }
+
+    };
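
The body passed to a source_node returns false to end the stream; creating the node inactive and calling activate() once the edges are in place avoids losing the first items. A sketch, with the sink written as a function_node from further down in this header:

    // Illustrative sketch: an inactive source_node feeding a serial function_node.
    #define TBB_PREVIEW_GRAPH 1
    #include "tbb/graph.h"
    #include <cstdio>

    struct counter_body {
        int next;
        counter_body() : next( 0 ) {}
        bool operator()( int &v ) {          // returning false ends the stream
            if ( next >= 10 ) return false;
            v = next++;
            return true;
        }
    };

    int main() {
        tbb::graph g;
        tbb::source_node<int> src( g, counter_body(), /*is_active=*/false );
        tbb::function_node<int> sink( g, tbb::graph::serial,
            []( int v ) { std::printf( "%d\n", v ); } );

        src.register_successor( sink );      // connect before activating
        src.activate();                      // start pulling from counter_body
        g.wait_for_all();
        return 0;
    }
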
+
+    //! Implements a function node that supports Input -> Output
+    template <typename Input, typename Output = continue_msg >
+    class function_node : public graph_node, public internal::function_input<Input,Output>, public internal::function_output<Output> {
+    public:
+
+        typedef Input input_type;
+        typedef Output output_type;
+        typedef sender< input_type > predecessor_type;
+        typedef receiver< output_type > successor_type;
+
+        //! Constructor
+        template< typename Body >
+        function_node( graph &g, size_t concurrency, Body body )
+        : internal::function_input<input_type,output_type>( g, concurrency, body ) {
+            my_successors.set_owner(this);
+        }
+
+    protected:
+
+        internal::broadcast_cache<output_type> my_successors; 
+        /* override */ internal::broadcast_cache<output_type> &successors () { return my_successors; }
+
+    };
+
+    //! Implements an executable node that supports continue_msg -> Output
+    template <typename Output>
+    class executable_node : public graph_node, public internal::continue_input<Output>, public internal::function_output<Output> {
+    public:
+
+        typedef continue_msg input_type;
+        typedef Output output_type;
+        typedef sender< input_type > predecessor_type;
+        typedef receiver< output_type > successor_type;
+
+         //! Constructor for executable node with continue_msg -> Output
+         template <typename Body >
+         executable_node( graph &g, Body body )
+                 : internal::continue_input<output_type>( g, body ) {
+             my_successors.set_owner(this);
+         }
+
+         //! Constructor for executable node with continue_msg -> Output
+         template <typename Body >
+         executable_node( graph &g, int number_of_predecessors, Body body )
+                 : internal::continue_input<output_type>( g, number_of_predecessors, body ) {
+             my_successors.set_owner(this);
+         }
+
+    protected:
+
+        internal::broadcast_cache<output_type> my_successors; 
+        /* override */ internal::broadcast_cache<output_type> &successors () { return my_successors; }
+
+    };
+
+
+
+    template< typename T >
+    class overwrite_node : public graph_node, public receiver<T>, public sender<T>, internal::no_copy {
+    public:
+
+        typedef T input_type;
+        typedef T output_type;
+        typedef sender< input_type > predecessor_type;
+        typedef receiver< output_type > successor_type;
+
+        overwrite_node() : my_buffer_is_valid(false) {
+            my_successors.set_owner( this );
+        }
+
+        ~overwrite_node() {}
+
+        /* override */ bool register_successor( successor_type &s ) {
+            spin_mutex::scoped_lock l( my_mutex );
+            if ( my_buffer_is_valid ) {
+                // We have a valid value that must be forwarded immediately.
+                if ( s.try_put( my_buffer ) || !s.register_predecessor( *this  ) ) {
+                    // We add the successor: it accepted our put or it rejected it but won't let us become a predecessor
+                    my_successors.register_successor( s );
+                    return true;
+                } else {
+                    // We don't add the successor: it rejected our put and we became its predecessor instead
+                    return false;
+                }
+            } else {
+                // No valid value yet, just add as successor
+                my_successors.register_successor( s );
+                return true;
+            }
+        }
+
+        /* override */ bool remove_successor( successor_type &s ) {
+            spin_mutex::scoped_lock l( my_mutex );
+            my_successors.remove_successor(s);
+            return true;
+        }
+
+        /* override */ bool try_put( T v ) {
+            spin_mutex::scoped_lock l( my_mutex );
+            my_buffer = v;
+            my_buffer_is_valid = true;
+            my_successors.try_put(v);
+            return true;
+        }
+
+        /* override */ bool try_get( T &v ) {
+            spin_mutex::scoped_lock l( my_mutex );
+            if ( my_buffer_is_valid ) {
+                v = my_buffer;
+                return true;
+            } else {
+                return false;
+            }
+        }
+
+        bool is_valid() {
+           spin_mutex::scoped_lock l( my_mutex );
+           return my_buffer_is_valid;
+        }
+
+        void clear() {
+           spin_mutex::scoped_lock l( my_mutex );
+           my_buffer_is_valid = false;
+        }
+
+    protected:
+
+        spin_mutex my_mutex;
+        internal::broadcast_cache< T, null_rw_mutex > my_successors;
+        T my_buffer;
+        bool my_buffer_is_valid;
+
+    };
+
+    template< typename T >
+    class write_once_node : public overwrite_node<T> {
+    public:
+
+        typedef T input_type;
+        typedef T output_type;
+        typedef sender< input_type > predecessor_type;
+        typedef receiver< output_type > successor_type;
+
+        /* override */ bool try_put( T v ) {
+            spin_mutex::scoped_lock l( this->my_mutex );
+            if ( this->my_buffer_is_valid ) {
+                return false;
+            } else {
+                this->my_buffer = v;
+                this->my_buffer_is_valid = true;
+                this->my_successors.try_put(v);
+                return true;
+            }
+        }
+    };
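
The difference between the two buffering nodes above is easiest to see side by side: overwrite_node keeps the most recent value, write_once_node keeps only the first. A sketch (neither node takes a graph in this preview):

    // Illustrative sketch: overwrite vs. write-once buffering.
    #define TBB_PREVIEW_GRAPH 1
    #include "tbb/graph.h"
    #include <cstdio>

    int main() {
        tbb::overwrite_node<int>  latest;
        tbb::write_once_node<int> first;

        for ( int i = 1; i <= 3; ++i ) {
            latest.try_put( i );   // always succeeds and overwrites the buffer
            first.try_put( i );    // succeeds only for i == 1
        }

        int v;
        if ( latest.try_get( v ) ) std::printf( "latest = %d\n", v );  // prints 3
        if ( first.try_get( v ) )  std::printf( "first  = %d\n", v );  // prints 1
        return 0;
    }
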
+
+    //! Broadcasts a completion message when it receives completion messages from all predecessors, then resets.
+    /** Is equivalent to an executable_node< continue_msg > with an empty_body */
+    class continue_node : public executable_node< continue_msg > { 
+    public:
+
+        typedef continue_msg input_type;
+        typedef continue_msg output_type;
+        typedef sender< input_type > predecessor_type;
+        typedef receiver< output_type > successor_type;
+
+        continue_node( graph &g ) : executable_node<continue_msg>( g, internal::empty_body< continue_msg, continue_msg>() ) {}
+    };
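
A continue_node is the natural join point for dependency-style graphs: each register_successor() call on a predecessor raises its trigger threshold, so it fires only after every predecessor has signalled. A sketch built from the nodes above (lambdas assumed for brevity):

    // Illustrative sketch: two executable_nodes joined by a continue_node.
    #define TBB_PREVIEW_GRAPH 1
    #include "tbb/graph.h"
    #include <cstdio>

    int main() {
        tbb::graph g;
        tbb::executable_node<tbb::continue_msg> a( g,
            []( tbb::continue_msg ) { std::printf( "a\n" ); } );
        tbb::executable_node<tbb::continue_msg> b( g,
            []( tbb::continue_msg ) { std::printf( "b\n" ); } );
        tbb::continue_node join( g );
        tbb::executable_node<tbb::continue_msg> done( g,
            []( tbb::continue_msg ) { std::printf( "a and b are both done\n" ); } );

        a.register_successor( join );        // join's threshold becomes 1 ...
        b.register_successor( join );        // ... then 2
        join.register_successor( done );

        a.try_put( tbb::continue_msg() );    // start both branches
        b.try_put( tbb::continue_msg() );
        g.wait_for_all();
        return 0;
    }
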
+
+    //! Forwards messages of type T to all successors
+    template <typename T>
+    class broadcast_node : public graph_node, public receiver<T>, public sender<T>, internal::no_copy {
+
+        internal::broadcast_cache<T> my_successors;
+
+    public:
+
+        typedef T input_type;
+        typedef T output_type;
+        typedef sender< input_type > predecessor_type;
+        typedef receiver< output_type > successor_type;
+
+        broadcast_node( ) {
+           my_successors.set_owner( this ); 
+        }
+
+        //! Adds a successor
+        virtual bool register_successor( receiver<T> &r ) {
+            my_successors.register_successor( r );
+            return true;
+        }
+
+        //! Removes r as a successor
+        virtual bool remove_successor( receiver<T> &r ) {
+            my_successors.remove_successor( r );
+            return true;
+        }
+
+        /* override */ bool try_put( T t ) {
+            my_successors.try_put(t);
+            return true;
+        }
+
+    };
+
+#include "_item_buffer.h"
+
+    //! Forwards messages in arbitrary order
+    template <typename T, typename A=cache_aligned_allocator<T> >
+        class buffer_node : public graph_node, public reservable_item_buffer<T, A>, public receiver<T>, public sender<T>, internal::no_copy {
+    public:
+        typedef T input_type;
+        typedef T output_type;
+        typedef sender< input_type > predecessor_type;
+        typedef receiver< output_type > successor_type;
+        typedef buffer_node<T, A> my_class;
+    protected:
+        typedef size_t size_type;
+        internal::round_robin_cache< T, null_rw_mutex > my_successors;
+
+        task *my_parent;
+
+        friend class internal::forward_task< buffer_node< T, A > >;
+
+        enum op_type {reg_succ, rem_succ, req_item, res_item, rel_res, con_res, put_item, try_fwd};
+        enum op_stat {WAIT=0, SUCCEEDED, FAILED};
+
+        // implements the aggregator_operation concept
+        class buffer_operation : public internal::aggregated_operation< buffer_operation > {
+        public:
+            char type;
+            T *elem;
+            successor_type *r;
+            buffer_operation(const T& e, op_type t) :
+                type(char(t)), elem(const_cast<T*>(&e)), r(NULL) {}
+            buffer_operation(op_type t) : type(char(t)), r(NULL) {}
+        };
+
+        bool forwarder_busy;
+        typedef internal::aggregating_functor<my_class, buffer_operation> my_handler;
+        friend class internal::aggregating_functor<my_class, buffer_operation>;
+        internal::aggregator< my_handler, buffer_operation> my_aggregator;
+
+        virtual void handle_operations(buffer_operation *op_list) {
+            buffer_operation *tmp;
+            bool try_forwarding=false;
+            while (op_list) {
+                tmp = op_list;
+                op_list = op_list->next;
+                switch (tmp->type) {
+                case reg_succ: internal_reg_succ(tmp);  try_forwarding = true; break;
+                case rem_succ: internal_rem_succ(tmp); break;
+                case req_item: internal_pop(tmp); break;
+                case res_item: internal_reserve(tmp); break;
+                case rel_res:  internal_release(tmp);  try_forwarding = true; break;
+                case con_res:  internal_consume(tmp);  try_forwarding = true; break;
+                case put_item: internal_push(tmp);  try_forwarding = true; break;
+                case try_fwd:  internal_forward(tmp); break;
+                }
+            }
+            if (try_forwarding && !forwarder_busy) {
+                forwarder_busy = true;
+                task::enqueue(*new(task::allocate_additional_child_of(*my_parent)) internal::forward_task< buffer_node<input_type, A> >(*this));
+            }
+        }
+
+        //! This is executed by an enqueued task, the "forwarder"
+        virtual void forward() {
+            buffer_operation op_data(try_fwd);
+            do {
+                op_data.status = WAIT;
+                my_aggregator.execute(&op_data);
+            } while (op_data.status == SUCCEEDED);
+        }
+
+        //! Register successor
+        virtual void internal_reg_succ(buffer_operation *op) {
+            my_successors.register_successor(*(op->r));
+            __TBB_store_with_release(op->status, SUCCEEDED);
+        }
+
+        //! Remove successor
+        virtual void internal_rem_succ(buffer_operation *op) {
+            my_successors.remove_successor(*(op->r));
+            __TBB_store_with_release(op->status, SUCCEEDED);
+        }
+
+        //! Tries to forward valid items to successors
+        virtual void internal_forward(buffer_operation *op) {
+            T i_copy;
+            bool success = false; // flagged when a successor accepts
+            size_type counter = my_successors.size();
+            // Try forwarding, giving each successor a chance
+            while (counter>0 && !this->buffer_empty() && this->item_valid(this->my_tail-1)) {
+                this->fetch_back(i_copy);
+                if( my_successors.try_put(i_copy) ) {
+                    this->invalidate_back();
+                    --(this->my_tail);
+                    success = true; // found an accepting successor
+                }
+                --counter;
+            }
+            if (success && !counter)
+                __TBB_store_with_release(op->status, SUCCEEDED);
+            else {
+                __TBB_store_with_release(op->status, FAILED);
+                forwarder_busy = false;
+            }
+        }
+
+        virtual void internal_push(buffer_operation *op) {
+            this->push_back(*(op->elem));
+            __TBB_store_with_release(op->status, SUCCEEDED);
+        }
+
+        virtual void internal_pop(buffer_operation *op) {
+            if(this->pop_back(*(op->elem))) {
+                __TBB_store_with_release(op->status, SUCCEEDED);
+            }
+            else {
+                __TBB_store_with_release(op->status, FAILED);
+            }
+        }
+
+        virtual void internal_reserve(buffer_operation *op) {
+            if(this->reserve_front(*(op->elem))) {
+                __TBB_store_with_release(op->status, SUCCEEDED);
+            }
+            else {
+                __TBB_store_with_release(op->status, FAILED);
+            }
+        }
+
+        virtual void internal_consume(buffer_operation *op) {
+            this->consume_front();
+            __TBB_store_with_release(op->status, SUCCEEDED);
+        }
+
+        virtual void internal_release(buffer_operation *op) {
+            this->release_front();
+            __TBB_store_with_release(op->status, SUCCEEDED);
+        }
+
+    public:
+        //! Constructor
+        buffer_node( graph &g ) : reservable_item_buffer<T>(),
+            my_parent( g.root_task() ), forwarder_busy(false) {
+            my_successors.set_owner(this);
+            my_aggregator.initialize_handler(my_handler(this));
+        }
+
+        virtual ~buffer_node() {}
+
+        //
+        // message sender implementation
+        //
+
+        //! Adds a new successor.
+        /** Adds successor r to the list of successors; may forward tasks.  */
+        /* override */ bool register_successor( receiver<output_type> &r ) {
+            buffer_operation op_data(reg_succ);
+            op_data.r = &r;
+            my_aggregator.execute(&op_data);
+            return true;
+        }
+
+        //! Removes a successor.
+        /** Removes successor r from the list of successors.
+            It also calls r.remove_predecessor(*this) to remove this node as a predecessor. */
+        /* override */ bool remove_successor( receiver<output_type> &r ) {
+            r.remove_predecessor(*this);
+            buffer_operation op_data(rem_succ);
+            op_data.r = &r;
+            my_aggregator.execute(&op_data);
+            return true;
+        }
+
+        //! Request an item from the buffer_node
+        /**  true = v contains the returned item<BR>
+             false = no item has been returned */
+        /* override */ bool try_get( T &v ) {
+            buffer_operation op_data(req_item);
+            op_data.elem = &v;
+            my_aggregator.execute(&op_data);
+            return (op_data.status==SUCCEEDED);
+        }
+
+        //! Reserves an item.
+        /**  false = no item can be reserved<BR>
+             true = an item is reserved */
+        /* override */ bool try_reserve( T &v ) {
+            buffer_operation op_data(res_item);
+            op_data.elem = &v;
+            my_aggregator.execute(&op_data);
+            return (op_data.status==SUCCEEDED);
+        }
+
+        //! Release a reserved item.
+        /**  true = item has been released and so remains in sender */
+        /* override */ bool try_release() {
+            buffer_operation op_data(rel_res);
+            my_aggregator.execute(&op_data);
+            return true;
+        }
+
+        //! Consumes a reserved item.
+        /** true = item is removed from sender and reservation removed */
+        /* override */ bool try_consume() {
+            buffer_operation op_data(con_res);
+            my_aggregator.execute(&op_data);
+            return true;
+        }
+
+        //! Receive an item
+        /** true is always returned */
+        /* override */ bool try_put(T t) {
+            buffer_operation op_data(t, put_item);
+            my_aggregator.execute(&op_data);
+            return true;
+        }
+    };
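+
+    // Illustrative sketch (not part of the library): a buffer_node can be driven
+    // directly through the sender/receiver interface defined above, assuming a
+    // default-constructible graph as declared earlier in this header.
+    //
+    //     graph g;
+    //     buffer_node<int> b(g);
+    //     b.try_put(42);            // always accepted; the item is buffered
+    //     int v;
+    //     if (b.try_get(v)) { ... } // succeeds if the item was not already
+    //                               // forwarded to a registered successor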
+
+
+    //! Forwards messages in FIFO order
+    template <typename T, typename A=cache_aligned_allocator<T> >
+    class queue_node : public buffer_node<T, A> {
+    protected:
+        typedef typename buffer_node<T, A>::size_type size_type;
+        typedef typename buffer_node<T, A>::buffer_operation queue_operation;
+
+        enum op_stat {WAIT=0, SUCCEEDED, FAILED};
+
+        //! Tries to forward valid items to successors
+        /* override */ void internal_forward(queue_operation *op) {
+            T i_copy;
+            bool success = false; // flagged when a successor accepts
+            size_type counter = this->my_successors.size();
+            if (this->my_reserved || !this->item_valid(this->my_head)){
+                __TBB_store_with_release(op->status, FAILED);
+                this->forwarder_busy = false;
+                return;
+            }
+            // Keep trying to send items while there is at least one accepting successor
+            while (counter>0 && this->item_valid(this->my_head)) {
+                this->fetch_front(i_copy);
+                if(this->my_successors.try_put(i_copy)) {
+                     this->invalidate_front();
+                     ++(this->my_head);
+                    success = true; // found an accepting successor
+                }
+                --counter;
+            }
+            if (success && !counter)
+                __TBB_store_with_release(op->status, SUCCEEDED);
+            else {
+                __TBB_store_with_release(op->status, FAILED);
+                this->forwarder_busy = false;
+            }
+        }
+
+        /* override */ void internal_pop(queue_operation *op) {
+            if ( this->my_reserved || !this->item_valid(this->my_head)){
+                __TBB_store_with_release(op->status, FAILED);
+            }
+            else {
+                this->pop_front(*(op->elem));
+                __TBB_store_with_release(op->status, SUCCEEDED);
+            }
+        }
+        /* override */ void internal_reserve(queue_operation *op) {
+            if (this->my_reserved || !this->item_valid(this->my_head)) {
+                __TBB_store_with_release(op->status, FAILED);
+            }
+            else {
+                this->my_reserved = true;
+                this->fetch_front(*(op->elem));
+                this->invalidate_front();
+                __TBB_store_with_release(op->status, SUCCEEDED);
+            }
+        }
+        /* override */ void internal_consume(queue_operation *op) {
+            this->consume_front();
+            __TBB_store_with_release(op->status, SUCCEEDED);
+        }
+
+    public:
+
+        typedef T input_type;
+        typedef T output_type;
+        typedef sender< input_type > predecessor_type;
+        typedef receiver< output_type > successor_type;
+
+        //! Constructor
+        queue_node( graph &g ) : buffer_node<T, A>(g) {}
+    };
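+
+    // Illustrative sketch (not part of the library): queue_node preserves FIFO
+    // order, so items come back out in the order in which they were put.
+    //
+    //     graph g;
+    //     queue_node<int> q(g);
+    //     q.try_put(1);
+    //     q.try_put(2);
+    //     int v;
+    //     q.try_get(v);             // v == 1 (the oldest item), assuming no
+    //                               // successor consumed the items first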
+
+    //! Forwards messages in sequence order
+    template< typename T, typename A=cache_aligned_allocator<T> >
+    class sequencer_node : public queue_node<T, A> {
+        internal::function_body< T, size_t > *my_sequencer;
+    public:
+
+        typedef T input_type;
+        typedef T output_type;
+        typedef sender< input_type > predecessor_type;
+        typedef receiver< output_type > successor_type;
+
+        //! Constructor
+        template< typename Sequencer >
+        sequencer_node( graph &g, const Sequencer& s ) : queue_node<T, A>(g),
+            my_sequencer(new internal::function_body_leaf< T, size_t, Sequencer>(s) ) {}
+
+        //! Destructor
+        ~sequencer_node() { delete my_sequencer; }
+    protected:
+        typedef typename buffer_node<T, A>::size_type size_type;
+        typedef typename buffer_node<T, A>::buffer_operation sequencer_operation;
+
+        enum op_stat {WAIT=0, SUCCEEDED, FAILED};
+
+    private:
+        /* override */ void internal_push(sequencer_operation *op) {
+            size_type tag = (*my_sequencer)(*(op->elem));
+
+            this->my_tail = (tag+1 > this->my_tail) ? tag+1 : this->my_tail;
+
+            if(this->size() > this->capacity())
+                this->grow_my_array(this->size());  // tail already has 1 added to it
+            this->item(tag) = std::make_pair( *(op->elem), true );
+            __TBB_store_with_release(op->status, SUCCEEDED);
+        }
+    };
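+
+    // Illustrative sketch (not part of the library): the Sequencer functor maps
+    // each item to its sequence number, and items are released strictly in that
+    // order.  The msg type and its id field below are hypothetical.
+    //
+    //     struct msg { size_t id; /* payload */ };
+    //     struct seq { size_t operator()(const msg &m) const { return m.id; } };
+    //     graph g;
+    //     sequencer_node<msg> s(g, seq());
+    //     // puts that arrive out of order are buffered at slot seq()(m) and
+    //     // forwarded to successors in increasing sequence order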
+
+    //! Forwards messages in priority order
+    template< typename T, typename Compare = std::less<T>, typename A=cache_aligned_allocator<T> >
+    class priority_queue_node : public buffer_node<T, A> {
+    public:
+        typedef T input_type;
+        typedef T output_type;
+        typedef sender< input_type > predecessor_type;
+        typedef receiver< output_type > successor_type;
+
+        //! Constructor
+        priority_queue_node( graph &g ) : buffer_node<T, A>(g), mark(0) {}
+
+    protected:
+        typedef typename buffer_node<T, A>::size_type size_type;
+        typedef typename buffer_node<T, A>::item_type item_type;
+        typedef typename buffer_node<T, A>::buffer_operation prio_operation;
+
+        enum op_stat {WAIT=0, SUCCEEDED, FAILED};
+
+        /* override */ void handle_operations(prio_operation *op_list) {
+            prio_operation *tmp;
+            bool try_forwarding=false;
+            while (op_list) {
+                tmp = op_list;
+                op_list = op_list->next;
+                switch (tmp->type) {
+                case buffer_node<T, A>::reg_succ: this->internal_reg_succ(tmp); try_forwarding = true; break;
+                case buffer_node<T, A>::rem_succ: this->internal_rem_succ(tmp); break;
+                case buffer_node<T, A>::put_item: internal_push(tmp); try_forwarding = true; break;
+                case buffer_node<T, A>::try_fwd: internal_forward(tmp); break;
+                case buffer_node<T, A>::rel_res: internal_release(tmp); try_forwarding = true; break;
+                case buffer_node<T, A>::con_res: internal_consume(tmp); try_forwarding = true; break;
+                case buffer_node<T, A>::req_item: internal_pop(tmp); break;
+                case buffer_node<T, A>::res_item: internal_reserve(tmp); break;
+                }
+            }
+            // Process pops: for now, no special pop processing is needed.
+            if (mark<this->my_tail) heapify();
+            if (try_forwarding && !this->forwarder_busy) {
+                this->forwarder_busy = true;
+                task::enqueue(*new(task::allocate_additional_child_of(*(this->my_parent))) internal::forward_task< buffer_node<input_type, A> >(*this));
+            }
+        }
+
+        //! Tries to forward valid items to successors
+        /* override */ void internal_forward(prio_operation *op) {
+            T i_copy;
+            bool success = false; // flagged when a successor accepts
+            size_type counter = this->my_successors.size();
+
+            if (this->my_reserved || this->my_tail == 0) {
+                __TBB_store_with_release(op->status, FAILED);
+                this->forwarder_busy = false;
+                return;
+            }
+            // Keep trying to send while there exists an accepting successor
+            while (counter>0 && this->my_tail > 0) {
+                i_copy = this->my_array[0].first;
+                bool msg = this->my_successors.try_put(i_copy);
+                if ( msg == true ) {
+                     if (mark == this->my_tail) --mark;
+                    --(this->my_tail);
+                    this->my_array[0].first=this->my_array[this->my_tail].first;
+                    if (this->my_tail > 1) // don't reheap for heap of size 1
+                        reheap();
+                    success = true; // found an accepting successor
+                }
+                --counter;
+            }
+            if (success && !counter)
+                __TBB_store_with_release(op->status, SUCCEEDED);
+            else {
+                __TBB_store_with_release(op->status, FAILED);
+                this->forwarder_busy = false;
+            }
+        }
+
+        /* override */ void internal_push(prio_operation *op) {
+            if ( this->my_tail >= this->my_array_size )
+                this->grow_my_array( this->my_tail + 1 );
+            this->my_array[this->my_tail] = std::make_pair( *(op->elem), true );
+            ++(this->my_tail);
+            __TBB_store_with_release(op->status, SUCCEEDED);
+        }
+        /* override */ void internal_pop(prio_operation *op) {
+            if ( this->my_reserved == true || this->my_tail == 0 ) {
+                __TBB_store_with_release(op->status, FAILED);
+            }
+            else {
+                if (mark<this->my_tail &&
+                    compare(this->my_array[0].first,
+                            this->my_array[this->my_tail-1].first)) {
+                    // there are newly pushed elems; last one higher than top
+                    // copy the data
+                    *(op->elem) = this->my_array[this->my_tail-1].first;
+                    --(this->my_tail);
+                    __TBB_store_with_release(op->status, SUCCEEDED);
+                }
+                else { // extract and push the last element down heap
+                    *(op->elem) = this->my_array[0].first; // copy the data
+                    if (mark == this->my_tail) --mark;
+                    --(this->my_tail);
+                    __TBB_store_with_release(op->status, SUCCEEDED);
+                    this->my_array[0].first=this->my_array[this->my_tail].first;
+                    if (this->my_tail > 1) // don't reheap for heap of size 1
+                        reheap();
+                }
+            }
+        }
+        /* override */ void internal_reserve(prio_operation *op) {
+            if (this->my_reserved == true || this->my_tail == 0) {
+                __TBB_store_with_release(op->status, FAILED);
+            }
+            else {
+                this->my_reserved = true;
+                *(op->elem) = reserved_item = this->my_array[0].first;
+                if (mark == this->my_tail) --mark;
+                --(this->my_tail);
+                __TBB_store_with_release(op->status, SUCCEEDED);
+                this->my_array[0].first = this->my_array[this->my_tail].first;
+                if (this->my_tail > 1) // don't reheap for heap of size 1
+                    reheap();
+            }
+        }
+        /* override */ void internal_consume(prio_operation *op) {
+            this->my_reserved = false;
+            __TBB_store_with_release(op->status, SUCCEEDED);
+        }
+        /* override */ void internal_release(prio_operation *op) {
+            if (this->my_tail >= this->my_array_size)
+                this->grow_my_array( this->my_tail + 1 );
+            this->my_array[this->my_tail] = std::make_pair(reserved_item, true);
+            ++(this->my_tail);
+            this->my_reserved = false;
+            __TBB_store_with_release(op->status, SUCCEEDED);
+            heapify();
+        }
+    private:
+        Compare compare;
+        size_type mark;
+        input_type reserved_item;
+
+        void heapify() {
+            if (!mark) mark = 1;
+            for (; mark<this->my_tail; ++mark) { // for each unheaped element
+                size_type cur_pos = mark;
+                input_type to_place = this->my_array[mark].first;
+                do { // push to_place up the heap
+                    size_type parent = (cur_pos-1)>>1;
+                    if (!compare(this->my_array[parent].first, to_place))
+                        break;
+                    this->my_array[cur_pos].first = this->my_array[parent].first;
+                    cur_pos = parent;
+                } while( cur_pos );
+                this->my_array[cur_pos].first = to_place;
+            }
+        }
+
+        void reheap() {
+            size_type cur_pos=0, child=1;
+            while (child < mark) {
+                size_type target = child;
+                if (child+1<mark &&
+                    compare(this->my_array[child].first,
+                            this->my_array[child+1].first))
+                    ++target;
+                // target now has the higher priority child
+                if (compare(this->my_array[target].first,
+                            this->my_array[this->my_tail].first))
+                    break;
+                this->my_array[cur_pos].first = this->my_array[target].first;
+                cur_pos = target;
+                child = (cur_pos<<1)+1;
+            }
+            this->my_array[cur_pos].first = this->my_array[this->my_tail].first;
+        }
+    };
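+
+    // Illustrative sketch (not part of the library): with the default
+    // Compare = std::less<T>, the largest buffered item is forwarded or popped
+    // first.
+    //
+    //     graph g;
+    //     priority_queue_node<int> pq(g);
+    //     pq.try_put(3);
+    //     pq.try_put(7);
+    //     pq.try_put(5);
+    //     int v;
+    //     pq.try_get(v);            // v == 7, assuming no successor drained the
+    //                               // node first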
+
+    //! Forwards messages only if the threshold has not been reached
+    /** This node forwards items until its threshold is reached.
+        It contains no buffering.  If the downstream node rejects, the
+        message is dropped. */
+    template< typename T >
+    class limiter_node : public graph_node, public receiver< T >, public sender< T >, internal::no_copy {
+    public:
+
+        typedef T input_type;
+        typedef T output_type;
+        typedef sender< input_type > predecessor_type;
+        typedef receiver< output_type > successor_type;
+
+    private:
+
+        task *my_root_task;
+        size_t my_threshold;
+        size_t my_count;
+        internal::predecessor_cache< T > my_predecessors;
+        spin_mutex my_mutex;
+        internal::broadcast_cache< T > my_successors;
+
+        friend class internal::forward_task< limiter_node<T> >;
+
+        // Let decrementer call decrement_counter()
+        friend class internal::decrementer< limiter_node<T> >;
+
+        void decrement_counter() {
+            input_type v;
+            
+            // If we can't get / put an item immediately then drop the count
+            if ( my_predecessors.get_item( v ) == false 
+                 || my_successors.try_put(v) == false ) {
+                spin_mutex::scoped_lock lock(my_mutex);
+                --my_count;
+                if ( !my_predecessors.empty() ) 
+                    task::enqueue( * new ( task::allocate_additional_child_of( *my_root_task ) ) 
+                                internal::forward_task< limiter_node<T> >( *this ) );
+            }
+        }
+
+        void forward() {
+            {
+                spin_mutex::scoped_lock lock(my_mutex);
+                if ( my_count < my_threshold ) 
+                    ++my_count;
+                else
+                    return;
+            }
+            decrement_counter();
+        }
+
+    public:
+
+        //! The internal receiver< continue_msg > that decrements the count
+        internal::decrementer< limiter_node<T> > decrement;
+
+        //! Constructor
+        limiter_node( graph &g, size_t threshold, int number_of_decrement_predecessors = 0 ) : 
+           my_root_task(g.root_task()), my_threshold(threshold), my_count(0), decrement(number_of_decrement_predecessors) {
+            my_predecessors.set_owner(this);
+            my_successors.set_owner(this);
+            decrement.set_owner(this);
+        }
+
+        //! Adds a new successor to this node
+        /* override */ bool register_successor( receiver<output_type> &r ) {
+            my_successors.register_successor(r);
+            return true;
+        }
+
+        //! Removes a successor from this node
+        /** r.remove_predecessor(*this) is also called. */
+        /* override */ bool remove_successor( receiver<output_type> &r ) {
+            r.remove_predecessor(*this);
+            my_successors.remove_successor(r);
+            return true;
+        }
+
+        //! Puts an item to this receiver
+        /* override */ bool try_put( T t ) {
+            {
+                spin_mutex::scoped_lock lock(my_mutex);
+                if ( my_count >= my_threshold ) 
+                    return false;
+                else
+                    ++my_count; 
+            }
+
+            bool msg = my_successors.try_put(t);
+
+            if ( msg != true ) {
+                spin_mutex::scoped_lock lock(my_mutex);
+                --my_count;
+                if ( !my_predecessors.empty() ) 
+                    task::enqueue( * new ( task::allocate_additional_child_of( *my_root_task ) ) 
+                                internal::forward_task< limiter_node<T> >( *this ) );
+            }
+
+            return msg;
+        }
+
+        //! Adds src to the list of cached predecessors.
+        /* override */ bool register_predecessor( predecessor_type &src ) {
+            spin_mutex::scoped_lock lock(my_mutex);
+            my_predecessors.add( src );
+            if ( my_count < my_threshold && !my_successors.empty() ) 
+                task::enqueue( * new ( task::allocate_additional_child_of( *my_root_task ) ) 
+                               internal::forward_task< limiter_node<T> >( *this ) );
+            return true;
+        }
+
+        //! Removes src from the list of cached predecessors.
+        /* override */ bool remove_predecessor( predecessor_type &src ) {
+            my_predecessors.remove( src );
+            return true;
+        }
+
+    };
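+
+    // Illustrative sketch (not part of the library): a limiter_node passes at most
+    // `threshold` messages downstream; a continue_msg put to its decrement member
+    // either pulls another item from a cached predecessor or lowers the count.
+    //
+    //     graph g;
+    //     limiter_node<int> lim(g, 2);
+    //     lim.try_put(1);           // accepted if a successor takes it (count -> 1)
+    //     lim.try_put(2);           // accepted (count -> 2)
+    //     lim.try_put(3);           // rejected: threshold reached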
+
+    namespace internal {
+
+    struct forwarding_base {
+        forwarding_base(task *rt) : my_root_task(rt) {}
+        virtual ~forwarding_base() {}
+        virtual void decrement_port_count() = 0;
+        virtual void increment_port_count() = 0;
+        // moved here so input ports can queue tasks
+        task* my_root_task;
+    };
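+
+    // Port-count protocol used by the join machinery below: each input port calls
+    // decrement_port_count() on its forwarding_base when it becomes able to
+    // contribute to a tuple (it gains a predecessor or a buffered item); once the
+    // count reaches zero the join front-end spawns a forward task, and the count is
+    // raised again via increment_port_count() or reset_port_count() depending on
+    // the join policy.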
+
+    template< int N >
+    struct join_helper {
+
+        template< typename TupleType, typename PortType >
+        static inline void set_join_node_pointer(TupleType &my_input, PortType *port) {
+            std::get<N-1>( my_input ).set_join_node_pointer(port);
+            join_helper<N-1>::set_join_node_pointer( my_input, port );
+        }
+        template< typename TupleType >
+        static inline void consume_reservations( TupleType &my_input ) {
+            std::get<N-1>( my_input ).consume();
+            join_helper<N-1>::consume_reservations( my_input );
+        }
+
+        template< typename TupleType >
+        static inline void release_my_reservation( TupleType &my_input ) {
+            std::get<N-1>( my_input ).release();
+        }
+
+        template <typename TupleType>
+        static inline void release_reservations( TupleType &my_input) {
+            join_helper<N-1>::release_reservations(my_input);
+            release_my_reservation(my_input);
+        }
+
+        template< typename InputTuple, typename OutputTuple >
+        static inline bool reserve( InputTuple &my_input, OutputTuple &out) {
+            if ( !std::get<N-1>( my_input ).reserve( std::get<N-1>( out ) ) ) return false;
+            if ( !join_helper<N-1>::reserve( my_input, out ) ) {
+                release_my_reservation( my_input );
+                return false;
+            }
+            return true;
+        }
+
+        template<typename InputTuple, typename OutputTuple>
+        static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) {
+            bool res = std::get<N-1>(my_input).get_item(std::get<N-1>(out) ); // may fail
+            return join_helper<N-1>::get_my_item(my_input, out) && res;       // do get on other inputs before returning
+        }
+
+        template<typename InputTuple, typename OutputTuple>
+        static inline bool get_items(InputTuple &my_input, OutputTuple &out) {
+            return get_my_item(my_input, out);
+        }
+
+        template<typename InputTuple>
+        static inline void reset_my_port(InputTuple &my_input) {
+            join_helper<N-1>::reset_my_port(my_input);
+            std::get<N-1>(my_input).reset_port();
+        }
+
+        template<typename InputTuple>
+        static inline void reset_ports(InputTuple& my_input) {
+            reset_my_port(my_input);
+        }
+    };
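+
+    // join_helper<N> unrolls each operation over tuple elements N-1 down to 0 by
+    // recursing into join_helper<N-1>; the join_helper<1> specialization below
+    // terminates the recursion at element 0.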
+
+    template< >
+    struct join_helper<1> {
+
+        template< typename TupleType, typename PortType >
+        static inline void set_join_node_pointer(TupleType &my_input, PortType *port) {
+            std::get<0>( my_input ).set_join_node_pointer(port);
+        }
+
+        template< typename TupleType >
+        static inline void consume_reservations( TupleType &my_input ) {
+            std::get<0>( my_input ).consume();
+        }
+
+        template< typename TupleType >
+        static inline void release_my_reservation( TupleType &my_input ) {
+            std::get<0>( my_input ).release();
+        }
+        
+        template<typename TupleType>
+        static inline void release_reservations( TupleType &my_input) {
+            release_my_reservation(my_input);
+        }
+
+        template< typename InputTuple, typename OutputTuple >
+        static inline bool reserve( InputTuple &my_input, OutputTuple &out) {
+            return std::get<0>( my_input ).reserve( std::get<0>( out ) );
+        }
+
+        template<typename InputTuple, typename OutputTuple>
+        static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) {
+            return std::get<0>(my_input).get_item(std::get<0>(out));
+        }
+
+        template<typename InputTuple, typename OutputTuple>
+        static inline bool get_items(InputTuple &my_input, OutputTuple &out) {
+            return get_my_item(my_input, out);
+        }
+
+        template<typename InputTuple>
+        static inline void reset_my_port(InputTuple &my_input) {
+            std::get<0>(my_input).reset_port();
+        }
+
+        template<typename InputTuple>
+        static inline void reset_ports(InputTuple& my_input) {
+            reset_my_port(my_input);
+        }
+    };
+
+    namespace join_policy_namespace {
+        enum join_policy { reserving
+            , queueing
+        };
+    }
+    using namespace join_policy_namespace;
+
+    //! The two-phase join port
+    template< typename T >
+    class reserving_port : public receiver<T> {
+    public:
+        typedef T input_type;
+        typedef sender<T> predecessor_type;
+    private:
+        // ----------- Aggregator ------------
+        enum op_type { reg_pred, rem_pred, res_item, rel_res, con_res };
+        enum op_stat {WAIT=0, SUCCEEDED, FAILED};
+        typedef reserving_port<T> my_class;
+
+        class reserving_port_operation : public aggregated_operation<reserving_port_operation> {
+        public:
+            char type;
+            union {
+                T *my_arg;
+                predecessor_type *my_pred;
+            };
+            reserving_port_operation(const T& e, op_type t) :
+                type(char(t)), my_arg(const_cast<T*>(&e)) {}
+            reserving_port_operation(const predecessor_type &s, op_type t) : type(char(t)), 
+                my_pred(const_cast<predecessor_type *>(&s)) {}
+            reserving_port_operation(op_type t) : type(char(t)) {}
+        };
+
+        typedef internal::aggregating_functor<my_class, reserving_port_operation> my_handler;
+        friend class internal::aggregating_functor<my_class, reserving_port_operation>;
+        aggregator<my_handler, reserving_port_operation> my_aggregator;
+
+        void handle_operations(reserving_port_operation* op_list) {
+            reserving_port_operation *current;
+            bool no_predecessors;
+            while(op_list) {
+                current = op_list;
+                op_list = op_list->next;
+                switch(current->type) {
+                case reg_pred:
+                    no_predecessors = my_predecessors.empty();
+                    my_predecessors.add(*(current->my_pred));
+                    if ( no_predecessors ) {
+                        my_join->decrement_port_count( ); // may try to forward
+                    }
+                    __TBB_store_with_release(current->status, SUCCEEDED);
+                    break;
+                case rem_pred:
+                    my_predecessors.remove(*(current->my_pred));
+                    if(my_predecessors.empty()) my_join->increment_port_count();
+                    __TBB_store_with_release(current->status, SUCCEEDED);
+                    break;
+                case res_item:
+                    if ( reserved ) {
+                        __TBB_store_with_release(current->status, FAILED);
+                    }
+                    else if ( my_predecessors.try_reserve( *(current->my_arg) ) ) {
+                        reserved = true;
+                        __TBB_store_with_release(current->status, SUCCEEDED);
+                    } else {
+                        if ( my_predecessors.empty() ) {
+                            my_join->increment_port_count();
+                        }
+                        __TBB_store_with_release(current->status, FAILED);
+                    }
+                    break;
+                case rel_res:
+                    reserved = false;
+                    my_predecessors.try_release( );
+                    __TBB_store_with_release(current->status, SUCCEEDED);
+                    break;
+                case con_res:
+                    reserved = false;
+                    my_predecessors.try_consume( );
+                    __TBB_store_with_release(current->status, SUCCEEDED);
+                    break;
+                }
+            }
+        }
+
+    public:
+
+        //! Constructor
+        reserving_port() : reserved(false) {
+            my_join = NULL;
+            my_predecessors.set_owner( this );
+            my_aggregator.initialize_handler(my_handler(this));
+        }
+
+        // copy constructor
+        reserving_port(const reserving_port& /* other */) : receiver<T>() {
+            reserved = false;
+            my_join = NULL;
+            my_predecessors.set_owner( this );
+            my_aggregator.initialize_handler(my_handler(this));
+        }
+
+        void set_join_node_pointer(forwarding_base *join) {
+            my_join = join;
+        }
+
+        // Always rejects incoming puts, so the arc is reversed (and reservations can be made).
+        bool try_put( T ) {
+            return false;
+        }
+
+        //! Add a predecessor
+        bool register_predecessor( sender<T> &src ) {
+            reserving_port_operation op_data(src, reg_pred);
+            my_aggregator.execute(&op_data);
+            return op_data.status == SUCCEEDED;
+        }
+
+        //! Remove a predecessor
+        bool remove_predecessor( sender<T> &src ) {
+            reserving_port_operation op_data(src, rem_pred);
+            my_aggregator.execute(&op_data);
+            return op_data.status == SUCCEEDED;
+        }
+
+        //! Reserve an item from the port
+        bool reserve( T &v ) {
+            reserving_port_operation op_data(v, res_item);
+            my_aggregator.execute(&op_data);
+            return op_data.status == SUCCEEDED;
+        }
+
+        //! Release the port
+        void release( ) {
+            reserving_port_operation op_data(rel_res);
+            my_aggregator.execute(&op_data);
+        }
+
+        //! Complete use of the port
+        void consume( ) {
+            reserving_port_operation op_data(con_res);
+            my_aggregator.execute(&op_data);
+        }
+
+    private:
+        forwarding_base *my_join;
+        reservable_predecessor_cache< T, null_mutex > my_predecessors;
+        bool reserved;
+    };
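+
+    // Reservation life cycle of a reserving_port: reserve() pins an item from one
+    // of the cached predecessors; the enclosing join then calls consume() if the
+    // built tuple was delivered, or release() if it was rejected, to complete or
+    // undo the reservation.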
+
+    //! queueing join_port
+    template<typename T>
+    class queueing_port : public receiver<T>, public item_buffer<T> {
+    public:
+        typedef T input_type;
+        typedef sender<T> predecessor_type;
+        typedef queueing_port<T> my_node_type;
+
+// ----------- Aggregator ------------
+    private:
+        enum op_type { try__put, get__item, res_port };
+        enum op_stat {WAIT=0, SUCCEEDED, FAILED};
+        typedef queueing_port<T> my_class;
+
+        class queueing_port_operation : public aggregated_operation<queueing_port_operation> {
+        public:
+            char type;
+            union {
+                T my_val;
+                T *my_arg;
+            };
+            // constructor for value parameter
+            queueing_port_operation(const T& e, op_type t) :
+                type(char(t)), my_val(e) {}
+            // constructor for pointer parameter
+            queueing_port_operation(const T* p, op_type t) :
+                type(char(t)), my_arg(const_cast<T*>(p)) {}
+            // constructor with no parameter
+            queueing_port_operation(op_type t) : type(char(t)) {}
+        };
+
+        typedef internal::aggregating_functor<my_class, queueing_port_operation> my_handler;
+        friend class internal::aggregating_functor<my_class, queueing_port_operation>;
+        aggregator<my_handler, queueing_port_operation> my_aggregator;
+
+        void handle_operations(queueing_port_operation* op_list) {
+            queueing_port_operation *current;
+            bool was_empty;
+            while(op_list) {
+                current = op_list;
+                op_list = op_list->next;
+                switch(current->type) {
+                case try__put:
+                    was_empty = this->buffer_empty();
+                    this->push_back(current->my_val);
+                    if (was_empty) my_join->decrement_port_count();
+                    __TBB_store_with_release(current->status, SUCCEEDED);
+                    break;
+                case get__item:
+                    if(!this->buffer_empty()) {
+                        this->fetch_front(*(current->my_arg));
+                        __TBB_store_with_release(current->status, SUCCEEDED);
+                    }
+                    else {
+                        __TBB_store_with_release(current->status, FAILED);
+                    }
+                    break;
+                case res_port:
+                    __TBB_ASSERT(this->item_valid(this->my_head), "No item to reset");
+                    this->invalidate_front(); ++(this->my_head);
+                    if(this->item_valid(this->my_head)) {
+                        my_join->decrement_port_count();
+                    }
+                    __TBB_store_with_release(current->status, SUCCEEDED);
+                    break;
+                }
+            }
+        }
+// ------------ End Aggregator ---------------
+    public:
+
+        //! Constructor
+        queueing_port() : item_buffer<T>() {
+            my_join = NULL;
+            my_aggregator.initialize_handler(my_handler(this));
+        }
+
+        //! copy constructor
+        queueing_port(const queueing_port& /* other */) : receiver<T>(), item_buffer<T>() {
+            my_join = NULL;
+            my_aggregator.initialize_handler(my_handler(this));
+        }
+
+        //! record parent for tallying available items
+        void set_join_node_pointer(forwarding_base *join) {
+            my_join = join;
+        }
+
+        /*override*/bool try_put(T v) {
+            queueing_port_operation op_data(v, try__put);
+            my_aggregator.execute(&op_data);
+            return op_data.status == SUCCEEDED;
+        }
+
+
+        bool get_item( T &v ) {
+            queueing_port_operation op_data(&v, get__item);
+            my_aggregator.execute(&op_data);
+            return op_data.status == SUCCEEDED;
+        }
+
+        // reset_port is called when an item has been accepted by a successor, but
+        // the call is initiated by the join_node.
+        void reset_port() {
+            queueing_port_operation op_data(res_port);
+            my_aggregator.execute(&op_data);
+            return;
+        }
+
+    private:
+        forwarding_base *my_join;
+    };
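+
+    // A queueing_port buffers every put; get_item() only peeks at the oldest item,
+    // and reset_port() discards it once the enclosing join has successfully
+    // forwarded the tuple (see join_node_FE<queueing, ...> below).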
+
+    template<join_policy JP, typename InputTuple, typename OutputTuple>
+    class join_node_base;
+
+    //! join_node_FE : implements input port policy
+    template<join_policy JP, typename InputTuple, typename OutputTuple>
+    class join_node_FE;
+
+    template<typename InputTuple, typename OutputTuple>
+    class join_node_FE<reserving, InputTuple, OutputTuple> : public forwarding_base {
+    public:
+        static const int N = std::tuple_size<OutputTuple>::value;
+        typedef OutputTuple output_type;
+        typedef InputTuple input_type;
+        typedef join_node_base<reserving, InputTuple, OutputTuple> my_node_type; // for forwarding
+
+        join_node_FE(graph &g) : forwarding_base(g.root_task()), my_node(NULL) {
+            ports_with_no_inputs = N;
+            join_helper<N>::set_join_node_pointer(my_inputs, this);
+        }
+
+        void set_my_node(my_node_type *new_my_node) { my_node = new_my_node; }
+
+        void increment_port_count() {
+            ++ports_with_no_inputs;
+        }
+
+        // if all input_ports have predecessors, spawn forward to try and consume tuples
+        void decrement_port_count() {
+            if(ports_with_no_inputs.fetch_and_decrement() == 1) {
+                task::enqueue( * new ( task::allocate_additional_child_of( *(this->my_root_task) ) )
+                    forward_task<my_node_type>(*my_node) );
+            }
+        }
+
+        input_type &inputs() { return my_inputs; }
+    protected:
+        // all methods on input ports should be called under mutual exclusion from join_node_base.
+
+        bool tuple_build_may_succeed() {
+            return !ports_with_no_inputs;
+        }
+
+        bool try_to_make_tuple(output_type &out) {
+            if(ports_with_no_inputs) return false;
+            return join_helper<N>::reserve(my_inputs, out);
+        }
+
+        void tuple_accepted() {
+            join_helper<N>::consume_reservations(my_inputs);
+        }
+        void tuple_rejected() {
+            join_helper<N>::release_reservations(my_inputs);
+        }
+
+        input_type my_inputs;
+        my_node_type *my_node;
+        atomic<size_t> ports_with_no_inputs;
+    };
+
+    template<typename InputTuple, typename OutputTuple>
+    class join_node_FE<queueing, InputTuple, OutputTuple> : public forwarding_base {
+    public:
+        static const int N = std::tuple_size<OutputTuple>::value;
+        typedef OutputTuple output_type;
+        typedef InputTuple input_type;
+        typedef join_node_base<queueing, InputTuple, OutputTuple> my_node_type; // for forwarding
+
+        join_node_FE(graph &g) : forwarding_base(g.root_task()), my_node(NULL) {
+            ports_with_no_items = N;
+            join_helper<N>::set_join_node_pointer(my_inputs, this);
+        }
+
+        // needed for forwarding
+        void set_my_node(my_node_type *new_my_node) { my_node = new_my_node; }
+
+        void reset_port_count() {
+            ports_with_no_items = N;
+        }
+
+        // if all input_ports have items, spawn forward to try and consume tuples
+        void decrement_port_count() {
+            if(ports_with_no_items.fetch_and_decrement() == 1) {
+                task::enqueue( * new ( task::allocate_additional_child_of( *(this->my_root_task) ) )
+                    forward_task<my_node_type>(*my_node) );
+            }
+        }
+
+        void increment_port_count() { __TBB_ASSERT(false, NULL); }  // should never be called
+
+        input_type &inputs() { return my_inputs; }
+    protected:
+        // all methods on input ports should be called under mutual exclusion from join_node_base.
+
+        bool tuple_build_may_succeed() {
+            return !ports_with_no_items;
+        }
+
+        bool try_to_make_tuple(output_type &out) {
+            if(ports_with_no_items) return false;
+            return join_helper<N>::get_items(my_inputs, out);
+        }
+
+        void tuple_accepted() {
+            reset_port_count();
+            join_helper<N>::reset_ports(my_inputs);
+        }
+        void tuple_rejected() {
+            // nothing to do.
+        }
+
+        input_type my_inputs;
+        my_node_type *my_node;
+        atomic<size_t> ports_with_no_items;
+    };
+
+    //! join_node_base
+    template<join_policy JP, typename InputTuple, typename OutputTuple>
+    class join_node_base : public graph_node, public join_node_FE<JP, InputTuple, OutputTuple>,
+                           public sender<OutputTuple>, no_copy {
+    public:
+        typedef OutputTuple output_type;
+
+        typedef receiver<output_type> successor_type;
+        typedef join_node_FE<JP, InputTuple, OutputTuple> input_ports_type;
+        using input_ports_type::tuple_build_may_succeed;
+        using input_ports_type::try_to_make_tuple;
+        using input_ports_type::tuple_accepted;
+        using input_ports_type::tuple_rejected;
+
+    private:
+        // ----------- Aggregator ------------
+        enum op_type { reg_succ, rem_succ, try__get, do_fwrd };
+        enum op_stat {WAIT=0, SUCCEEDED, FAILED};
+        typedef join_node_base<JP,InputTuple,OutputTuple> my_class;
+
+        class join_node_base_operation : public aggregated_operation<join_node_base_operation> {
+        public:
+            char type;
+            union {
+                output_type *my_arg;
+                successor_type *my_succ;
+            };
+            join_node_base_operation(const output_type& e, op_type t) :
+                type(char(t)), my_arg(const_cast<output_type*>(&e)) {}
+            join_node_base_operation(const successor_type &s, op_type t) : type(char(t)), 
+                my_succ(const_cast<successor_type *>(&s)) {}
+            join_node_base_operation(op_type t) : type(char(t)) {}
+        };
+
+        typedef internal::aggregating_functor<my_class, join_node_base_operation> my_handler;
+        friend class internal::aggregating_functor<my_class, join_node_base_operation>;
+        bool forwarder_busy;
+        aggregator<my_handler, join_node_base_operation> my_aggregator;
+
+        void handle_operations(join_node_base_operation* op_list) {
+            join_node_base_operation *current;
+            while(op_list) {
+                current = op_list;
+                op_list = op_list->next;
+                switch(current->type) {
+                case reg_succ:
+                    my_successors.register_successor(*(current->my_succ));
+                    if(tuple_build_may_succeed() && !forwarder_busy) {
+                        task::enqueue( * new ( task::allocate_additional_child_of(*(this->my_root_task)) )
+                                forward_task<join_node_base<JP,InputTuple,OutputTuple> >(*this));
+                        forwarder_busy = true;
+                    }
+                    __TBB_store_with_release(current->status, SUCCEEDED);
+                    break;
+                case rem_succ:
+                    my_successors.remove_successor(*(current->my_succ));
+                    __TBB_store_with_release(current->status, SUCCEEDED);
+                    break;
+                case try__get:
+                    if(tuple_build_may_succeed()) {
+                        if(try_to_make_tuple(*(current->my_arg))) {
+                            tuple_accepted();
+                            __TBB_store_with_release(current->status, SUCCEEDED);
+                        }
+                        else __TBB_store_with_release(current->status, FAILED);
+                    }
+                    else __TBB_store_with_release(current->status, FAILED);
+                    break;
+                case do_fwrd: {
+                        bool build_succeeded;
+                        output_type out;
+                        if(tuple_build_may_succeed()) {
+                            do {
+                                build_succeeded = try_to_make_tuple(out);
+                                if(build_succeeded) {
+                                    if(my_successors.try_put(out)) {
+                                        tuple_accepted();
+                                    }
+                                    else {
+                                        tuple_rejected();
+                                        build_succeeded = false;
+                                    }
+                                }
+                            } while(build_succeeded);
+                        }
+                        __TBB_store_with_release(current->status, SUCCEEDED);
+                        forwarder_busy = false;
+                    }
+                    break;
+                }
+            }
+        }
+        // ---------- end aggregator -----------
+    public:
+
+        join_node_base(graph &g) : input_ports_type(g), forwarder_busy(false) {
+            my_successors.set_owner(this);
+            input_ports_type::set_my_node(this);
+            my_aggregator.initialize_handler(my_handler(this));
+        }
+
+        bool register_successor(successor_type &r) {
+            join_node_base_operation op_data(r, reg_succ);
+            my_aggregator.execute(&op_data);
+            return op_data.status == SUCCEEDED;
+        }
+
+        bool remove_successor( successor_type &r) {
+            join_node_base_operation op_data(r, rem_succ);
+            my_aggregator.execute(&op_data);
+            return op_data.status == SUCCEEDED;
+        }
+
+        bool try_get( output_type &v) {
+            join_node_base_operation op_data(v, try__get);
+            my_aggregator.execute(&op_data);
+            return op_data.status == SUCCEEDED;
+        }
+
+    private:
+        broadcast_cache<output_type, null_rw_mutex> my_successors;
+
+        friend class forward_task< join_node_base<JP, InputTuple, OutputTuple> >;
+
+        void forward() {
+            join_node_base_operation op_data(do_fwrd);
+            my_aggregator.execute(&op_data);
+        }
+    };
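+
+    // Forwarding in join_node_base is serialized through the aggregator: the
+    // do_fwrd operation keeps building tuples and offering them to successors until
+    // a tuple cannot be built or cannot be delivered to a successor, then clears
+    // forwarder_busy so a later event can re-spawn the forward task.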
+
+    //! unfolded_join_node : passes input_ports_tuple_type to join_node_base.  We build the input port type
+    //  using tuple_element.
+    template<int N, typename OutputTuple, join_policy JP>
+    class unfolded_join_node;
+
+    template<typename OutputTuple>
+    class unfolded_join_node<2,OutputTuple,reserving> : public internal::join_node_base<reserving,
+        std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type> >,
+        OutputTuple
+                  >
+                  {
+    public:
+        typedef typename std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<reserving, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<3,OutputTuple,reserving> : public internal::join_node_base<reserving,
+        std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<2,OutputTuple>::type> >,
+        OutputTuple
+                    >
+                    {
+    public:
+        typedef typename std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<2,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<reserving, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<4,OutputTuple,reserving> : public internal::join_node_base<reserving,
+        std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<2,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<3,OutputTuple>::type> >,
+        OutputTuple
+                    > {
+    public:
+        typedef typename std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<2,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<3,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<reserving, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<5,OutputTuple,reserving> : public internal::join_node_base<reserving,
+        std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<2,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<3,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<4,OutputTuple>::type> >,
+        OutputTuple
+                > {
+    public:
+        typedef typename std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<2,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<3,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<4,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<reserving, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<6,OutputTuple,reserving> : public internal::join_node_base<reserving,
+        std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<2,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<3,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<4,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<5,OutputTuple>::type> >,
+        OutputTuple
+                    > {
+    public:
+        typedef typename std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<2,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<3,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<4,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<5,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<reserving, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<7,OutputTuple,reserving> : public internal::join_node_base<reserving,
+        std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<2,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<3,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<4,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<5,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<6,OutputTuple>::type> >,
+        OutputTuple
+                > {
+    public:
+        typedef typename std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<2,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<3,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<4,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<5,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<6,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<reserving, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<8,OutputTuple,reserving> : public internal::join_node_base<reserving,
+        std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<2,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<3,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<4,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<5,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<6,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<7,OutputTuple>::type> >,
+        OutputTuple
+                > {
+    public:
+        typedef typename std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<2,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<3,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<4,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<5,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<6,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<7,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<reserving, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<9,OutputTuple,reserving> : public internal::join_node_base<reserving,
+        std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<2,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<3,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<4,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<5,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<6,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<7,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<8,OutputTuple>::type> >,
+        OutputTuple
+                > {
+    public:
+        typedef typename std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<2,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<3,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<4,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<5,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<6,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<7,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<8,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<reserving, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<10,OutputTuple,reserving> : public internal::join_node_base<reserving,
+        std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<2,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<3,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<4,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<5,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<6,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<7,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<8,OutputTuple>::type>,
+                reserving_port<typename std::tuple_element<9,OutputTuple>::type> >,
+        OutputTuple
+                > {
+    public:
+        typedef typename std::tuple<
+                reserving_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<1,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<2,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<3,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<4,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<5,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<6,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<7,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<8,OutputTuple>::type>, 
+                reserving_port<typename std::tuple_element<9,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<reserving, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<2,OutputTuple,queueing> : public internal::join_node_base<queueing,
+        std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type> >,
+        OutputTuple
+                  >
+                  {
+    public:
+        typedef typename std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<queueing, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<3,OutputTuple,queueing> : public internal::join_node_base<queueing,
+        std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<2,OutputTuple>::type> >,
+        OutputTuple
+                    >
+                    {
+    public:
+        typedef typename std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<2,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<queueing, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<4,OutputTuple,queueing> : public internal::join_node_base<queueing,
+        std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<2,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<3,OutputTuple>::type> >,
+        OutputTuple
+                    > {
+    public:
+        typedef typename std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<2,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<3,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<queueing, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<5,OutputTuple,queueing> : public internal::join_node_base<queueing,
+        std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<2,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<3,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<4,OutputTuple>::type> >,
+        OutputTuple
+                > {
+    public:
+        typedef typename std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<2,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<3,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<4,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<queueing, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<6,OutputTuple,queueing> : public internal::join_node_base<queueing,
+        std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<2,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<3,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<4,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<5,OutputTuple>::type> >,
+        OutputTuple
+                    > {
+    public:
+        typedef typename std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<2,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<3,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<4,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<5,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<queueing, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<7,OutputTuple,queueing> : public internal::join_node_base<queueing,
+        std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<2,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<3,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<4,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<5,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<6,OutputTuple>::type> >,
+        OutputTuple
+                > {
+    public:
+        typedef typename std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<2,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<3,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<4,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<5,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<6,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<queueing, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<8,OutputTuple,queueing> : public internal::join_node_base<queueing,
+        std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<2,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<3,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<4,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<5,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<6,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<7,OutputTuple>::type> >,
+        OutputTuple
+                > {
+    public:
+        typedef typename std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<2,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<3,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<4,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<5,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<6,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<7,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<queueing, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<9,OutputTuple,queueing> : public internal::join_node_base<queueing,
+        std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<2,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<3,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<4,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<5,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<6,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<7,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<8,OutputTuple>::type> >,
+        OutputTuple
+                > {
+    public:
+        typedef typename std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<2,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<3,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<4,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<5,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<6,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<7,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<8,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<queueing, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    template<typename OutputTuple>
+    class unfolded_join_node<10,OutputTuple,queueing> : public internal::join_node_base<queueing,
+        std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<2,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<3,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<4,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<5,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<6,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<7,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<8,OutputTuple>::type>,
+                queueing_port<typename std::tuple_element<9,OutputTuple>::type> >,
+        OutputTuple
+                > {
+    public:
+        typedef typename std::tuple<
+                queueing_port<typename std::tuple_element<0,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<1,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<2,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<3,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<4,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<5,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<6,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<7,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<8,OutputTuple>::type>, 
+                queueing_port<typename std::tuple_element<9,OutputTuple>::type> > input_ports_tuple_type;
+        typedef OutputTuple output_type;
+    private:
+        typedef join_node_base<queueing, input_ports_tuple_type, output_type > base_type;
+    public:
+        unfolded_join_node(graph &g) : base_type(g) {}
+    };
+
+    //! templated function to refer to input ports of the join node
+    template<size_t N, typename JNT>
+    typename std::tuple_element<N, typename JNT::input_ports_tuple_type>::type &input_port(JNT &jn) {
+        return std::get<N>(jn.inputs());
+    }
+
+    } // namespace internal
+
+using namespace internal::join_policy_namespace;
+using internal::input_port;
+
+template<typename OutputTuple, join_policy JP=reserving>
+class join_node: public internal::unfolded_join_node<std::tuple_size<OutputTuple>::value, OutputTuple, JP> {
+private:
+    static const int N = std::tuple_size<OutputTuple>::value;
+    typedef typename internal::unfolded_join_node<N, OutputTuple, JP> unfolded_type;
+public:
+    typedef OutputTuple output_type;
+    typedef typename unfolded_type::input_ports_tuple_type input_ports_tuple_type;
+    join_node(graph &g) : unfolded_type(g) { }
+};
+
+    //
+    // Making edges
+    //
+  
+    //! Makes an edge between a single predecessor and a single successor
+    template< typename T >
+    inline void make_edge( sender<T> &p, receiver<T> &s ) {
+        p.register_successor( s );
+    }
+
+    //! Makes edges between a single predecessor and multiple successors
+    template< typename T, typename SIterator >
+    inline void make_edges( sender<T> &p, SIterator s_begin, SIterator s_end ) {
+        for ( SIterator i = s_begin; i != s_end; ++i ) {
+            make_edge( p, **i );
+        }
+    }
+
+    //! Makes edges between a set of predecessors and a single successor
+    template< typename T, typename PIterator >
+    inline void make_edges( PIterator p_begin, PIterator p_end, receiver<T> &s ) {
+        for ( PIterator i = p_begin; i != p_end; ++i ) {
+            make_edge( **i, s );
+        }
+    }
+
+}
+
+#endif
+
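The public join_node above only dispatches on std::tuple_size&lt;OutputTuple&gt;::value to pick one of the unfolded specializations, so user code just names join_node, wires its ports with input_port&lt;N&gt;() and make_edge(), and never sees the port tuples directly. A minimal sketch of that wiring follows; it assumes the companion graph node types (queue_node, graph::wait_for_all) and the header/namespace layout of the same flow-graph preview, so treat it as illustrative rather than as part of this commit:

    // Illustrative sketch only (not part of TBB). Assumes the preview's graph and
    // queue_node types are visible (e.g. via the flow-graph preview header and a
    // using-directive for its namespace) and that <tuple> is included.
    void join_example() {
        graph g;
        queue_node<int>   ints( g );
        queue_node<float> floats( g );
        join_node< std::tuple<int,float>, queueing > j( g );   // default policy would be reserving
        queue_node< std::tuple<int,float> > joined( g );

        make_edge( ints,   input_port<0>( j ) );   // input_port<N>(j) returns std::get<N>(j.inputs())
        make_edge( floats, input_port<1>( j ) );
        make_edge( j, joined );

        ints.try_put( 1 );
        floats.try_put( 2.5f );
        g.wait_for_all();

        std::tuple<int,float> t;
        joined.try_get( t );                       // t holds (1, 2.5f) once both inputs have arrived
    }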
diff --git a/tbb/include/tbb/index.html b/tbb/include/tbb/index.html
new file mode 100644 (file)
index 0000000..33f69d1
--- /dev/null
@@ -0,0 +1,28 @@
+<HTML>
+<BODY>
+
+<H2>Overview</H2>
+Include files for Threading Building Blocks classes and functions.
+
+<BR><A HREF=".">Click here</A> to see all files in the directory.
+
+<H2>Directories</H2>
+<DL>
+<DT><A HREF="machine">machine</A>
+<DD>Include files for low-level architecture specific functionality.
+<DT><A HREF="compat">compat</A>
+<DD>Include files for source level compatibility with other frameworks.
+</DL>
+
+<HR>
+<A HREF="../index.html">Up to parent directory</A>
+<p></p>
+Copyright &copy; 2005-2011 Intel Corporation.  All Rights Reserved.
+<p></p>
+Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are 
+registered trademarks or trademarks of Intel Corporation or its 
+subsidiaries in the United States and other countries. 
+<p></p>
+* Other names and brands may be claimed as the property of others.
+</BODY>
+</HTML>
diff --git a/tbb/include/tbb/machine/gcc_generic.h b/tbb/include/tbb/machine/gcc_generic.h
new file mode 100644 (file)
index 0000000..8bf7922
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_machine_H
+#error Do not include this file directly; include tbb_machine.h instead
+#endif
+
+#include <stdint.h>
+#include <unistd.h>
+
+#define __TBB_WORDSIZE      __SIZEOF_INT__
+
+// For some unknown reason the straight mapping does not work, at least on MinGW.
+#if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
+    #define __TBB_BIG_ENDIAN    0
+#elif __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
+    #define __TBB_BIG_ENDIAN    1
+#else
+#error "This endiannes is not supported."
+#endif
+
+// As this port has no information about the underlying hardware, performance will most
+// likely be sub-optimal, because a full memory fence is used where a lightweight one
+// would suffice.
+#define __TBB_acquire_consistency_helper()  __sync_synchronize()
+#define __TBB_release_consistency_helper()  __sync_synchronize()
+#define __TBB_full_memory_fence()           __sync_synchronize()
+#define __TBB_control_consistency_helper()  __sync_synchronize()
+
+
+#define __MACHINE_DECL_ATOMICS(S,T)                                                               \
+inline T __TBB_generic_gcc_cmpswp##S(volatile void *ptr, T value, T comparand ) {                 \
+    return __sync_val_compare_and_swap(reinterpret_cast<volatile T *>(ptr),comparand,value);      \
+}                                                                                                 \
+
+__MACHINE_DECL_ATOMICS(1,int8_t)
+__MACHINE_DECL_ATOMICS(2,int16_t)
+__MACHINE_DECL_ATOMICS(4,int32_t)
+__MACHINE_DECL_ATOMICS(8,int64_t)
+
+#define __TBB_CompareAndSwap1(P,V,C) __TBB_generic_gcc_cmpswp1(P,V,C)
+#define __TBB_CompareAndSwap2(P,V,C) __TBB_generic_gcc_cmpswp2(P,V,C)
+#define __TBB_CompareAndSwap4(P,V,C) __TBB_generic_gcc_cmpswp4(P,V,C)
+#define __TBB_CompareAndSwap8(P,V,C) __TBB_generic_gcc_cmpswp8(P,V,C)
+
+#if (__TBB_WORDSIZE==4)
+    #define __TBB_CompareAndSwapW(P,V,C) __TBB_CompareAndSwap4(P,V,C)
+#elif  (__TBB_WORDSIZE==8)
+    #define __TBB_CompareAndSwapW(P,V,C) __TBB_CompareAndSwap8(P,V,C)
+#else
+    #error "Unsupported word size."
+#endif
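All of the CAS macros in this generic GCC port reduce to __sync_val_compare_and_swap; note that the wrapper swaps argument order, since TBB's cmpswp convention is (ptr, new_value, comparand) while the GCC builtin takes (ptr, expected, new_value). A sketch of how such a primitive is typically used (the helper is hypothetical, not part of TBB):

    // Hypothetical helper: atomic increment on top of the CAS macro above.
    // __TBB_CompareAndSwap4(ptr, new_value, comparand) returns the value observed at *ptr
    // before the attempt, so the loop retries until that value matches its own snapshot.
    static inline int32_t cas_increment( volatile int32_t* counter ) {
        int32_t snapshot, seen;
        do {
            snapshot = *counter;
            seen = __TBB_CompareAndSwap4( counter, snapshot + 1, snapshot );
        } while( seen != snapshot );
        return snapshot + 1;
    }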
diff --git a/tbb/include/tbb/machine/ibm_aix51.h b/tbb/include/tbb/machine/ibm_aix51.h
new file mode 100644 (file)
index 0000000..54bd080
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_machine_H
+#error Do not include this file directly; include tbb_machine.h instead
+#endif
+
+#define __TBB_WORDSIZE 8
+#define __TBB_BIG_ENDIAN 1
+
+#include <stdint.h>
+#include <unistd.h>
+#include <sched.h>
+
+extern "C" {
+
+int32_t __TBB_machine_cas_32 (volatile void* ptr, int32_t value, int32_t comparand);
+int64_t __TBB_machine_cas_64 (volatile void* ptr, int64_t value, int64_t comparand);
+void    __TBB_machine_flush  ();
+
+}
+
+#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cas_32(P,V,C)
+#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cas_64(P,V,C)
+#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cas_64(P,V,C)
+#define __TBB_Yield() sched_yield()
+
+#if __GNUC__
+#define __TBB_full_memory_fence() __asm__ __volatile__("sync": : :"memory")
+#define __TBB_release_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")
+#else
+// IBM C++ Compiler does not support inline assembly
+#define __TBB_full_memory_fence() __TBB_machine_flush ()
+#define __TBB_release_consistency_helper() __TBB_machine_flush ()
+#endif
diff --git a/tbb/include/tbb/machine/linux_common.h b/tbb/include/tbb/machine/linux_common.h
new file mode 100644 (file)
index 0000000..15b581c
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_machine_H
+#error Do not include this file directly; include tbb_machine.h instead
+#endif
+
+#include <sched.h>
+#define __TBB_Yield()  sched_yield()
+
+/* Futex definitions */
+#include <sys/syscall.h>
+
+#if defined(SYS_futex)
+
+#define __TBB_USE_FUTEX 1
+#include <limits.h>
+#include <errno.h>
+// Unfortunately, some versions of Linux do not have a header that defines FUTEX_WAIT and FUTEX_WAKE.
+
+#ifdef FUTEX_WAIT
+#define __TBB_FUTEX_WAIT FUTEX_WAIT
+#else
+#define __TBB_FUTEX_WAIT 0
+#endif
+
+#ifdef FUTEX_WAKE
+#define __TBB_FUTEX_WAKE FUTEX_WAKE
+#else
+#define __TBB_FUTEX_WAKE 1
+#endif
+
+#ifndef __TBB_ASSERT
+#error machine specific headers must be included after tbb_stddef.h
+#endif
+
+namespace tbb {
+
+namespace internal {
+
+inline int futex_wait( void *futex, int comparand ) {
+    int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAIT,comparand,NULL,NULL,0 );
+#if TBB_USE_ASSERT
+    int e = errno;
+    __TBB_ASSERT( r==0||r==EWOULDBLOCK||(r==-1&&(e==EAGAIN||e==EINTR)), "futex_wait failed." );
+#endif /* TBB_USE_ASSERT */
+    return r;
+}
+
+inline int futex_wakeup_one( void *futex ) {
+    int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,1,NULL,NULL,0 );
+    __TBB_ASSERT( r==0||r==1, "futex_wakeup_one: more than one thread woken up?" );
+    return r;
+}
+
+inline int futex_wakeup_all( void *futex ) {
+    int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,INT_MAX,NULL,NULL,0 );
+    __TBB_ASSERT( r>=0, "futex_wakeup_all: error in waking up threads" );
+    return r;
+}
+
+} /* namespace internal */
+
+} /* namespace tbb */
+
+#endif /* SYS_futex */
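futex_wait returns immediately unless the word at futex still holds comparand, which is exactly the check-then-sleep shape needed for a blocking gate; futex_wakeup_one and futex_wakeup_all release one or all sleepers. A sketch built only on these wrappers (the gate type is hypothetical, not part of TBB, and a real implementation would use properly fenced atomic accesses to the flag):

    // Hypothetical one-shot gate on Linux, using the futex wrappers above.
    struct simple_gate {
        int flag;                                   // 0 = closed, 1 = open
        simple_gate() : flag(0) {}
        void wait() {
            while( flag==0 )
                tbb::internal::futex_wait( &flag, /*comparand=*/0 );  // sleeps only while flag is still 0
        }
        void open() {
            flag = 1;
            tbb::internal::futex_wakeup_all( &flag );                 // wake every thread parked in wait()
        }
    };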
diff --git a/tbb/include/tbb/machine/linux_ia32.h b/tbb/include/tbb/machine/linux_ia32.h
new file mode 100644 (file)
index 0000000..547bf50
--- /dev/null
@@ -0,0 +1,250 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_machine_H
+#error Do not include this file directly; include tbb_machine.h instead
+#endif
+
+#include <stdint.h>
+#include <unistd.h>
+
+#define __TBB_WORDSIZE 4
+#define __TBB_BIG_ENDIAN 0
+
+#define __TBB_release_consistency_helper() __asm__ __volatile__("": : :"memory")
+#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
+
+#if __TBB_ICC_ASM_VOLATILE_BROKEN
+#define __TBB_VOLATILE
+#else
+#define __TBB_VOLATILE volatile
+#endif
+
+#define __MACHINE_DECL_ATOMICS(S,T,X,R) \
+static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T comparand )  \
+{                                                                                    \
+    T result;                                                                        \
+                                                                                     \
+    __asm__ __volatile__("lock\ncmpxchg" X " %2,%1"                                  \
+                          : "=a"(result), "=m"(*(__TBB_VOLATILE T*)ptr)              \
+                          : "q"(value), "0"(comparand), "m"(*(__TBB_VOLATILE T*)ptr) \
+                          : "memory");                                               \
+    return result;                                                                   \
+}                                                                                    \
+                                                                                     \
+static inline T __TBB_machine_fetchadd##S(volatile void *ptr, T addend)              \
+{                                                                                    \
+    T result;                                                                        \
+    __asm__ __volatile__("lock\nxadd" X " %0,%1"                                     \
+                          : R (result), "=m"(*(__TBB_VOLATILE T*)ptr)            \
+                          : "0"(addend), "m"(*(__TBB_VOLATILE T*)ptr)                \
+                          : "memory");                                               \
+    return result;                                                                   \
+}                                                                                    \
+                                                                                     \
+static inline  T __TBB_machine_fetchstore##S(volatile void *ptr, T value)            \
+{                                                                                    \
+    T result;                                                                        \
+    __asm__ __volatile__("lock\nxchg" X " %0,%1"                                     \
+                          : R (result), "=m"(*(__TBB_VOLATILE T*)ptr)            \
+                          : "0"(value), "m"(*(__TBB_VOLATILE T*)ptr)                 \
+                          : "memory");                                               \
+    return result;                                                                   \
+}                                                                                    \
+                                                                                     
+__MACHINE_DECL_ATOMICS(1,int8_t,"","=q")
+__MACHINE_DECL_ATOMICS(2,int16_t,"","=r")
+__MACHINE_DECL_ATOMICS(4,int32_t,"l","=r")
+
+static inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
+{
+    int64_t result;
+    union {
+        int64_t i64;
+        int32_t i32[2];
+    };
+    i64 = value;
+#if __PIC__ 
+    /* compiling position-independent code */
+    // EBX register preserved for compliance with position-independent code rules on IA32
+    int32_t tmp;
+    __asm__ __volatile__ (
+            "movl  %%ebx,%2\n\t"
+            "movl  %5,%%ebx\n\t"
+#if __GNUC__==3
+            "lock\n\t cmpxchg8b %1\n\t"
+#else
+            "lock\n\t cmpxchg8b (%3)\n\t"
+#endif
+            "movl  %2,%%ebx"
+             : "=A"(result)
+             , "=m"(*(__TBB_VOLATILE int64_t *)ptr)
+             , "=m"(tmp)
+#if __GNUC__==3
+             : "m"(*(__TBB_VOLATILE int64_t *)ptr)
+#else
+             : "SD"(ptr)
+#endif
+             , "0"(comparand)
+             , "m"(i32[0]), "c"(i32[1])
+             : "memory"
+#if __INTEL_COMPILER
+             ,"ebx"
+#endif
+    );
+#else /* !__PIC__ */
+    __asm__ __volatile__ (
+            "lock\n\t cmpxchg8b %1\n\t"
+             : "=A"(result), "=m"(*(__TBB_VOLATILE int64_t *)ptr)
+             : "m"(*(__TBB_VOLATILE int64_t *)ptr)
+             , "0"(comparand)
+             , "b"(i32[0]), "c"(i32[1])
+             : "memory"
+    );
+#endif /* __PIC__ */
+    return result;
+}
+
+static inline int32_t __TBB_machine_lg( uint32_t x ) {
+    int32_t j;
+    __asm__ ("bsr %1,%0" : "=r"(j) : "r"(x));
+    return j;
+}
+
+static inline void __TBB_machine_or( volatile void *ptr, uint32_t addend ) {
+    __asm__ __volatile__("lock\norl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory");
+}
+
+static inline void __TBB_machine_and( volatile void *ptr, uint32_t addend ) {
+    __asm__ __volatile__("lock\nandl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory");
+}
+
+static inline void __TBB_machine_pause( int32_t delay ) {
+    for (int32_t i = 0; i < delay; i++) {
+       __asm__ __volatile__("pause;");
+    }
+    return;
+}   
+
+static inline int64_t __TBB_machine_load8 (const volatile void *ptr) {
+    int64_t result;
+    if( ((uint32_t)ptr&7u)==0 ) {
+        // Aligned load
+        __asm__ __volatile__ ( "fildq %1\n\t"
+                               "fistpq %0" :  "=m"(result) : "m"(*(const __TBB_VOLATILE uint64_t*)ptr) : "memory" );
+    } else {
+        // Unaligned load
+        result = __TBB_machine_cmpswp8(const_cast<void*>(ptr),0,0);
+    }
+    return result;
+}
+
+//! Handles misaligned 8-byte store
+/** Defined in tbb_misc.cpp */
+extern "C" void __TBB_machine_store8_slow( volatile void *ptr, int64_t value );
+extern "C" void __TBB_machine_store8_slow_perf_warning( volatile void *ptr );
+
+static inline void __TBB_machine_store8(volatile void *ptr, int64_t value) {
+    if( ((uint32_t)ptr&7u)==0 ) {
+        // Aligned store
+        __asm__ __volatile__ ( "fildq %1\n\t"
+                               "fistpq %0" :  "=m"(*(__TBB_VOLATILE int64_t*)ptr) : "m"(value) : "memory" );
+    } else {
+        // Unaligned store
+#if TBB_USE_PERFORMANCE_WARNINGS
+        __TBB_machine_store8_slow_perf_warning(ptr);
+#endif /* TBB_USE_PERFORMANCE_WARNINGS */
+        __TBB_machine_store8_slow(ptr,value);
+    }
+}
+// Machine specific atomic operations
+
+#define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1(P,V,C)
+#define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2(P,V,C)
+#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
+#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
+#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C)
+
+#define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1(P,V)
+#define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2(P,V)
+#define __TBB_FetchAndAdd4(P,V) __TBB_machine_fetchadd4(P,V)
+#define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd4(P,V)
+
+#define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1(P,V)
+#define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2(P,V)
+#define __TBB_FetchAndStore4(P,V) __TBB_machine_fetchstore4(P,V)
+#define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore4(P,V)
+
+#define __TBB_Store8(P,V) __TBB_machine_store8(P,V)
+#define __TBB_Load8(P)    __TBB_machine_load8(P)
+
+#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
+#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
+
+
+// These are deliberately left undefined; they will be implemented generically using CMPSWP8.
+#undef __TBB_FetchAndAdd8
+#undef __TBB_FetchAndStore8
+
+// Definition of other functions
+#define __TBB_Pause(V) __TBB_machine_pause(V)
+#define __TBB_Log2(V)  __TBB_machine_lg(V)
+
+// Special atomic functions
+#define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAddW(P,V)
+#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
+#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,-1)
+
+// Use generic definitions from tbb_machine.h
+#undef __TBB_TryLockByte
+#undef __TBB_LockByte
+
+// API to retrieve/update FPU control setting
+#define __TBB_CPU_CTL_ENV_PRESENT 1
+
+struct __TBB_cpu_ctl_env_t {
+    int     mxcsr;
+    short   x87cw;
+};
+
+inline void __TBB_get_cpu_ctl_env ( __TBB_cpu_ctl_env_t* ctl ) {
+    __asm__ __volatile__ (
+            "stmxcsr %0\n\t"
+            "fstcw   %1"
+            : "=m"(ctl->mxcsr), "=m"(ctl->x87cw)
+    );
+}
+inline void __TBB_set_cpu_ctl_env ( const __TBB_cpu_ctl_env_t* ctl ) {
+    __asm__ __volatile__ (
+            "ldmxcsr %0\n\t"
+            "fldcw   %1"
+            : : "m"(ctl->mxcsr), "m"(ctl->x87cw)
+    );
+}
+
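On a 32-bit build the 8-byte load and store above are the subtle cases: aligned accesses go through the x87 fildq/fistpq pair (which moves 64 bits atomically), while misaligned ones fall back to cmpxchg8b for loads and to the out-of-line __TBB_machine_store8_slow for stores. A sketch of what that enables, with hypothetical wrapper names:

    // Hypothetical wrappers (not part of TBB) showing what the 8-byte macros buy on IA-32:
    // a 64-bit value that can be read and published atomically despite the 4-byte word size.
    static inline int64_t read_counter( const volatile int64_t* p ) {
        return __TBB_Load8( p );        // fildq/fistpq when aligned, cmpxchg8b otherwise
    }
    static inline void publish_counter( volatile int64_t* p, int64_t v ) {
        __TBB_Store8( p, v );           // misaligned case goes through __TBB_machine_store8_slow
    }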
diff --git a/tbb/include/tbb/machine/linux_ia64.h b/tbb/include/tbb/machine/linux_ia64.h
new file mode 100644 (file)
index 0000000..b815d3c
--- /dev/null
@@ -0,0 +1,170 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_machine_H
+#error Do not include this file directly; include tbb_machine.h instead
+#endif
+
+#include <stdint.h>
+#include <unistd.h>
+#include <ia64intrin.h>
+
+#define __TBB_WORDSIZE 8
+#define __TBB_BIG_ENDIAN 0
+#define __TBB_DECL_FENCED_ATOMICS 1
+
+// Most of these functions are implemented in a separate .s (assembly) file.
+
+extern "C" {
+    int8_t __TBB_machine_cmpswp1__TBB_full_fence (volatile void *ptr, int8_t value, int8_t comparand); 
+    int8_t __TBB_machine_fetchadd1__TBB_full_fence (volatile void *ptr, int8_t addend);
+    int8_t __TBB_machine_fetchadd1acquire(volatile void *ptr, int8_t addend);
+    int8_t __TBB_machine_fetchadd1release(volatile void *ptr, int8_t addend);
+    int8_t __TBB_machine_fetchstore1acquire(volatile void *ptr, int8_t value);
+    int8_t __TBB_machine_fetchstore1release(volatile void *ptr, int8_t value);
+
+    int16_t __TBB_machine_cmpswp2__TBB_full_fence (volatile void *ptr, int16_t value, int16_t comparand);
+    int16_t __TBB_machine_fetchadd2__TBB_full_fence (volatile void *ptr, int16_t addend);
+    int16_t __TBB_machine_fetchadd2acquire(volatile void *ptr, int16_t addend);
+    int16_t __TBB_machine_fetchadd2release(volatile void *ptr, int16_t addend);
+    int16_t __TBB_machine_fetchstore2acquire(volatile void *ptr, int16_t value);
+    int16_t __TBB_machine_fetchstore2release(volatile void *ptr, int16_t value);
+
+    int32_t __TBB_machine_fetchstore4__TBB_full_fence (volatile void *ptr, int32_t value);
+    int32_t __TBB_machine_fetchstore4acquire(volatile void *ptr, int32_t value);
+    int32_t __TBB_machine_fetchstore4release(volatile void *ptr, int32_t value);
+    int32_t __TBB_machine_fetchadd4acquire(volatile void *ptr, int32_t addend);
+    int32_t __TBB_machine_fetchadd4release(volatile void *ptr, int32_t addend);
+
+    int64_t __TBB_machine_cmpswp8__TBB_full_fence (volatile void *ptr, int64_t value, int64_t comparand);
+    int64_t __TBB_machine_fetchstore8__TBB_full_fence (volatile void *ptr, int64_t value);
+    int64_t __TBB_machine_fetchstore8acquire(volatile void *ptr, int64_t value);
+    int64_t __TBB_machine_fetchstore8release(volatile void *ptr, int64_t value);
+    int64_t __TBB_machine_fetchadd8acquire(volatile void *ptr, int64_t addend);
+    int64_t __TBB_machine_fetchadd8release(volatile void *ptr, int64_t addend);
+
+    int8_t __TBB_machine_cmpswp1acquire(volatile void *ptr, int8_t value, int8_t comparand); 
+    int8_t __TBB_machine_cmpswp1release(volatile void *ptr, int8_t value, int8_t comparand); 
+    int8_t __TBB_machine_fetchstore1__TBB_full_fence (volatile void *ptr, int8_t value);
+
+    int16_t __TBB_machine_cmpswp2acquire(volatile void *ptr, int16_t value, int16_t comparand); 
+    int16_t __TBB_machine_cmpswp2release(volatile void *ptr, int16_t value, int16_t comparand); 
+    int16_t __TBB_machine_fetchstore2__TBB_full_fence (volatile void *ptr, int16_t value);
+
+    int32_t __TBB_machine_cmpswp4__TBB_full_fence (volatile void *ptr, int32_t value, int32_t comparand);
+    int32_t __TBB_machine_cmpswp4acquire(volatile void *ptr, int32_t value, int32_t comparand); 
+    int32_t __TBB_machine_cmpswp4release(volatile void *ptr, int32_t value, int32_t comparand); 
+    int32_t __TBB_machine_fetchadd4__TBB_full_fence (volatile void *ptr, int32_t value);
+
+    int64_t __TBB_machine_cmpswp8acquire(volatile void *ptr, int64_t value, int64_t comparand); 
+    int64_t __TBB_machine_cmpswp8release(volatile void *ptr, int64_t value, int64_t comparand); 
+    int64_t __TBB_machine_fetchadd8__TBB_full_fence (volatile void *ptr, int64_t value);
+
+    int64_t __TBB_machine_lg(uint64_t value);
+    void __TBB_machine_pause(int32_t delay);
+    bool __TBB_machine_trylockbyte( volatile unsigned char &ptr );
+    int64_t __TBB_machine_lockbyte( volatile unsigned char &ptr );
+
+    //! Retrieves the current RSE backing store pointer. IA64 specific.
+    void* __TBB_get_bsp();
+}
+
+#define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1__TBB_full_fence(P,V,C)
+#define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2__TBB_full_fence(P,V,C) 
+
+#define __TBB_FetchAndAdd1(P,V)        __TBB_machine_fetchadd1__TBB_full_fence(P,V)
+#define __TBB_FetchAndAdd1acquire(P,V) __TBB_machine_fetchadd1acquire(P,V)
+#define __TBB_FetchAndAdd1release(P,V) __TBB_machine_fetchadd1release(P,V)
+#define __TBB_FetchAndAdd2(P,V)        __TBB_machine_fetchadd2__TBB_full_fence(P,V)
+#define __TBB_FetchAndAdd2acquire(P,V) __TBB_machine_fetchadd2acquire(P,V)
+#define __TBB_FetchAndAdd2release(P,V) __TBB_machine_fetchadd2release(P,V)
+#define __TBB_FetchAndAdd4acquire(P,V) __TBB_machine_fetchadd4acquire(P,V)
+#define __TBB_FetchAndAdd4release(P,V) __TBB_machine_fetchadd4release(P,V)
+#define __TBB_FetchAndAdd8acquire(P,V) __TBB_machine_fetchadd8acquire(P,V)
+#define __TBB_FetchAndAdd8release(P,V) __TBB_machine_fetchadd8release(P,V)
+
+#define __TBB_FetchAndStore1acquire(P,V) __TBB_machine_fetchstore1acquire(P,V)
+#define __TBB_FetchAndStore1release(P,V) __TBB_machine_fetchstore1release(P,V)
+#define __TBB_FetchAndStore2acquire(P,V) __TBB_machine_fetchstore2acquire(P,V)
+#define __TBB_FetchAndStore2release(P,V) __TBB_machine_fetchstore2release(P,V)
+#define __TBB_FetchAndStore4acquire(P,V) __TBB_machine_fetchstore4acquire(P,V)
+#define __TBB_FetchAndStore4release(P,V) __TBB_machine_fetchstore4release(P,V)
+#define __TBB_FetchAndStore8acquire(P,V) __TBB_machine_fetchstore8acquire(P,V)
+#define __TBB_FetchAndStore8release(P,V) __TBB_machine_fetchstore8release(P,V)
+
+#define __TBB_CompareAndSwap1acquire(P,V,C) __TBB_machine_cmpswp1acquire(P,V,C)
+#define __TBB_CompareAndSwap1release(P,V,C) __TBB_machine_cmpswp1release(P,V,C)
+#define __TBB_CompareAndSwap2acquire(P,V,C) __TBB_machine_cmpswp2acquire(P,V,C)
+#define __TBB_CompareAndSwap2release(P,V,C) __TBB_machine_cmpswp2release(P,V,C)
+#define __TBB_CompareAndSwap4(P,V,C)        __TBB_machine_cmpswp4__TBB_full_fence(P,V,C)
+#define __TBB_CompareAndSwap4acquire(P,V,C) __TBB_machine_cmpswp4acquire(P,V,C)
+#define __TBB_CompareAndSwap4release(P,V,C) __TBB_machine_cmpswp4release(P,V,C)
+#define __TBB_CompareAndSwap8(P,V,C)        __TBB_machine_cmpswp8__TBB_full_fence(P,V,C)
+#define __TBB_CompareAndSwap8acquire(P,V,C) __TBB_machine_cmpswp8acquire(P,V,C)
+#define __TBB_CompareAndSwap8release(P,V,C) __TBB_machine_cmpswp8release(P,V,C)
+
+#define __TBB_FetchAndAdd4(P,V) __TBB_machine_fetchadd4__TBB_full_fence(P,V)
+#define __TBB_FetchAndAdd8(P,V) __TBB_machine_fetchadd8__TBB_full_fence(P,V)
+
+#define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1__TBB_full_fence(P,V)
+#define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2__TBB_full_fence(P,V)
+#define __TBB_FetchAndStore4(P,V) __TBB_machine_fetchstore4__TBB_full_fence(P,V)
+#define __TBB_FetchAndStore8(P,V) __TBB_machine_fetchstore8__TBB_full_fence(P,V)
+
+#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAdd8acquire(P,1)
+#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAdd8release(P,-1)
+
+#ifndef __INTEL_COMPILER
+/* Even though GCC imbues volatile loads with acquire semantics, 
+   it sometimes moves loads over the acquire fence.  The
+   fences defined here stop such incorrect code motion. */
+#define __TBB_release_consistency_helper() __asm__ __volatile__("": : :"memory")
+#define __TBB_full_memory_fence() __asm__ __volatile__("mf": : :"memory")
+#else
+#define __TBB_release_consistency_helper()
+#define __TBB_full_memory_fence() __mf()
+#endif /* __INTEL_COMPILER */
+
+// Special atomic functions
+#define __TBB_CompareAndSwapW(P,V,C)   __TBB_CompareAndSwap8(P,V,C)
+#define __TBB_FetchAndStoreW(P,V)      __TBB_FetchAndStore8(P,V)
+#define __TBB_FetchAndAddW(P,V)        __TBB_FetchAndAdd8(P,V)
+#define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAdd8release(P,V)
+
+// Not needed
+#undef __TBB_Store8
+#undef __TBB_Load8
+
+// Definition of Lock functions
+#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
+#define __TBB_LockByte(P)    __TBB_machine_lockbyte(P)
+
+// Definition of other utility functions
+#define __TBB_Pause(V) __TBB_machine_pause(V)
+#define __TBB_Log2(V)  __TBB_machine_lg(V)
+
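Unlike the x86 headers, this port defines __TBB_DECL_FENCED_ATOMICS and exposes explicit acquire/release variants; the header itself maps __TBB_FetchAndIncrementWacquire and __TBB_FetchAndDecrementWrelease onto them. A sketch of a reference count written in that convention (the helpers are hypothetical, not part of TBB):

    // Hypothetical refcount helpers following the Wacquire/Wrelease convention above.
    static inline void ref_pin( volatile void* rc ) {
        __TBB_FetchAndIncrementWacquire( rc );              // acquire: safe to read the object afterwards
    }
    static inline bool ref_unpin_and_is_last( volatile void* rc ) {
        return __TBB_FetchAndDecrementWrelease( rc ) == 1;  // release: publishes our writes; returns previous value
    }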
diff --git a/tbb/include/tbb/machine/linux_intel64.h b/tbb/include/tbb/machine/linux_intel64.h
new file mode 100644 (file)
index 0000000..8d05762
--- /dev/null
@@ -0,0 +1,167 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_machine_H
+#error Do not include this file directly; include tbb_machine.h instead
+#endif
+
+#include <stdint.h>
+#include <unistd.h>
+
+#define __TBB_WORDSIZE 8
+#define __TBB_BIG_ENDIAN 0
+
+#define __TBB_release_consistency_helper() __asm__ __volatile__("": : :"memory")
+
+// __TBB_full_memory_fence can be predefined
+#ifndef __TBB_full_memory_fence
+#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
+#endif
+
+#define __MACHINE_DECL_ATOMICS(S,T,X) \
+static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T comparand )  \
+{                                                                                    \
+    T result;                                                                        \
+                                                                                     \
+    __asm__ __volatile__("lock\ncmpxchg" X " %2,%1"                                  \
+                          : "=a"(result), "=m"(*(volatile T*)ptr)                    \
+                          : "q"(value), "0"(comparand), "m"(*(volatile T*)ptr)       \
+                          : "memory");                                               \
+    return result;                                                                   \
+}                                                                                    \
+                                                                                     \
+static inline T __TBB_machine_fetchadd##S(volatile void *ptr, T addend)              \
+{                                                                                    \
+    T result;                                                                        \
+    __asm__ __volatile__("lock\nxadd" X " %0,%1"                                     \
+                          : "=r"(result),"=m"(*(volatile T*)ptr)                     \
+                          : "0"(addend), "m"(*(volatile T*)ptr)                      \
+                          : "memory");                                               \
+    return result;                                                                   \
+}                                                                                    \
+                                                                                     \
+static inline  T __TBB_machine_fetchstore##S(volatile void *ptr, T value)            \
+{                                                                                    \
+    T result;                                                                        \
+    __asm__ __volatile__("lock\nxchg" X " %0,%1"                                     \
+                          : "=r"(result),"=m"(*(volatile T*)ptr)                     \
+                          : "0"(value), "m"(*(volatile T*)ptr)                       \
+                          : "memory");                                               \
+    return result;                                                                   \
+}                                                                                    \
+                                                                                     
+__MACHINE_DECL_ATOMICS(1,int8_t,"")
+__MACHINE_DECL_ATOMICS(2,int16_t,"")
+__MACHINE_DECL_ATOMICS(4,int32_t,"")
+__MACHINE_DECL_ATOMICS(8,int64_t,"q")
+
+static inline int64_t __TBB_machine_lg( uint64_t x ) {
+    int64_t j;
+    __asm__ ("bsr %1,%0" : "=r"(j) : "r"(x));
+    return j;
+}
+
+static inline void __TBB_machine_or( volatile void *ptr, uint64_t addend ) {
+    __asm__ __volatile__("lock\norq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(addend), "m"(*(volatile uint64_t*)ptr) : "memory");
+}
+
+static inline void __TBB_machine_and( volatile void *ptr, uint64_t addend ) {
+    __asm__ __volatile__("lock\nandq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(addend), "m"(*(volatile uint64_t*)ptr) : "memory");
+}
+
+// Machine specific atomic operations
+
+#define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1(P,V,C)
+#define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2(P,V,C)
+#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
+#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
+#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C)
+
+#define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1(P,V)
+#define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2(P,V)
+#define __TBB_FetchAndAdd4(P,V) __TBB_machine_fetchadd4(P,V)
+#define __TBB_FetchAndAdd8(P,V)  __TBB_machine_fetchadd8(P,V)
+#define __TBB_FetchAndAddW(P,V)  __TBB_machine_fetchadd8(P,V)
+
+#define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1(P,V)
+#define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2(P,V)
+#define __TBB_FetchAndStore4(P,V) __TBB_machine_fetchstore4(P,V)
+#define __TBB_FetchAndStore8(P,V)  __TBB_machine_fetchstore8(P,V)
+#define __TBB_FetchAndStoreW(P,V)  __TBB_machine_fetchstore8(P,V)
+
+#undef __TBB_Store8
+#undef __TBB_Load8
+
+#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
+#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
+
+// Definition of other functions
+#ifndef __TBB_Pause
+static inline void __TBB_machine_pause( int32_t delay ) {
+    for (int32_t i = 0; i < delay; i++) {
+       __asm__ __volatile__("pause;");
+    }
+    return;
+}
+#define __TBB_Pause(V) __TBB_machine_pause(V)
+#endif
+#define __TBB_Log2(V)    __TBB_machine_lg(V)
+
+// Special atomic functions
+#define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAddW(P,V)
+#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
+#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,-1)
+
+// Use generic definitions from tbb_machine.h
+#undef __TBB_TryLockByte
+#undef __TBB_LockByte
+
+// API to retrieve/update FPU control setting
+#ifndef __TBB_CPU_CTL_ENV_PRESENT
+#define __TBB_CPU_CTL_ENV_PRESENT 1
+
+struct __TBB_cpu_ctl_env_t {
+    int     mxcsr;
+    short   x87cw;
+};
+
+inline void __TBB_get_cpu_ctl_env ( __TBB_cpu_ctl_env_t* ctl ) {
+    __asm__ __volatile__ (
+            "stmxcsr %0\n\t"
+            "fstcw %1"
+            : "=m"(ctl->mxcsr), "=m"(ctl->x87cw)
+    );
+}
+inline void __TBB_set_cpu_ctl_env ( const __TBB_cpu_ctl_env_t* ctl ) {
+    __asm__ __volatile__ (
+            "ldmxcsr %0\n\t"
+            "fldcw %1"
+            : : "m"(ctl->mxcsr), "m"(ctl->x87cw)
+    );
+}
+#endif
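A minimal caller-side sketch of how this get/set pair is meant to be used (illustrative only; the variable name is a placeholder, and the header is assumed to be reached via tbb_machine.h):

    __TBB_cpu_ctl_env_t saved_state;
    __TBB_get_cpu_ctl_env(&saved_state);   // stmxcsr/fstcw: capture MXCSR and the x87 control word
    // ... run code that may change rounding modes or exception masks ...
    __TBB_set_cpu_ctl_env(&saved_state);   // ldmxcsr/fldcw: restore the captured state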
diff --git a/tbb/include/tbb/machine/mac_ppc.h b/tbb/include/tbb/machine/mac_ppc.h
new file mode 100644 (file)
index 0000000..3114039
--- /dev/null
@@ -0,0 +1,118 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_machine_H
+#error Do not include this file directly; include tbb_machine.h instead
+#endif
+
+#include <stdint.h>
+#include <unistd.h>
+
+// This file is for PowerPC with compilers supporting GNU inline-assembler syntax (currently GNU g++ and IBM XL).
+
+// Motivation for use of "#if defined(__powerpc64__) || defined(__ppc64__)" to detect a 64-bit environment:
+// IBM XL documents both __powerpc64__ and __PPC64__, and these also appear to work on g++ (documentation?)
+// Apple documents __ppc64__ (with __ppc__ only 32-bit, which is not portable even to other environments using g++)
+inline int32_t __TBB_machine_cmpswp4 (volatile void *ptr, int32_t value, int32_t comparand )
+{
+    int32_t result;
+
+    __asm__ __volatile__("sync\n"
+                         "0: lwarx %0,0,%2\n\t"  /* load w/ reservation */
+                         "cmpw %0,%4\n\t"        /* compare against comparand */
+                         "bne- 1f\n\t"           /* exit if not same */
+                         "stwcx. %3,0,%2\n\t"    /* store new_value */
+                         "bne- 0b\n"             /* retry if reservation lost */
+                         "1: sync"               /* the exit */
+                          : "=&r"(result), "=m"(* (int32_t*) ptr)
+                          : "r"(ptr), "r"(value), "r"(comparand), "m"(* (int32_t*) ptr)
+                          : "cr0", "memory");
+    return result;
+}
+
+#if defined(__powerpc64__) || defined(__ppc64__)
+
+inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
+{
+    int64_t result;
+    __asm__ __volatile__("sync\n"
+                         "0: ldarx %0,0,%2\n\t"  /* load w/ reservation */
+                         "cmpd %0,%4\n\t"        /* compare against comparand */
+                         "bne- 1f\n\t"           /* exit if not same */
+                         "stdcx. %3,0,%2\n\t"    /* store new_value */
+                         "bne- 0b\n"             /* retry if reservation lost */
+                         "1: sync"               /* the exit */
+                          : "=&r"(result), "=m"(* (int64_t*) ptr)
+                          : "r"(ptr), "r"(value), "r"(comparand), "m"(* (int64_t*) ptr)
+                          : "cr0", "memory");
+    return result;
+}
+#else
+// Except for special circumstances, 32-bit builds are meant to run on actual 32-bit hardware
+// A locked implementation would also be a possibility
+#define __TBB_64BIT_ATOMICS 0
+#endif /* 64bit CAS */
+
+#define __TBB_BIG_ENDIAN 1
+
+#if defined(__powerpc64__) || defined(__ppc64__)
+#define __TBB_WORDSIZE 8
+#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C)
+#else
+#define __TBB_WORDSIZE 4
+#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C)
+#endif
+
+#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
+#if __TBB_64BIT_ATOMICS
+#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
+#endif
+#define __TBB_full_memory_fence() __asm__ __volatile__("sync": : :"memory")
+#define __TBB_release_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")
+
+#if !__IBMCPP__
+// "1501-230 (S) Internal compiler error; please contact your Service Representative"
+static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
+    // TODO: assumes sizeof(uintptr_t) is 8 on 64-bit targets and 4 on 32-bit targets
+    #if defined(__powerpc64__) || defined(__ppc64__)
+    __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x)); // counting starts at 2^63
+    return 63-static_cast<intptr_t>(x);
+    #else
+    __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x)); // counting starts at 2^31 (on 64-bit hardware, higher-order bits are ignored)
+    return 31-static_cast<intptr_t>(x);
+    #endif
+}
+#define __TBB_Log2(V) __TBB_machine_lg(V)
+#endif
+
+#define __TBB_Byte uint32_t // TODO: would this ever not be aligned without an alignment specification?
+
+inline bool __TBB_machine_trylockbyte( __TBB_Byte &flag ) {
+    return __TBB_machine_cmpswp4(&flag,1,0)==0;
+}
+#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
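A rough sketch (not TBB's own lock) of how the byte-lock primitive above composes into a simple spin lock; 'lock_flag', 'enter' and 'leave' are placeholder names, and a production lock would add back-off:

    static __TBB_Byte lock_flag = 0;                 // 0 = unlocked, 1 = locked

    static void enter() {
        while( !__TBB_TryLockByte(lock_flag) )       // CAS 0 -> 1; true means we acquired it
            ;                                        // a real lock would pause/yield here
    }

    static void leave() {
        __TBB_release_consistency_helper();          // lwsync: publish protected writes first
        lock_flag = 0;                               // store 0 = unlocked
    }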
diff --git a/tbb/include/tbb/machine/macos_common.h b/tbb/include/tbb/machine/macos_common.h
new file mode 100644 (file)
index 0000000..c0e8799
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_machine_H
+#error Do not include this file directly; include tbb_machine.h instead
+#endif
+
+#include <sched.h>
+#define __TBB_Yield()  sched_yield()
+
+
+// __TBB_HardwareConcurrency
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+static inline int __TBB_macos_available_cpu() {
+    int name[2] = {CTL_HW, HW_AVAILCPU};
+    int ncpu;
+    size_t size = sizeof(ncpu);
+    sysctl( name, 2, &ncpu, &size, NULL, 0 );
+    return ncpu;
+}
+
+#define __TBB_HardwareConcurrency() __TBB_macos_available_cpu()
+
+
+#ifndef __TBB_WORDSIZE
+#define __TBB_WORDSIZE 4
+#endif
+
+#ifndef __TBB_BIG_ENDIAN
+#if __BIG_ENDIAN__
+#define __TBB_BIG_ENDIAN 1
+#else
+#define __TBB_BIG_ENDIAN 0
+#endif
+#endif
+
+
+#if !defined(__TBB_CompareAndSwap4) || !defined(__TBB_CompareAndSwap8)
+
+// Implementation of atomic operations based on OS provided primitives
+#include <libkern/OSAtomic.h>
+
+#define __TBB_release_consistency_helper() OSMemoryBarrier()
+#define __TBB_full_memory_fence()          OSMemoryBarrier()
+
+static inline int32_t __TBB_macos_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand)
+{
+    __TBB_ASSERT( !((uintptr_t)ptr&0x3), "address not properly aligned for Mac OS atomics");
+    int32_t* address = (int32_t*)ptr;
+    while( !OSAtomicCompareAndSwap32Barrier(comparand, value, address) ){
+        int32_t snapshot = *address;
+        if( snapshot!=comparand ) return snapshot;
+    }
+    return comparand;
+}
+
+static inline int64_t __TBB_macos_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand)
+{
+    __TBB_ASSERT( !((uintptr_t)ptr&0x7), "address not properly aligned for Mac OS atomics");
+    int64_t* address = (int64_t*)ptr;
+    while( !OSAtomicCompareAndSwap64Barrier(comparand, value, address) ){
+#if __TBB_WORDSIZE==8
+        int64_t snapshot = *address;
+#else
+        int64_t snapshot = OSAtomicAdd64( 0, address );
+#endif
+        if( snapshot!=comparand ) return snapshot;
+    }
+    return comparand;
+}
+
+#define __TBB_CompareAndSwap4(P,V,C) __TBB_macos_cmpswp4(P,V,C)
+#define __TBB_CompareAndSwap8(P,V,C) __TBB_macos_cmpswp8(P,V,C)
+
+static inline int32_t __TBB_macos_fetchadd4(volatile void *ptr, int32_t addend)
+{
+    __TBB_ASSERT( !((uintptr_t)ptr&0x3), "address not properly aligned for Mac OS atomics");
+    return OSAtomicAdd32Barrier(addend, (int32_t*)ptr) - addend;
+}
+
+static inline int64_t __TBB_macos_fetchadd8(volatile void *ptr, int64_t addend)
+{
+    __TBB_ASSERT( !((uintptr_t)ptr&0x7), "address not properly aligned for Mac OS atomics");
+    return OSAtomicAdd64Barrier(addend, (int64_t*)ptr) - addend;
+}
+
+#define __TBB_FetchAndAdd4(P,V) __TBB_macos_fetchadd4(P,V)
+#define __TBB_FetchAndAdd8(P,V) __TBB_macos_fetchadd8(P,V)
+
+#if __TBB_WORDSIZE==4
+#define __TBB_CompareAndSwapW(P,V,C) __TBB_CompareAndSwap4(P,V,C)
+#define __TBB_FetchAndAddW(P,V) __TBB_FetchAndAdd4(P,V)
+#else
+#define __TBB_CompareAndSwapW(P,V,C) __TBB_CompareAndSwap8(P,V,C)
+#define __TBB_FetchAndAddW(P,V) __TBB_FetchAndAdd8(P,V)
+#endif
+
+#endif /* !defined(__TBB_CompareAndSwap4) || !defined(__TBB_CompareAndSwap8) */
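For orientation, the classic compare-and-swap retry loop that generic code can build on top of __TBB_CompareAndSwap4; this is an illustrative sketch, not TBB's actual implementation, and 'atomic_increment' is a placeholder name:

    static inline int32_t atomic_increment( volatile int32_t& counter ) {
        int32_t old_value, new_value;
        do {
            old_value = counter;                     // snapshot the current value
            new_value = old_value + 1;
            // the CAS returns the value actually seen in memory;
            // it equals old_value exactly when our update won the race
        } while( __TBB_CompareAndSwap4(&counter, new_value, old_value) != old_value );
        return new_value;
    }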
diff --git a/tbb/include/tbb/machine/sunos_sparc.h b/tbb/include/tbb/machine/sunos_sparc.h
new file mode 100644 (file)
index 0000000..ca228fa
--- /dev/null
@@ -0,0 +1,228 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+
+#ifndef __TBB_machine_H
+#error Do not include this file directly; include tbb_machine.h instead
+#endif
+
+#include <stdint.h>
+#include <unistd.h>
+
+#define __TBB_WORDSIZE 8
+#define __TBB_BIG_ENDIAN 1
+
+#define __TBB_release_consistency_helper() __asm__ __volatile__ ("": : :"memory")
+#define __TBB_full_memory_fence() __asm__ __volatile__("membar #LoadLoad|#LoadStore|#StoreStore|#StoreLoad": : : "memory")
+
+//--------------------------------------------------
+// Compare and swap
+//--------------------------------------------------
+
+/**
+ * Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
+ * @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
+ * @param value value to assign *ptr to if *ptr==comparand
+ * @param comparand value to compare with *ptr
+ * @return value originally in memory at ptr, regardless of success
+*/
+static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand ){
+  int32_t result;
+  __asm__ __volatile__(
+                       "cas\t[%5],%4,%1"
+                       : "=m"(*(int32_t *)ptr), "=r"(result)
+                       : "m"(*(int32_t *)ptr), "1"(value), "r"(comparand), "r"(ptr)
+                       : "memory");
+  return result;
+}
+
+/**
+ * Atomic CAS for 64 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
+ * @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
+ * @param value value to assign *ptr to if *ptr==comparand
+ * @param comparand value to compare with *ptr
+ * @return value originally in memory at ptr, regardless of success
+ */
+static inline int64_t __TBB_machine_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand ){
+  int64_t result;
+  __asm__ __volatile__(
+                       "casx\t[%5],%4,%1"
+               : "=m"(*(int64_t *)ptr), "=r"(result)
+               : "m"(*(int64_t *)ptr), "1"(value), "r"(comparand), "r"(ptr)
+               : "memory");
+  return result;
+}
+
+//---------------------------------------------------
+// Fetch and add
+//---------------------------------------------------
+
+/**
+ * Atomic fetch and add for 32 bit values, implemented as a CAS loop that retries until the update is applied atomically
+ * @param ptr pointer to value to add addend to
+ * @param addend value to add to *ptr
+ * @return value at ptr before addend was added
+ */
+static inline int32_t __TBB_machine_fetchadd4(volatile void *ptr, int32_t addend){
+  int32_t result;
+  __asm__ __volatile__ (                                 
+                        "0:\t add\t %3, %4, %0\n"    // do addition
+                        "\t cas\t [%2], %3, %0\n"        // cas to store result in memory
+                        "\t cmp\t %3, %0\n"            // check if value from memory is original
+                        "\t bne,a,pn\t %%icc, 0b\n"        // if not try again
+                        "\t mov %0, %3\n"            // use branch delay slot to move new value in memory to be added
+               : "=&r"(result), "=m"(*(int32_t *)ptr)
+               : "r"(ptr), "r"(*(int32_t *)ptr), "r"(addend), "m"(*(int32_t *)ptr)
+               : "ccr", "memory");
+  return result;
+}
+
+/**
+ * Atomic fetch and add for 64 bit values, implemented as a CAS loop that retries until the update is applied atomically
+ * @param ptr pointer to value to add addend to
+ * @param addend value to add to *ptr
+ * @return value at ptr before addend was added
+ */
+static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend){
+  int64_t result;
+  __asm__ __volatile__ (
+                        "0:\t add\t %3, %4, %0\n"    // do addition
+                        "\t casx\t [%2], %3, %0\n"        // cas to store result in memory
+                        "\t cmp\t %3, %0\n"            // check if value from memory is original
+                        "\t bne,a,pn\t %%xcc, 0b\n"        // if not try again
+                        "\t mov %0, %3\n"            // use branch delay slot to move new value in memory to be added
+                : "=&r"(result), "=m"(*(int64_t *)ptr)
+                : "r"(ptr), "r"(*(int64_t *)ptr), "r"(addend), "m"(*(int64_t *)ptr)
+                : "ccr", "memory");
+  return result;
+}
+
+//--------------------------------------------------------
+// Logarithm (base two, integer)
+//--------------------------------------------------------
+
+static inline int64_t __TBB_machine_lg( uint64_t x ) {
+    uint64_t count;
+    // smear the highest set bit into every lower bit position
+    x |= (x >> 1);
+    x |= (x >> 2);
+    x |= (x >> 4);
+    x |= (x >> 8);
+    x |= (x >> 16);
+    x |= (x >> 32);
+    // count 1's
+    __asm__ ("popc %1, %0" : "=r"(count) : "r"(x) );
+    return count-1;
+}
+
+//--------------------------------------------------------
+
+static inline void __TBB_machine_or( volatile void *ptr, uint64_t addend ) {
+  __asm__ __volatile__ (
+                        "0:\t or\t %2, %3, %%g1\n"              // compute the bitwise OR
+                        "\t casx\t [%1], %2, %%g1\n"            // cas to store result in memory
+                        "\t cmp\t %2, %%g1\n"                   // check if value from memory is original
+                        "\t bne,a,pn\t %%xcc, 0b\n" // if not try again
+                        "\t mov %%g1, %2\n"                     // use branch delay slot to move new value in memory to be added
+                : "=m"(*(int64_t *)ptr)
+                : "r"(ptr), "r"(*(int64_t *)ptr), "r"(addend), "m"(*(int64_t *)ptr)
+                : "ccr", "g1", "memory");
+}
+
+static inline void __TBB_machine_and( volatile void *ptr, uint64_t addend ) {
+  __asm__ __volatile__ (
+                        "0:\t and\t %2, %3, %%g1\n"             // compute the bitwise AND
+                        "\t casx\t [%1], %2, %%g1\n"            // cas to store result in memory
+                        "\t cmp\t %2, %%g1\n"                   // check if value from memory is original
+                        "\t bne,a,pn\t %%xcc, 0b\n"         // if not try again
+                        "\t mov %%g1, %2\n"                     // use branch delay slot to move new value in memory to be added
+                : "=m"(*(int64_t *)ptr)
+                : "r"(ptr), "r"(*(int64_t *)ptr), "r"(addend), "m"(*(int64_t *)ptr)
+                : "ccr", "g1", "memory");
+}
+
+
+static inline void __TBB_machine_pause( int32_t delay ) {
+    // intentionally does nothing; it is inlined, so the empty pause costs nothing
+}
+
+// ldstub puts 0xff in the memory location and returns the previous value;
+//  the generic trylockbyte stores 0x01 instead, but this is fine
+//  because all that matters is that 0 means unlocked
+static inline bool __TBB_machine_trylockbyte(unsigned char &flag){
+    unsigned char result;
+    __asm__ __volatile__ (
+            "ldstub\t [%2], %0\n"
+        : "=r"(result), "=m"(flag)
+        : "r"(&flag), "m"(flag)
+        : "memory");
+    return result == 0;
+}
+
+
+// Machine specific atomic operations
+
+//#define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1(P,V,C)  // use generic version in tbb_machine.h
+//#define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2(P,V,C)  // use generic version in tbb_machine.h
+#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
+#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
+#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C)
+
+//#define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1(P,V)       // use generic version in tbb_machine.h
+//#define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2(P,V)       // use generic version in tbb_machine.h
+#define __TBB_FetchAndAdd4(P,V) __TBB_machine_fetchadd4(P,V)
+#define __TBB_FetchAndAdd8(P,V)  __TBB_machine_fetchadd8(P,V)
+#define __TBB_FetchAndAddW(P,V)  __TBB_machine_fetchadd8(P,V)
+
+// use generic version in tbb_machine.h
+//#define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1(P,V)  
+//#define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2(P,V)
+//#define __TBB_FetchAndStore4(P,V) __TBB_machine_fetchstore4(P,V)
+//#define __TBB_FetchAndStore8(P,V)  __TBB_machine_fetchstore8(P,V)
+//#define __TBB_FetchAndStoreW(P,V)  __TBB_machine_fetchstore8(P,V)
+
+#undef __TBB_Store8
+#undef __TBB_Load8
+
+#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
+#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
+
+// Definition of other functions
+#define __TBB_Pause(V) __TBB_machine_pause(V)
+#define __TBB_Log2(V)    __TBB_machine_lg(V)
+
+// Special atomic functions
+#define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAddW(P,V)
+#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
+#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,-1)
+
+// Definition of Lock functions
+// Repeatedly runs TryLockByte, no need to implement
+#undef __TBB_LockByte
+
+#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
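As a sanity check on the bit-smearing trick in __TBB_machine_lg above (smear the highest set bit downward, then popcount and subtract one), a hedged portable reference that should agree with it for any nonzero input; 'portable_lg' is a placeholder name:

    static inline int64_t portable_lg( uint64_t x ) {
        int64_t result = -1;
        while( x ) { ++result; x >>= 1; }            // index of the highest set bit
        return result;                               // == floor(log2(x)) for x != 0
    }
    // e.g. portable_lg(40) == __TBB_machine_lg(40) == 5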
diff --git a/tbb/include/tbb/machine/windows_api.h b/tbb/include/tbb/machine/windows_api.h
new file mode 100644 (file)
index 0000000..3749560
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_machine_windows_api_H
+#define __TBB_machine_windows_api_H
+
+#if _WIN32 || _WIN64
+
+#if _XBOX
+
+#define NONET
+#define NOD3D
+#include <xtl.h>
+
+#else // Assume "usual" Windows
+
+#include <windows.h>
+
+#endif // _XBOX
+
+#if !defined(_WIN32_WINNT)
+// The following Windows API function is declared explicitly;
+// otherwise any user would have to specify /D_WIN32_WINNT=0x0400
+extern "C" BOOL WINAPI TryEnterCriticalSection( LPCRITICAL_SECTION );
+#endif
+
+#else
+#error tbb/machine/windows_api.h should only be used for Windows based platforms
+#endif // _WIN32 || _WIN64
+
+#endif // __TBB_machine_windows_api_H
diff --git a/tbb/include/tbb/machine/windows_ia32.h b/tbb/include/tbb/machine/windows_ia32.h
new file mode 100644 (file)
index 0000000..22dbddd
--- /dev/null
@@ -0,0 +1,222 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_machine_H
+#error Do not include this file directly; include tbb_machine.h instead
+#endif
+
+#if defined(__INTEL_COMPILER)
+#define __TBB_release_consistency_helper() __asm { __asm nop }
+#elif _MSC_VER >= 1300
+extern "C" void _ReadWriteBarrier();
+#pragma intrinsic(_ReadWriteBarrier)
+#define __TBB_release_consistency_helper() _ReadWriteBarrier()
+#else
+#error Unsupported compiler - need to define __TBB_release_consistency_helper to support it
+#endif
+
+#define __TBB_full_memory_fence() __asm { __asm mfence }
+
+#define __TBB_WORDSIZE 4
+#define __TBB_BIG_ENDIAN 0
+
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+    // Workaround for overzealous compiler warnings in /Wp64 mode
+    #pragma warning (push)
+    #pragma warning (disable: 4244 4267)
+#endif
+
+extern "C" {
+    __int64 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand );
+    __int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend );
+    __int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value );
+    void __TBB_EXPORTED_FUNC __TBB_machine_store8 (volatile void *ptr, __int64 value );
+    __int64 __TBB_EXPORTED_FUNC __TBB_machine_load8 (const volatile void *ptr);
+}
+
+#define __TBB_DEFINE_ATOMICS(S,T,U,A,C) \
+static inline T __TBB_machine_cmpswp##S ( volatile void * ptr, U value, U comparand ) { \
+    T result; \
+    volatile T *p = (T *)ptr; \
+    __TBB_release_consistency_helper(); \
+    __asm \
+    { \
+       __asm mov edx, p \
+       __asm mov C , value \
+       __asm mov A , comparand \
+       __asm lock cmpxchg [edx], C \
+       __asm mov result, A \
+    } \
+    __TBB_release_consistency_helper(); \
+    return result; \
+} \
+\
+static inline T __TBB_machine_fetchadd##S ( volatile void * ptr, U addend ) { \
+    T result; \
+    volatile T *p = (T *)ptr; \
+    __TBB_release_consistency_helper(); \
+    __asm \
+    { \
+        __asm mov edx, p \
+        __asm mov A, addend \
+        __asm lock xadd [edx], A \
+        __asm mov result, A \
+    } \
+    __TBB_release_consistency_helper(); \
+    return result; \
+}\
+\
+static inline T __TBB_machine_fetchstore##S ( volatile void * ptr, U value ) { \
+    T result; \
+    volatile T *p = (T *)ptr; \
+    __TBB_release_consistency_helper(); \
+    __asm \
+    { \
+        __asm mov edx, p \
+        __asm mov A, value \
+        __asm lock xchg [edx], A \
+        __asm mov result, A \
+    } \
+    __TBB_release_consistency_helper(); \
+    return result; \
+}
+
+__TBB_DEFINE_ATOMICS(1, __int8, __int8, al, cl)
+__TBB_DEFINE_ATOMICS(2, __int16, __int16, ax, cx)
+__TBB_DEFINE_ATOMICS(4, __int32, __int32, eax, ecx)
+__TBB_DEFINE_ATOMICS(W, ptrdiff_t, ptrdiff_t, eax, ecx)
+
+static inline __int32 __TBB_machine_lg( unsigned __int64 i ) {
+    unsigned __int32 j;
+    __asm
+    {
+        bsr eax, i
+        mov j, eax
+    }
+    return j;
+}
+
+static inline void __TBB_machine_OR( volatile void *operand, __int32 addend ) {
+   __asm 
+   {
+       mov eax, addend
+       mov edx, [operand]
+       lock or [edx], eax
+   }
+}
+
+static inline void __TBB_machine_AND( volatile void *operand, __int32 addend ) {
+   __asm 
+   {
+       mov eax, addend
+       mov edx, [operand]
+       lock and [edx], eax
+   }
+}
+
+static inline void __TBB_machine_pause (__int32 delay ) {
+    _asm 
+    {
+        mov eax, delay
+      L1: 
+        pause
+        add eax, -1
+        jne L1  
+    }
+    return;
+}
+
+#define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1(P,V,C)
+#define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2(P,V,C)
+#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
+#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
+#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswpW(P,V,C)
+
+#define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1(P,V)
+#define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2(P,V)
+#define __TBB_FetchAndAdd4(P,V) __TBB_machine_fetchadd4(P,V)
+#define __TBB_FetchAndAdd8(P,V) __TBB_machine_fetchadd8(P,V)
+#define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchaddW(P,V)
+
+#define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1(P,V)
+#define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2(P,V)
+#define __TBB_FetchAndStore4(P,V) __TBB_machine_fetchstore4(P,V)
+#define __TBB_FetchAndStore8(P,V) __TBB_machine_fetchstore8(P,V)
+#define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstoreW(P,V)
+
+// Should define this: 
+#define __TBB_Store8(P,V) __TBB_machine_store8(P,V)
+#define __TBB_Load8(P) __TBB_machine_load8(P)
+#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
+#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
+
+// Definition of other functions
+extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
+#define __TBB_Yield()  SwitchToThread()
+#define __TBB_Pause(V) __TBB_machine_pause(V)
+#define __TBB_Log2(V)    __TBB_machine_lg(V)
+
+// Use generic definitions from tbb_machine.h
+#undef __TBB_TryLockByte
+#undef __TBB_LockByte
+
+#if defined(_MSC_VER)&&_MSC_VER<1400
+    static inline void* __TBB_machine_get_current_teb () {
+        void* pteb;
+        __asm mov eax, fs:[0x18]
+        __asm mov pteb, eax
+        return pteb;
+    }
+#endif
+
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+    #pragma warning (pop)
+#endif // warnings 4244, 4267 are back
+
+// API to retrieve/update FPU control setting
+#define __TBB_CPU_CTL_ENV_PRESENT 1
+
+struct __TBB_cpu_ctl_env_t {
+    int     mxcsr;
+    short   x87cw;
+};
+inline void __TBB_get_cpu_ctl_env ( __TBB_cpu_ctl_env_t* ctl ) {
+    __asm {
+        __asm mov     eax, ctl
+        __asm stmxcsr [eax]
+        __asm fstcw   [eax+4]
+    }
+}
+inline void __TBB_set_cpu_ctl_env ( const __TBB_cpu_ctl_env_t* ctl ) {
+    __asm {
+        __asm mov     eax, ctl
+        __asm ldmxcsr [eax]
+        __asm fldcw   [eax+4]
+    }
+}
+
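A brief illustrative note, since this is the port that keeps __TBB_Store8/__TBB_Load8: on IA-32 a plain 8-byte load or store is not guaranteed to be atomic, so 64-bit accesses go through the exported helpers. A hedged sketch ('shared_value' is a placeholder):

    static void store_and_reload( volatile __int64& shared_value ) {
        __TBB_Store8(&shared_value, 42);                 // atomic 64-bit store
        __int64 observed = __TBB_Load8(&shared_value);   // atomic 64-bit load
        (void)observed;
    }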
diff --git a/tbb/include/tbb/machine/windows_intel64.h b/tbb/include/tbb/machine/windows_intel64.h
new file mode 100644 (file)
index 0000000..9a45f5d
--- /dev/null
@@ -0,0 +1,145 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_machine_H
+#error Do not include this file directly; include tbb_machine.h instead
+#endif
+
+#include <intrin.h>
+#if !defined(__INTEL_COMPILER)
+#pragma intrinsic(_InterlockedOr64)
+#pragma intrinsic(_InterlockedAnd64)
+#pragma intrinsic(_InterlockedCompareExchange)
+#pragma intrinsic(_InterlockedCompareExchange64)
+#pragma intrinsic(_InterlockedExchangeAdd)
+#pragma intrinsic(_InterlockedExchangeAdd64)
+#pragma intrinsic(_InterlockedExchange)
+#pragma intrinsic(_InterlockedExchange64)
+#endif /* !defined(__INTEL_COMPILER) */
+
+#if defined(__INTEL_COMPILER)
+#define __TBB_release_consistency_helper() __asm { __asm nop }
+#define __TBB_full_memory_fence() __asm { __asm mfence }
+#elif _MSC_VER >= 1300
+extern "C" void _ReadWriteBarrier();
+#pragma intrinsic(_ReadWriteBarrier)
+#define __TBB_release_consistency_helper() _ReadWriteBarrier()
+#pragma intrinsic(_mm_mfence)
+#define __TBB_full_memory_fence() _mm_mfence()
+#endif
+
+#define __TBB_WORDSIZE 8
+#define __TBB_BIG_ENDIAN 0
+
+// ATTENTION: if you ever change argument types in machine-specific primitives,
+// please take care of atomic_word<> specializations in tbb/atomic.h
+extern "C" {
+    __int8 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp1 (volatile void *ptr, __int8 value, __int8 comparand );
+    __int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd1 (volatile void *ptr, __int8 addend );
+    __int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore1 (volatile void *ptr, __int8 value );
+    __int16 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp2 (volatile void *ptr, __int16 value, __int16 comparand );
+    __int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd2 (volatile void *ptr, __int16 addend );
+    __int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore2 (volatile void *ptr, __int16 value );
+    void __TBB_EXPORTED_FUNC __TBB_machine_pause (__int32 delay );
+}
+
+
+#if !__INTEL_COMPILER
+extern "C" unsigned char _BitScanReverse64( unsigned long* i, unsigned __int64 w );
+#pragma intrinsic(_BitScanReverse64)
+#endif
+
+inline __int64 __TBB_machine_lg( unsigned __int64 i ) {
+#if __INTEL_COMPILER
+    unsigned __int64 j;
+    __asm
+    {
+        bsr rax, i
+        mov j, rax
+    }
+#else
+    unsigned long j;
+    _BitScanReverse64( &j, i );
+#endif
+    return j;
+}
+
+inline void __TBB_machine_OR( volatile void *operand, intptr_t addend ) {
+    _InterlockedOr64((__int64*)operand, addend); 
+}
+
+inline void __TBB_machine_AND( volatile void *operand, intptr_t addend ) {
+    _InterlockedAnd64((__int64*)operand, addend); 
+}
+
+#define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1(P,V,C)
+#define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2(P,V,C)
+#define __TBB_CompareAndSwap4(P,V,C) _InterlockedCompareExchange( (long*) P , V , C ) 
+#define __TBB_CompareAndSwap8(P,V,C) _InterlockedCompareExchange64( (__int64*) P , V , C )
+#define __TBB_CompareAndSwapW(P,V,C) _InterlockedCompareExchange64( (__int64*) P , V , C )
+
+#define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1(P,V)
+#define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2(P,V)
+#define __TBB_FetchAndAdd4(P,V) _InterlockedExchangeAdd((long*) P , V )
+#define __TBB_FetchAndAdd8(P,V) _InterlockedExchangeAdd64((__int64*) P , V )
+#define __TBB_FetchAndAddW(P,V) _InterlockedExchangeAdd64((__int64*) P , V )
+
+#define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1(P,V)
+#define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2(P,V)
+#define __TBB_FetchAndStore4(P,V) _InterlockedExchange((long*) P , V )
+#define __TBB_FetchAndStore8(P,V) _InterlockedExchange64((__int64*) P , V )
+#define __TBB_FetchAndStoreW(P,V) _InterlockedExchange64((__int64*) P , V ) 
+
+// Not used if wordsize == 8
+#undef __TBB_Store8
+#undef __TBB_Load8
+
+#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
+#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
+
+extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
+#define __TBB_Yield()  SwitchToThread()
+#define __TBB_Pause(V) __TBB_machine_pause(V)
+#define __TBB_Log2(V)    __TBB_machine_lg(V)
+
+// Use generic definitions from tbb_machine.h
+#undef __TBB_TryLockByte
+#undef __TBB_LockByte
+
+// API to retrieve/update FPU control setting
+#define __TBB_CPU_CTL_ENV_PRESENT 1
+
+struct __TBB_cpu_ctl_env_t {
+    int     mxcsr;
+    short   x87cw;
+};
+
+extern "C" {
+    void __TBB_EXPORTED_FUNC __TBB_get_cpu_ctl_env ( __TBB_cpu_ctl_env_t* );
+    void __TBB_EXPORTED_FUNC __TBB_set_cpu_ctl_env ( const __TBB_cpu_ctl_env_t* );
+}
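A small hedged sketch of how the Interlocked-based macros above behave: every fetch-and-X primitive yields the previous contents of the location, matching the intrinsics it wraps ('hit_count' and 'example' are placeholder names):

    static void example( volatile long& hit_count ) {
        long before_add  = __TBB_FetchAndAdd4(&hit_count, 1);   // _InterlockedExchangeAdd: returns old value
        long before_swap = __TBB_FetchAndStore4(&hit_count, 0); // _InterlockedExchange: returns old value
        (void)before_add; (void)before_swap;
    }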
diff --git a/tbb/include/tbb/machine/xbox360_ppc.h b/tbb/include/tbb/machine/xbox360_ppc.h
new file mode 100644 (file)
index 0000000..7bde308
--- /dev/null
@@ -0,0 +1,121 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_machine_H
+#error Do not include this file directly; include tbb_machine.h instead
+#endif
+
+#define NONET
+#define NOD3D
+#include "xtl.h"    
+#include "ppcintrinsics.h"
+
+#if _MSC_VER >= 1300
+extern "C" void _MemoryBarrier();
+#pragma intrinsic(_MemoryBarrier)
+#define __TBB_release_consistency_helper() _MemoryBarrier()
+#endif
+
+#define __TBB_full_memory_fence() __sync()
+
+#define __TBB_WORDSIZE 4
+#define __TBB_BIG_ENDIAN 1
+
+// TODO: define __TBB_DECL_FENCED_ATOMICS and define acquire/release primitives to maximize performance
+
+typedef __int64 int64_t;  //required for definition of Store8/Load8 in atomic.h
+typedef unsigned char uint8_t;  //same reason
+
+inline __int32 __TBB_machine_cmpswp4(volatile void *ptr, __int32 value, __int32 comparand )
+{                               
+ __lwsync();
+ __int32 result = InterlockedCompareExchange((volatile LONG*)ptr, value, comparand);
+ __lwsync();
+ return result;
+}
+
+inline __int64 __TBB_machine_cmpswp8(volatile void *ptr, __int64 value, __int64 comparand )
+{
+ __lwsync();
+ __int64 result = InterlockedCompareExchange64((volatile LONG64*)ptr, value, comparand);
+ __lwsync();
+ return result;
+}
+
+#pragma optimize( "", off )
+inline void __TBB_machine_pause (__int32 delay ) 
+{
+ for (__int32 i=0; i<delay; i++) {;};
+}
+#pragma optimize( "", on ) 
+
+
+#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
+#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
+#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C)
+#define __TBB_Yield()  Sleep(0)
+#define __TBB_Pause(V) __TBB_machine_pause(V)
+
+// This port uses only 2 hardware threads for TBB on XBOX 360.
+// The others are left for sound processing etc.
+// Change the following mask to allow TBB to use more HW threads.
+static const int __TBB_XBOX360_HARDWARE_THREAD_MASK = 0x0C;
+
+static inline int __TBB_XBOX360_DetectNumberOfWorkers() 
+{
+     char a[__TBB_XBOX360_HARDWARE_THREAD_MASK];  // compile-time assert: at least one bit must always be set
+     a[0]=0;
+
+     return ((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 0) & 1) +
+            ((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 1) & 1) +
+            ((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 2) & 1) +
+            ((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 3) & 1) +
+            ((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 4) & 1) +
+            ((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 5) & 1) + 1;  // +1 accommodates the master thread
+}
+
+static inline int __TBB_XBOX360_GetHardwareThreadIndex(int workerThreadIndex)
+{
+    workerThreadIndex %= __TBB_XBOX360_DetectNumberOfWorkers()-1;
+    int m = __TBB_XBOX360_HARDWARE_THREAD_MASK;
+    int index = 0;
+    int skipcount = workerThreadIndex;
+    while (true)
+    {
+        if ((m & 1)!=0) 
+        {
+            if (skipcount==0) break;
+            skipcount--;
+        }
+        m >>= 1;
+       index++;
+    }
+    return index; 
+}
+
+#define __TBB_HardwareConcurrency() __TBB_XBOX360_DetectNumberOfWorkers()
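For orientation, what the default mask works out to (a hedged check, not part of the port itself):

    // With __TBB_XBOX360_HARDWARE_THREAD_MASK == 0x0C, bits 2 and 3 are set, so
    //   __TBB_XBOX360_DetectNumberOfWorkers()   == 3  (2 workers + the master thread)
    //   __TBB_XBOX360_GetHardwareThreadIndex(0) == 2  (first worker  -> HW thread 2)
    //   __TBB_XBOX360_GetHardwareThreadIndex(1) == 3  (second worker -> HW thread 3)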
diff --git a/tbb/include/tbb/mutex.h b/tbb/include/tbb/mutex.h
new file mode 100644 (file)
index 0000000..b45e67d
--- /dev/null
@@ -0,0 +1,240 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_mutex_H
+#define __TBB_mutex_H
+
+#if _WIN32||_WIN64
+#include "machine/windows_api.h"
+#else
+#include <pthread.h>
+#endif /* _WIN32||_WIN64 */
+
+#include <new>
+#include "aligned_space.h"
+#include "tbb_stddef.h"
+#include "tbb_profiling.h"
+
+namespace tbb {
+
+//! Wrapper around the platform's native mutual-exclusion lock.
+/** For testing purposes only.
+    @ingroup synchronization */
+class mutex {
+public:
+    //! Construct unacquired mutex.
+    mutex() {
+#if TBB_USE_ASSERT || TBB_USE_THREADING_TOOLS
+    internal_construct();
+#else
+  #if _WIN32||_WIN64
+        InitializeCriticalSection(&impl);
+  #else
+        int error_code = pthread_mutex_init(&impl,NULL);
+        if( error_code )
+            tbb::internal::handle_perror(error_code,"mutex: pthread_mutex_init failed");
+  #endif /* _WIN32||_WIN64*/
+#endif /* TBB_USE_ASSERT */
+    };
+
+    ~mutex() {
+#if TBB_USE_ASSERT
+        internal_destroy();
+#else
+  #if _WIN32||_WIN64
+        DeleteCriticalSection(&impl);
+  #else
+        pthread_mutex_destroy(&impl); 
+
+  #endif /* _WIN32||_WIN64 */
+#endif /* TBB_USE_ASSERT */
+    };
+
+    class scoped_lock;
+    friend class scoped_lock;
+
+    //! The scoped locking pattern
+    /** It helps to avoid the common problem of forgetting to release the lock.
+        It also nicely provides the "node" for queuing locks. */
+    class scoped_lock : internal::no_copy {
+    public:
+        //! Construct lock that has not acquired a mutex. 
+        scoped_lock() : my_mutex(NULL) {};
+
+        //! Acquire lock on given mutex.
+        scoped_lock( mutex& mutex ) {
+            acquire( mutex );
+        }
+
+        //! Release lock (if lock is held).
+        ~scoped_lock() {
+            if( my_mutex ) 
+                release();
+        }
+
+        //! Acquire lock on given mutex.
+        void acquire( mutex& mutex ) {
+#if TBB_USE_ASSERT
+            internal_acquire(mutex);
+#else
+            mutex.lock();
+            my_mutex = &mutex;
+#endif /* TBB_USE_ASSERT */
+        }
+
+        //! Try acquire lock on given mutex.
+        bool try_acquire( mutex& mutex ) {
+#if TBB_USE_ASSERT
+            return internal_try_acquire (mutex);
+#else
+            bool result = mutex.try_lock();
+            if( result )
+                my_mutex = &mutex;
+            return result;
+#endif /* TBB_USE_ASSERT */
+        }
+
+        //! Release lock
+        void release() {
+#if TBB_USE_ASSERT
+            internal_release ();
+#else
+            my_mutex->unlock();
+            my_mutex = NULL;
+#endif /* TBB_USE_ASSERT */
+        }
+
+    private:
+        //! The pointer to the current mutex to work
+        mutex* my_mutex;
+
+        //! All checks from acquire using mutex.state were moved here
+        void __TBB_EXPORTED_METHOD internal_acquire( mutex& m );
+
+        //! All checks from try_acquire using mutex.state were moved here
+        bool __TBB_EXPORTED_METHOD internal_try_acquire( mutex& m );
+
+        //! All checks from release using mutex.state were moved here
+        void __TBB_EXPORTED_METHOD internal_release();
+
+        friend class mutex;
+    };
+
+    // Mutex traits
+    static const bool is_rw_mutex = false;
+    static const bool is_recursive_mutex = false;
+    static const bool is_fair_mutex = false;
+
+    // ISO C++0x compatibility methods
+
+    //! Acquire lock
+    void lock() {
+#if TBB_USE_ASSERT
+        aligned_space<scoped_lock,1> tmp;
+        new(tmp.begin()) scoped_lock(*this);
+#else
+  #if _WIN32||_WIN64
+        EnterCriticalSection(&impl);
+  #else
+        pthread_mutex_lock(&impl);
+  #endif /* _WIN32||_WIN64 */
+#endif /* TBB_USE_ASSERT */
+    }
+
+    //! Try acquiring lock (non-blocking)
+    /** Return true if lock acquired; false otherwise. */
+    bool try_lock() {
+#if TBB_USE_ASSERT
+        aligned_space<scoped_lock,1> tmp;
+        scoped_lock& s = *tmp.begin();
+        s.my_mutex = NULL;
+        return s.internal_try_acquire(*this);
+#else
+  #if _WIN32||_WIN64
+        return TryEnterCriticalSection(&impl)!=0;
+  #else
+        return pthread_mutex_trylock(&impl)==0;
+  #endif /* _WIN32||_WIN64 */
+#endif /* TBB_USE_ASSERT */
+    }
+
+    //! Release lock
+    void unlock() {
+#if TBB_USE_ASSERT
+        aligned_space<scoped_lock,1> tmp;
+        scoped_lock& s = *tmp.begin();
+        s.my_mutex = this;
+        s.internal_release();
+#else
+  #if _WIN32||_WIN64
+        LeaveCriticalSection(&impl);
+  #else
+        pthread_mutex_unlock(&impl);
+  #endif /* _WIN32||_WIN64 */
+#endif /* TBB_USE_ASSERT */
+    }
+
+    //! Return native_handle
+  #if _WIN32||_WIN64
+    typedef LPCRITICAL_SECTION native_handle_type;
+  #else
+    typedef pthread_mutex_t* native_handle_type;
+  #endif
+    native_handle_type native_handle() { return (native_handle_type) &impl; }
+
+    enum state_t {
+        INITIALIZED=0x1234,
+        DESTROYED=0x789A,
+        HELD=0x56CD
+    };
+private:
+#if _WIN32||_WIN64
+    CRITICAL_SECTION impl;    
+    enum state_t state;
+#else
+    pthread_mutex_t impl;
+#endif /* _WIN32||_WIN64 */
+
+    //! All checks from mutex constructor using mutex.state were moved here
+    void __TBB_EXPORTED_METHOD internal_construct();
+
+    //! All checks from mutex destructor using mutex.state were moved here
+    void __TBB_EXPORTED_METHOD internal_destroy();
+
+#if _WIN32||_WIN64
+public:
+    //!  Set the internal state
+    void set_state( state_t to ) { state = to; }
+#endif
+};
+
+__TBB_DEFINE_PROFILING_SET_NAME(mutex)
+
+} // namespace tbb 
+
+#endif /* __TBB_mutex_H */
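A short usage sketch of the class above in the scoped-lock style it is designed for; 'shared_counter', 'counter_mutex' and 'bump' are placeholder names:

    #include "tbb/mutex.h"

    static tbb::mutex counter_mutex;
    static int shared_counter = 0;

    static void bump() {
        tbb::mutex::scoped_lock lock(counter_mutex);   // acquired here
        ++shared_counter;
    }                                                  // released when 'lock' leaves scope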
diff --git a/tbb/include/tbb/null_mutex.h b/tbb/include/tbb/null_mutex.h
new file mode 100644 (file)
index 0000000..b233b4b
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_null_mutex_H
+#define __TBB_null_mutex_H
+
+namespace tbb {
+    
+//! A mutex which does nothing
+/** A null_mutex does no operation and simulates success.
+    @ingroup synchronization */
+class null_mutex {   
+    //! Deny assignment and copy construction 
+    null_mutex( const null_mutex& );   
+    void operator=( const null_mutex& );   
+public:   
+    //! Represents acquisition of a mutex.
+    class scoped_lock {   
+    public:   
+        scoped_lock() {}
+        scoped_lock( null_mutex& ) {}   
+        ~scoped_lock() {}
+        void acquire( null_mutex& ) {}
+        bool try_acquire( null_mutex& ) { return true; }
+        void release() {}
+    };
+  
+    null_mutex() {}
+    
+    // Mutex traits   
+    static const bool is_rw_mutex = false;   
+    static const bool is_recursive_mutex = true;
+    static const bool is_fair_mutex = true;
+};  
+
+}
+
+#endif /* __TBB_null_mutex_H */
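A sketch of the intended use: code written against the generic mutex concept can be instantiated with null_mutex to compile the locking away when thread safety is not required ('counted_value' is a placeholder name):

    #include "tbb/mutex.h"
    #include "tbb/null_mutex.h"

    template<typename Mutex>
    class counted_value {
        Mutex my_mutex;
        int my_value;
    public:
        counted_value() : my_value(0) {}
        int increment() {
            typename Mutex::scoped_lock lock(my_mutex);   // a no-op for null_mutex
            return ++my_value;
        }
    };

    // counted_value<tbb::mutex>      guarded;     // really locks
    // counted_value<tbb::null_mutex> unguarded;   // lock calls compile away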
diff --git a/tbb/include/tbb/null_rw_mutex.h b/tbb/include/tbb/null_rw_mutex.h
new file mode 100644 (file)
index 0000000..d7e51cf
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_null_rw_mutex_H
+#define __TBB_null_rw_mutex_H
+
+namespace tbb {
+    
+//! A rw mutex which does nothing
+/** A null_rw_mutex is a rw mutex that does nothing and simulates successful operation.
+    @ingroup synchronization */
+class null_rw_mutex {
+    //! Deny assignment and copy construction 
+    null_rw_mutex( const null_rw_mutex& );   
+    void operator=( const null_rw_mutex& );   
+public:   
+    //! Represents acquisition of a mutex.
+    class scoped_lock {   
+    public:   
+        scoped_lock() {}
+        scoped_lock( null_rw_mutex& , bool = true ) {}
+        ~scoped_lock() {}
+        void acquire( null_rw_mutex& , bool = true ) {}
+        bool upgrade_to_writer() { return true; }
+        bool downgrade_to_reader() { return true; }
+        bool try_acquire( null_rw_mutex& , bool = true ) { return true; }
+        void release() {}
+    };
+  
+    null_rw_mutex() {}
+    
+    // Mutex traits   
+    static const bool is_rw_mutex = true;   
+    static const bool is_recursive_mutex = true;
+    static const bool is_fair_mutex = true;
+};  
+
+}
+
+#endif /* __TBB_null_rw_mutex_H */
diff --git a/tbb/include/tbb/parallel_do.h b/tbb/include/tbb/parallel_do.h
new file mode 100644 (file)
index 0000000..f54d3b3
--- /dev/null
@@ -0,0 +1,508 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_parallel_do_H
+#define __TBB_parallel_do_H
+
+#include "task.h"
+#include "aligned_space.h"
+#include <iterator>
+
+namespace tbb {
+
+//! @cond INTERNAL
+namespace internal {
+    template<typename Body, typename Item> class parallel_do_feeder_impl;
+    template<typename Body> class do_group_task;
+
+    //! Strips its template type argument from 'cv' and '&' qualifiers
+    template<typename T>
+    struct strip { typedef T type; };
+    template<typename T>
+    struct strip<T&> { typedef T type; };
+    template<typename T>
+    struct strip<const T&> { typedef T type; };
+    template<typename T>
+    struct strip<volatile T&> { typedef T type; };
+    template<typename T>
+    struct strip<const volatile T&> { typedef T type; };
+    // Most compilers remove cv-qualifiers from non-reference function argument types,
+    // but unfortunately some do not.
+    template<typename T>
+    struct strip<const T> { typedef T type; };
+    template<typename T>
+    struct strip<volatile T> { typedef T type; };
+    template<typename T>
+    struct strip<const volatile T> { typedef T type; };
+} // namespace internal
+//! @endcond
+
+//! Class the user supplied algorithm body uses to add new tasks
+/** \param Item Work item type **/
+template<typename Item>
+class parallel_do_feeder: internal::no_copy
+{
+    parallel_do_feeder() {}
+    virtual ~parallel_do_feeder () {}
+    virtual void internal_add( const Item& item ) = 0;
+    template<typename Body_, typename Item_> friend class internal::parallel_do_feeder_impl;
+public:
+    //! Add a work item to a running parallel_do.
+    void add( const Item& item ) {internal_add(item);}
+};
+
+//! @cond INTERNAL
+namespace internal {
+    //! For internal use only.
+    /** Selects one of the two possible forms of function call member operator.
+        @ingroup algorithms **/
+    template<class Body, typename Item>
+    class parallel_do_operator_selector
+    {
+        typedef parallel_do_feeder<Item> Feeder;
+        template<typename A1, typename A2, typename CvItem >
+        static void internal_call( const Body& obj, A1& arg1, A2&, void (Body::*)(CvItem) const ) {
+            obj(arg1);
+        }
+        template<typename A1, typename A2, typename CvItem >
+        static void internal_call( const Body& obj, A1& arg1, A2& arg2, void (Body::*)(CvItem, parallel_do_feeder<Item>&) const ) {
+            obj(arg1, arg2);
+        }
+
+    public:
+        template<typename A1, typename A2 >
+        static void call( const Body& obj, A1& arg1, A2& arg2 )
+        {
+            internal_call( obj, arg1, arg2, &Body::operator() );
+        }
+    };
+
+    //! For internal use only.
+    /** Executes one iteration of a do.
+        @ingroup algorithms */
+    template<typename Body, typename Item>
+    class do_iteration_task: public task
+    {
+        typedef parallel_do_feeder_impl<Body, Item> feeder_type;
+
+        Item my_value;
+        feeder_type& my_feeder;
+
+        do_iteration_task( const Item& value, feeder_type& feeder ) : 
+            my_value(value), my_feeder(feeder)
+        {}
+
+        /*override*/ 
+        task* execute()
+        {
+            parallel_do_operator_selector<Body, Item>::call(*my_feeder.my_body, my_value, my_feeder);
+            return NULL;
+        }
+
+        template<typename Body_, typename Item_> friend class parallel_do_feeder_impl;
+    }; // class do_iteration_task
+
+    template<typename Iterator, typename Body, typename Item>
+    class do_iteration_task_iter: public task
+    {
+        typedef parallel_do_feeder_impl<Body, Item> feeder_type;
+
+        Iterator my_iter;
+        feeder_type& my_feeder;
+
+        do_iteration_task_iter( const Iterator& iter, feeder_type& feeder ) : 
+            my_iter(iter), my_feeder(feeder)
+        {}
+
+        /*override*/ 
+        task* execute()
+        {
+            parallel_do_operator_selector<Body, Item>::call(*my_feeder.my_body, *my_iter, my_feeder);
+            return NULL;
+        }
+
+        template<typename Iterator_, typename Body_, typename Item_> friend class do_group_task_forward;    
+        template<typename Body_, typename Item_> friend class do_group_task_input;    
+        template<typename Iterator_, typename Body_, typename Item_> friend class do_task_iter;    
+    }; // class do_iteration_task_iter
+
+    //! For internal use only.
+    /** Implements new task adding procedure.
+        @ingroup algorithms **/
+    template<class Body, typename Item>
+    class parallel_do_feeder_impl : public parallel_do_feeder<Item>
+    {
+        /*override*/ 
+        void internal_add( const Item& item )
+        {
+            typedef do_iteration_task<Body, Item> iteration_type;
+
+            iteration_type& t = *new (task::allocate_additional_child_of(*my_barrier)) iteration_type(item, *this);
+
+            t.spawn( t );
+        }
+    public:
+        const Body* my_body;
+        empty_task* my_barrier;
+
+        parallel_do_feeder_impl()
+        {
+            my_barrier = new( task::allocate_root() ) empty_task();
+            __TBB_ASSERT(my_barrier, "root task allocation failed");
+        }
+
+#if __TBB_TASK_GROUP_CONTEXT
+        parallel_do_feeder_impl(tbb::task_group_context &context)
+        {
+            my_barrier = new( task::allocate_root(context) ) empty_task();
+            __TBB_ASSERT(my_barrier, "root task allocation failed");
+        }
+#endif
+
+        ~parallel_do_feeder_impl()
+        {
+            my_barrier->destroy(*my_barrier);
+        }
+    }; // class parallel_do_feeder_impl
+
+
+    //! For internal use only
+    /** Unpacks a block of iterations.
+        @ingroup algorithms */
+    
+    template<typename Iterator, typename Body, typename Item>
+    class do_group_task_forward: public task
+    {
+        static const size_t max_arg_size = 4;         
+
+        typedef parallel_do_feeder_impl<Body, Item> feeder_type;
+
+        feeder_type& my_feeder;
+        Iterator my_first;
+        size_t my_size;
+        
+        do_group_task_forward( Iterator first, size_t size, feeder_type& feeder ) 
+            : my_feeder(feeder), my_first(first), my_size(size)
+        {}
+
+        /*override*/ task* execute()
+        {
+            typedef do_iteration_task_iter<Iterator, Body, Item> iteration_type;
+            __TBB_ASSERT( my_size>0, NULL );
+            task_list list;
+            task* t; 
+            size_t k=0; 
+            for(;;) {
+                t = new( allocate_child() ) iteration_type( my_first, my_feeder );
+                ++my_first;
+                if( ++k==my_size ) break;
+                list.push_back(*t);
+            }
+            set_ref_count(int(k+1));
+            spawn(list);
+            spawn_and_wait_for_all(*t);
+            return NULL;
+        }
+
+        template<typename Iterator_, typename Body_, typename _Item> friend class do_task_iter;
+    }; // class do_group_task_forward
+
+    template<typename Body, typename Item>
+    class do_group_task_input: public task
+    {
+        static const size_t max_arg_size = 4;         
+        
+        typedef parallel_do_feeder_impl<Body, Item> feeder_type;
+
+        feeder_type& my_feeder;
+        size_t my_size;
+        aligned_space<Item, max_arg_size> my_arg;
+
+        do_group_task_input( feeder_type& feeder ) 
+            : my_feeder(feeder), my_size(0)
+        {}
+
+        /*override*/ task* execute()
+        {
+            typedef do_iteration_task_iter<Item*, Body, Item> iteration_type;
+            __TBB_ASSERT( my_size>0, NULL );
+            task_list list;
+            task* t; 
+            size_t k=0; 
+            for(;;) {
+                t = new( allocate_child() ) iteration_type( my_arg.begin() + k, my_feeder );
+                if( ++k==my_size ) break;
+                list.push_back(*t);
+            }
+            set_ref_count(int(k+1));
+            spawn(list);
+            spawn_and_wait_for_all(*t);
+            return NULL;
+        }
+
+        ~do_group_task_input(){
+            for( size_t k=0; k<my_size; ++k)
+                (my_arg.begin() + k)->~Item();
+        }
+
+        template<typename Iterator_, typename Body_, typename Item_> friend class do_task_iter;
+    }; // class do_group_task_input
+    
+    //! For internal use only.
+    /** Gets block of iterations and packages them into a do_group_task.
+        @ingroup algorithms */
+    template<typename Iterator, typename Body, typename Item>
+    class do_task_iter: public task
+    {
+        typedef parallel_do_feeder_impl<Body, Item> feeder_type;
+
+    public:
+        do_task_iter( Iterator first, Iterator last , feeder_type& feeder ) : 
+            my_first(first), my_last(last), my_feeder(feeder)
+        {}
+
+    private:
+        Iterator my_first;
+        Iterator my_last;
+        feeder_type& my_feeder;
+
+        /* Do not merge the run(xxx) and run_xxx() methods. They are kept separate
+            to make sure that compilers eliminate the unused argument of type xxx
+            (that is, do not put it on the stack). The sole purpose of this argument
+            is overload resolution.
+
+            An alternative would be template functions, but explicit specialization
+            of member function templates is not supported for non-specialized class
+            templates. Besides, template functions would always fall back to the least
+            efficient variant (the one for input iterators) for iterators whose
+            custom tags derive from the basic ones. */
+        /*override*/ task* execute()
+        {
+            typedef typename std::iterator_traits<Iterator>::iterator_category iterator_tag;
+            return run( (iterator_tag*)NULL );
+        }
+
+        /** This is the most restricted variant that operates on input iterators or
+            iterators with unknown tags (tags not derived from the standard ones). **/
+        inline task* run( void* ) { return run_for_input_iterator(); }
+        
+        task* run_for_input_iterator() {
+            typedef do_group_task_input<Body, Item> block_type;
+
+            block_type& t = *new( allocate_additional_child_of(*my_feeder.my_barrier) ) block_type(my_feeder);
+            size_t k=0; 
+            while( !(my_first == my_last) ) {
+                new (t.my_arg.begin() + k) Item(*my_first);
+                ++my_first;
+                if( ++k==block_type::max_arg_size ) {
+                    if ( !(my_first == my_last) )
+                        recycle_to_reexecute();
+                    break;
+                }
+            }
+            if( k==0 ) {
+                destroy(t);
+                return NULL;
+            } else {
+                t.my_size = k;
+                return &t;
+            }
+        }
+
+        inline task* run( std::forward_iterator_tag* ) { return run_for_forward_iterator(); }
+
+        task* run_for_forward_iterator() {
+            typedef do_group_task_forward<Iterator, Body, Item> block_type;
+
+            Iterator first = my_first;
+            size_t k=0; 
+            while( !(my_first==my_last) ) {
+                ++my_first;
+                if( ++k==block_type::max_arg_size ) {
+                    if ( !(my_first==my_last) )
+                        recycle_to_reexecute();
+                    break;
+                }
+            }
+            return k==0 ? NULL : new( allocate_additional_child_of(*my_feeder.my_barrier) ) block_type(first, k, my_feeder);
+        }
+        
+        inline task* run( std::random_access_iterator_tag* ) { return run_for_random_access_iterator(); }
+
+        task* run_for_random_access_iterator() {
+            typedef do_group_task_forward<Iterator, Body, Item> block_type;
+            typedef do_iteration_task_iter<Iterator, Body, Item> iteration_type;
+            
+            size_t k = static_cast<size_t>(my_last-my_first); 
+            if( k > block_type::max_arg_size ) {
+                Iterator middle = my_first + k/2;
+
+                empty_task& c = *new( allocate_continuation() ) empty_task;
+                do_task_iter& b = *new( c.allocate_child() ) do_task_iter(middle, my_last, my_feeder);
+                recycle_as_child_of(c);
+
+                my_last = middle;
+                c.set_ref_count(2);
+                c.spawn(b);
+                return this;
+            }else if( k != 0 ) {
+                task_list list;
+                task* t; 
+                size_t k1=0; 
+                for(;;) {
+                    t = new( allocate_child() ) iteration_type(my_first, my_feeder);
+                    ++my_first;
+                    if( ++k1==k ) break;
+                    list.push_back(*t);
+                }
+                set_ref_count(int(k+1));
+                spawn(list);
+                spawn_and_wait_for_all(*t);
+            }
+            return NULL;
+        }
+    }; // class do_task_iter
+
+    //! For internal use only.
+    /** Implements parallel iteration over a range.
+        @ingroup algorithms */
+    template<typename Iterator, typename Body, typename Item> 
+    void run_parallel_do( Iterator first, Iterator last, const Body& body
+#if __TBB_TASK_GROUP_CONTEXT
+        , task_group_context& context
+#endif
+        )
+    {
+        typedef do_task_iter<Iterator, Body, Item> root_iteration_task;
+#if __TBB_TASK_GROUP_CONTEXT
+        parallel_do_feeder_impl<Body, Item> feeder(context);
+#else
+        parallel_do_feeder_impl<Body, Item> feeder;
+#endif
+        feeder.my_body = &body;
+
+        root_iteration_task &t = *new( feeder.my_barrier->allocate_child() ) root_iteration_task(first, last, feeder);
+
+        feeder.my_barrier->set_ref_count(2);
+        feeder.my_barrier->spawn_and_wait_for_all(t);
+    }
+
+    //! For internal use only.
+    /** Detects types of Body's operator function arguments.
+        @ingroup algorithms **/
+    template<typename Iterator, typename Body, typename Item> 
+    void select_parallel_do( Iterator first, Iterator last, const Body& body, void (Body::*)(Item) const
+#if __TBB_TASK_GROUP_CONTEXT
+        , task_group_context& context 
+#endif // __TBB_TASK_GROUP_CONTEXT 
+        )
+    {
+        run_parallel_do<Iterator, Body, typename strip<Item>::type>( first, last, body
+#if __TBB_TASK_GROUP_CONTEXT
+            , context
+#endif // __TBB_TASK_GROUP_CONTEXT 
+            );
+    }
+
+    //! For internal use only.
+    /** Detects types of Body's operator function arguments.
+        @ingroup algorithms **/
+    template<typename Iterator, typename Body, typename Item, typename _Item> 
+    void select_parallel_do( Iterator first, Iterator last, const Body& body, void (Body::*)(Item, parallel_do_feeder<_Item>&) const
+#if __TBB_TASK_GROUP_CONTEXT
+        , task_group_context& context 
+#endif // __TBB_TASK_GROUP_CONTEXT
+        )
+    {
+        run_parallel_do<Iterator, Body, typename strip<Item>::type>( first, last, body
+#if __TBB_TASK_GROUP_CONTEXT
+            , context
+#endif // __TBB_TASK_GROUP_CONTEXT
+            );
+    }
+
+} // namespace internal
+//! @endcond
+
+
+/** \page parallel_do_body_req Requirements on parallel_do body
+    Class \c Body implementing the concept of parallel_do body must define:
+    - \code
+        B::operator()(
+                cv_item_type item,
+                parallel_do_feeder<item_type>& feeder
+        ) const
+
+        OR
+
+        B::operator()( cv_item_type& item ) const
+      \endcode
+      Process a work item. May be invoked concurrently for the same \c this but different \c item.
+    - \code item_type( const item_type& ) \endcode
+      Copy a work item.
+    - \code ~item_type() \endcode
+      Destroy a work item.
+**/
+
+/** \name parallel_do
+    See also requirements on \ref parallel_do_body_req "parallel_do Body". **/
+//@{
+//! Parallel iteration over a range, with optional addition of more work.
+/** @ingroup algorithms */
+template<typename Iterator, typename Body> 
+void parallel_do( Iterator first, Iterator last, const Body& body )
+{
+    if ( first == last )
+        return;
+#if __TBB_TASK_GROUP_CONTEXT
+    task_group_context context;
+#endif // __TBB_TASK_GROUP_CONTEXT
+    internal::select_parallel_do( first, last, body, &Body::operator()
+#if __TBB_TASK_GROUP_CONTEXT
+        , context
+#endif // __TBB_TASK_GROUP_CONTEXT
+        );
+}
+
+#if __TBB_TASK_GROUP_CONTEXT
+//! Parallel iteration over a range, with optional addition of more work and user-supplied context
+/** @ingroup algorithms */
+template<typename Iterator, typename Body> 
+void parallel_do( Iterator first, Iterator last, const Body& body, task_group_context& context  )
+{
+    if ( first == last )
+        return;
+    internal::select_parallel_do( first, last, body, &Body::operator(), context );
+}
+#endif // __TBB_TASK_GROUP_CONTEXT
+
+//@}
+
+} // namespace tbb
+
+#endif /* __TBB_parallel_do_H */
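A usage sketch (not from this commit) for the parallel_do interface added above, showing the feeder-taking Body form from the parallel_do_body_req page: each invocation processes one item and may feed more items into the running algorithm. The node type, the shared tbb::atomic accumulator and the function names are illustrative assumptions:

    #include <list>
    #include "tbb/atomic.h"
    #include "tbb/parallel_do.h"

    struct node { int value; node* left; node* right; };

    tbb::atomic<int> total;   // illustrative shared accumulator

    // Body with the two-argument operator(): processes one node and feeds its
    // children back into the running parallel_do as additional work items.
    struct sum_nodes {
        void operator()( node* n, tbb::parallel_do_feeder<node*>& feeder ) const {
            total += n->value;
            if( n->left )  feeder.add( n->left );
            if( n->right ) feeder.add( n->right );
        }
    };

    void sum_trees( std::list<node*>& roots ) {
        total = 0;
        tbb::parallel_do( roots.begin(), roots.end(), sum_nodes() );
    }

The one-argument form, void operator()( cv_item_type& ) const, is used the same way when no extra work needs to be added.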
diff --git a/tbb/include/tbb/parallel_for.h b/tbb/include/tbb/parallel_for.h
new file mode 100644 (file)
index 0000000..31b9f98
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_parallel_for_H
+#define __TBB_parallel_for_H
+
+#include "task.h"
+#include "partitioner.h"
+#include "blocked_range.h"
+#include <new>
+#include "tbb_exception.h"
+
+namespace tbb {
+
+//! @cond INTERNAL
+namespace internal {
+
+    //! Task type used in parallel_for
+    /** @ingroup algorithms */
+    template<typename Range, typename Body, typename Partitioner>
+    class start_for: public task {
+        Range my_range;
+        const Body my_body;
+        typename Partitioner::partition_type my_partition;
+        /*override*/ task* execute();
+
+        //! Constructor for root task.
+        start_for( const Range& range, const Body& body, Partitioner& partitioner ) :
+            my_range(range),    
+            my_body(body),
+            my_partition(partitioner)
+        {
+        }
+        //! Splitting constructor used to generate children.
+        /** \c this becomes the left child. The newly constructed object is the right child. */
+        start_for( start_for& parent_, split ) :
+            my_range(parent_.my_range,split()),    
+            my_body(parent_.my_body),
+            my_partition(parent_.my_partition,split())
+        {
+            my_partition.set_affinity(*this);
+        }
+        //! Update affinity info, if any.
+        /*override*/ void note_affinity( affinity_id id ) {
+            my_partition.note_affinity( id );
+        }
+    public:
+        static void run(  const Range& range, const Body& body, const Partitioner& partitioner ) {
+            if( !range.empty() ) {
+#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
+                start_for& a = *new(task::allocate_root()) start_for(range,body,const_cast<Partitioner&>(partitioner));
+#else
+                // A bound context prevents exceptions thrown from the body from affecting nested or sibling algorithms,
+                // and allows users to handle exceptions safely by wrapping parallel_for in a try-block.
+                task_group_context context;
+                start_for& a = *new(task::allocate_root(context)) start_for(range,body,const_cast<Partitioner&>(partitioner));
+#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
+                task::spawn_root_and_wait(a);
+            }
+        }
+#if __TBB_TASK_GROUP_CONTEXT
+        static void run(  const Range& range, const Body& body, const Partitioner& partitioner, task_group_context& context ) {
+            if( !range.empty() ) {
+                start_for& a = *new(task::allocate_root(context)) start_for(range,body,const_cast<Partitioner&>(partitioner));
+                task::spawn_root_and_wait(a);
+            }
+        }
+#endif /* __TBB_TASK_GROUP_CONTEXT */
+    };
+
+    template<typename Range, typename Body, typename Partitioner>
+    task* start_for<Range,Body,Partitioner>::execute() {
+        if( !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
+            my_body( my_range );
+            return my_partition.continue_after_execute_range(); 
+        } else {
+            empty_task& c = *new( this->allocate_continuation() ) empty_task;
+            recycle_as_child_of(c);
+            c.set_ref_count(2);
+            bool delay = my_partition.decide_whether_to_delay();
+            start_for& b = *new( c.allocate_child() ) start_for(*this,split());
+            my_partition.spawn_or_delay(delay,b);
+            return this;
+        }
+    } 
+} // namespace internal
+//! @endcond
+
+
+// Requirements on Range concept are documented in blocked_range.h
+
+/** \page parallel_for_body_req Requirements on parallel_for body
+    Class \c Body implementing the concept of parallel_for body must define:
+    - \code Body::Body( const Body& ); \endcode                 Copy constructor
+    - \code Body::~Body(); \endcode                             Destructor
+    - \code void Body::operator()( Range& r ) const; \endcode   Function call operator applying the body to range \c r.
+**/
+
+/** \name parallel_for
+    See also requirements on \ref range_req "Range" and \ref parallel_for_body_req "parallel_for Body". **/
+//@{
+
+//! Parallel iteration over range with default partitioner. 
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_for( const Range& range, const Body& body ) {
+    internal::start_for<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
+}
+
+//! Parallel iteration over range with simple partitioner.
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) {
+    internal::start_for<Range,Body,simple_partitioner>::run(range,body,partitioner);
+}
+
+//! Parallel iteration over range with auto_partitioner.
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) {
+    internal::start_for<Range,Body,auto_partitioner>::run(range,body,partitioner);
+}
+
+//! Parallel iteration over range with affinity_partitioner.
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) {
+    internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner);
+}
+
+#if __TBB_TASK_GROUP_CONTEXT
+//! Parallel iteration over range with simple partitioner and user-supplied context.
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
+    internal::start_for<Range,Body,simple_partitioner>::run(range, body, partitioner, context);
+}
+
+//! Parallel iteration over range with auto_partitioner and user-supplied context.
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
+    internal::start_for<Range,Body,auto_partitioner>::run(range, body, partitioner, context);
+}
+
+//! Parallel iteration over range with affinity_partitioner and user-supplied context.
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
+    internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context);
+}
+#endif /* __TBB_TASK_GROUP_CONTEXT */
+//@}
+
+//! @cond INTERNAL
+namespace internal {
+    //! Calls the function for values in the range [begin, end), advancing by the provided step
+template<typename Function, typename Index>
+class parallel_for_body : internal::no_assign {
+    const Function &my_func;
+    const Index my_begin;
+    const Index my_step; 
+public:
+    parallel_for_body( const Function& _func, Index& _begin, Index& _step) 
+        : my_func(_func), my_begin(_begin), my_step(_step) {}
+    
+    void operator()( tbb::blocked_range<Index>& r ) const {
+        for( Index i = r.begin(),  k = my_begin + i * my_step; i < r.end(); i++, k = k + my_step)
+            my_func( k );
+    }
+};
+} // namespace internal
+//! @endcond
+
+namespace strict_ppl {
+
+//@{
+//! Parallel iteration over a range of integers with a step provided
+template <typename Index, typename Function>
+void parallel_for(Index first, Index last, Index step, const Function& f) {
+    tbb::task_group_context context;
+    parallel_for(first, last, step, f, context);
+}
+template <typename Index, typename Function>
+void parallel_for(Index first, Index last, Index step, const Function& f, tbb::task_group_context &context) {
+    if (step <= 0 )
+        internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
+    else if (last > first) {
+        // Above "else" avoids "potential divide by zero" warning on some platforms
+        Index end = (last - first - Index(1)) / step + Index(1);
+        tbb::blocked_range<Index> range(static_cast<Index>(0), end);
+        internal::parallel_for_body<Function, Index> body(f, first, step);
+        tbb::parallel_for(range, body, tbb::auto_partitioner(), context);
+    }
+}
+//! Parallel iteration over a range of integers with a default step value
+template <typename Index, typename Function>
+void parallel_for(Index first, Index last, const Function& f) {
+    tbb::task_group_context context;
+    parallel_for(first, last, static_cast<Index>(1), f, context);
+}
+template <typename Index, typename Function>
+void parallel_for(Index first, Index last, const Function& f, tbb::task_group_context &context) {
+    parallel_for(first, last, static_cast<Index>(1), f, context);
+}
+
+//@}
+
+} // namespace strict_ppl
+
+using strict_ppl::parallel_for;
+
+} // namespace tbb
+
+#endif /* __TBB_parallel_for_H */
+
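Two usage sketches (not from this commit) for the parallel_for overloads added above: the range/body form over a tbb::blocked_range, and the integer form with an explicit step provided by the strict_ppl overloads. The functor names and the float array are illustrative assumptions:

    #include "tbb/blocked_range.h"
    #include "tbb/parallel_for.h"

    // Range/body form: double every element of a[0..n).
    struct scale_body {
        float* a;
        void operator()( const tbb::blocked_range<size_t>& r ) const {
            for( size_t i = r.begin(); i != r.end(); ++i )
                a[i] *= 2.0f;              // stand-in for real per-element work
        }
    };

    void scale_all( float a[], size_t n ) {
        scale_body body;
        body.a = a;
        tbb::parallel_for( tbb::blocked_range<size_t>(0, n), body );
    }

    // Integer form with a step: touch indices 0, 2, 4, ... below n.
    struct touch_even {
        float* a;
        explicit touch_even( float* a_ ) : a(a_) {}
        void operator()( int i ) const { a[i] += 1.0f; }
    };

    void touch_even_indices( float a[], int n ) {
        tbb::parallel_for( 0, n, 2, touch_even(a) );
    }

The two-argument overload used in scale_all forwards to __TBB_DEFAULT_PARTITIONER; the simple_partitioner, auto_partitioner and affinity_partitioner overloads take the partitioner as an extra argument.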
diff --git a/tbb/include/tbb/parallel_for_each.h b/tbb/include/tbb/parallel_for_each.h
new file mode 100644 (file)
index 0000000..e59ee76
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_parallel_for_each_H
+#define __TBB_parallel_for_each_H
+
+#include "parallel_do.h"
+
+namespace tbb {
+
+//! @cond INTERNAL
+namespace internal {
+    // The class calls user function in operator()
+    template <typename Function, typename Iterator>
+    class parallel_for_each_body : internal::no_assign {
+        const Function &my_func;
+    public:
+        parallel_for_each_body(const Function &_func) : my_func(_func) {}
+        parallel_for_each_body(const parallel_for_each_body<Function, Iterator> &_caller) : my_func(_caller.my_func) {}
+
+        void operator() ( typename std::iterator_traits<Iterator>::value_type& value ) const {
+            my_func(value);
+        }
+    };
+} // namespace internal
+//! @endcond
+
+/** \name parallel_for_each
+    **/
+//@{
+//! Calls function \c f for all items in the interval [first, last), using a user-supplied context
+/** @ingroup algorithms */
+template<typename InputIterator, typename Function>
+void parallel_for_each(InputIterator first, InputIterator last, const Function& f, task_group_context &context) {
+    internal::parallel_for_each_body<Function, InputIterator> body(f);
+
+    tbb::parallel_do (first, last, body, context);
+}
+
+//! Uses default context
+template<typename InputIterator, typename Function>
+void parallel_for_each(InputIterator first, InputIterator last, const Function& f) {
+    internal::parallel_for_each_body<Function, InputIterator> body(f);
+
+    tbb::parallel_do (first, last, body);
+}
+
+//@}
+
+} // namespace tbb
+
+#endif /* __TBB_parallel_for_each_H */
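A usage sketch (not from this commit) for parallel_for_each above, which wraps the supplied function in an internal body and forwards to parallel_do. The squarer functor and square_all are illustrative assumptions:

    #include <vector>
    #include "tbb/parallel_for_each.h"

    // Applied to each element; receives the iterator's value_type by reference.
    struct squarer {
        void operator()( float& x ) const { x *= x; }
    };

    void square_all( std::vector<float>& v ) {
        tbb::parallel_for_each( v.begin(), v.end(), squarer() );
    }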
diff --git a/tbb/include/tbb/parallel_invoke.h b/tbb/include/tbb/parallel_invoke.h
new file mode 100644 (file)
index 0000000..3303c41
--- /dev/null
@@ -0,0 +1,359 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_parallel_invoke_H
+#define __TBB_parallel_invoke_H
+
+#include "task.h"
+
+namespace tbb {
+
+//! @cond INTERNAL
+namespace internal {
+    // Simple task object, executing user method
+    template<typename function>
+    class function_invoker : public task{
+    public:
+        function_invoker(const function& _function) : my_function(_function) {}
+    private:
+        const function &my_function;
+        /*override*/
+        task* execute()
+        {
+            my_function();
+            return NULL;
+        }
+    };
+
+    // The class spawns two or three child tasks
+    template <size_t N, typename function1, typename function2, typename function3>
+    class spawner : public task {
+    private:
+        const function1& my_func1;
+        const function2& my_func2;
+        const function3& my_func3;
+        bool is_recycled;
+
+        task* execute (){
+            if(is_recycled){
+                return NULL;
+            }else{
+                __TBB_ASSERT(N==2 || N==3, "Number of arguments passed to spawner is wrong");
+                set_ref_count(N);
+                recycle_as_safe_continuation();
+                internal::function_invoker<function2>* invoker2 = new (allocate_child()) internal::function_invoker<function2>(my_func2);
+                __TBB_ASSERT(invoker2, "Child task allocation failed");
+                spawn(*invoker2);
+                size_t n = N; // To prevent compiler warnings
+                if (n>2) {
+                    internal::function_invoker<function3>* invoker3 = new (allocate_child()) internal::function_invoker<function3>(my_func3);
+                    __TBB_ASSERT(invoker3, "Child task allocation failed");
+                    spawn(*invoker3);
+                }
+                my_func1();
+                is_recycled = true;
+                return NULL;
+            }
+        } // execute
+
+    public:
+        spawner(const function1& _func1, const function2& _func2, const function3& _func3) : my_func1(_func1), my_func2(_func2), my_func3(_func3), is_recycled(false) {}
+    };
+
+    // Creates and spawns child tasks
+    class parallel_invoke_helper : public empty_task {
+    public:
+        // Dummy functor class
+        class parallel_invoke_noop {
+        public:
+            void operator() () const {}
+        };
+        // Creates a helper object with user-defined number of children expected
+        parallel_invoke_helper(int number_of_children)
+        {
+            set_ref_count(number_of_children + 1);
+        }
+        // Adds child task and spawns it
+        template <typename function>
+        void add_child (const function &_func)
+        {
+            internal::function_invoker<function>* invoker = new (allocate_child()) internal::function_invoker<function>(_func);
+            __TBB_ASSERT(invoker, "Child task allocation failed");
+            spawn(*invoker);
+        }
+
+        // Adds a task with multiple child tasks and spawns it
+        // two arguments
+        template <typename function1, typename function2>
+        void add_children (const function1& _func1, const function2& _func2)
+        {
+            // The third argument is a dummy; it is actually ignored.
+            parallel_invoke_noop noop;
+            internal::spawner<2, function1, function2, parallel_invoke_noop>& sub_root = *new(allocate_child())internal::spawner<2, function1, function2, parallel_invoke_noop>(_func1, _func2, noop);
+            spawn(sub_root);
+        }
+        // three arguments
+        template <typename function1, typename function2, typename function3>
+        void add_children (const function1& _func1, const function2& _func2, const function3& _func3)
+        {
+            internal::spawner<3, function1, function2, function3>& sub_root = *new(allocate_child())internal::spawner<3, function1, function2, function3>(_func1, _func2, _func3);
+            spawn(sub_root);
+        }
+
+        // Waits for all child tasks
+        template <typename F0>
+        void run_and_finish(const F0& f0)
+        {
+            internal::function_invoker<F0>* invoker = new (allocate_child()) internal::function_invoker<F0>(f0);
+            __TBB_ASSERT(invoker, "Child task allocation failed");
+            spawn_and_wait_for_all(*invoker);
+        }
+    };
+    // The class destroys the root task both when an exception occurs and in the normal case
+    class parallel_invoke_cleaner: internal::no_copy { 
+    public:
+        parallel_invoke_cleaner(int number_of_children, tbb::task_group_context& context) : root(*new(task::allocate_root(context)) internal::parallel_invoke_helper(number_of_children))
+        {}
+        ~parallel_invoke_cleaner(){
+            root.destroy(root);
+        }
+        internal::parallel_invoke_helper& root;
+    };
+} // namespace internal
+//! @endcond
+
+/** \name parallel_invoke
+    **/
+//@{
+//! Executes a list of tasks in parallel and waits for all tasks to complete.
+/** @ingroup algorithms */
+
+// parallel_invoke with user-defined context
+// two arguments
+template<typename F0, typename F1 >
+void parallel_invoke(const F0& f0, const F1& f1, tbb::task_group_context& context) {
+    internal::parallel_invoke_cleaner cleaner(2, context);
+    internal::parallel_invoke_helper& root = cleaner.root;
+
+    root.add_child(f1);
+
+    root.run_and_finish(f0);
+}
+
+// three arguments
+template<typename F0, typename F1, typename F2 >
+void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, tbb::task_group_context& context) {
+    internal::parallel_invoke_cleaner cleaner(3, context);
+    internal::parallel_invoke_helper& root = cleaner.root;
+
+    root.add_child(f2);
+    root.add_child(f1);
+
+    root.run_and_finish(f0);
+}
+
+// four arguments
+template<typename F0, typename F1, typename F2, typename F3>
+void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3,
+                     tbb::task_group_context& context)
+{
+    internal::parallel_invoke_cleaner cleaner(4, context);
+    internal::parallel_invoke_helper& root = cleaner.root;
+
+    root.add_child(f3);
+    root.add_child(f2);
+    root.add_child(f1);
+
+    root.run_and_finish(f0);
+}
+
+// five arguments
+template<typename F0, typename F1, typename F2, typename F3, typename F4 >
+void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
+                     tbb::task_group_context& context)
+{
+    internal::parallel_invoke_cleaner cleaner(3, context);
+    internal::parallel_invoke_helper& root = cleaner.root;
+
+    root.add_children(f4, f3);
+    root.add_children(f2, f1);
+
+    root.run_and_finish(f0);
+}
+
+// six arguments
+template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5>
+void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4, const F5& f5,
+                     tbb::task_group_context& context)
+{
+    internal::parallel_invoke_cleaner cleaner(3, context);
+    internal::parallel_invoke_helper& root = cleaner.root;
+
+    root.add_children(f5, f4, f3);
+    root.add_children(f2, f1);
+
+    root.run_and_finish(f0);
+}
+
+// seven arguments
+template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6>
+void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
+                     const F5& f5, const F6& f6,
+                     tbb::task_group_context& context)
+{
+    internal::parallel_invoke_cleaner cleaner(3, context);
+    internal::parallel_invoke_helper& root = cleaner.root;
+
+    root.add_children(f6, f5, f4);
+    root.add_children(f3, f2, f1);
+
+    root.run_and_finish(f0);
+}
+
+// eight arguments
+template<typename F0, typename F1, typename F2, typename F3, typename F4,
+         typename F5, typename F6, typename F7>
+void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
+                     const F5& f5, const F6& f6, const F7& f7,
+                     tbb::task_group_context& context)
+{
+    internal::parallel_invoke_cleaner cleaner(4, context);
+    internal::parallel_invoke_helper& root = cleaner.root;
+
+    root.add_children(f7, f6, f5);
+    root.add_children(f4, f3);
+    root.add_children(f2, f1);
+
+    root.run_and_finish(f0);
+}
+
+// nine arguments
+template<typename F0, typename F1, typename F2, typename F3, typename F4,
+         typename F5, typename F6, typename F7, typename F8>
+void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
+                     const F5& f5, const F6& f6, const F7& f7, const F8& f8,
+                     tbb::task_group_context& context)
+{
+    internal::parallel_invoke_cleaner cleaner(4, context);
+    internal::parallel_invoke_helper& root = cleaner.root;
+
+    root.add_children(f8, f7, f6);
+    root.add_children(f5, f4, f3);
+    root.add_children(f2, f1);
+
+    root.run_and_finish(f0);
+}
+
+// ten arguments
+template<typename F0, typename F1, typename F2, typename F3, typename F4,
+         typename F5, typename F6, typename F7, typename F8, typename F9>
+void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
+                     const F5& f5, const F6& f6, const F7& f7, const F8& f8, const F9& f9,
+                     tbb::task_group_context& context)
+{
+    internal::parallel_invoke_cleaner cleaner(4, context);
+    internal::parallel_invoke_helper& root = cleaner.root;
+
+    root.add_children(f9, f8, f7);
+    root.add_children(f6, f5, f4);
+    root.add_children(f3, f2, f1);
+
+    root.run_and_finish(f0);
+}
+
+// two arguments
+template<typename F0, typename F1>
+void parallel_invoke(const F0& f0, const F1& f1) {
+    task_group_context context;
+    parallel_invoke<F0, F1>(f0, f1, context);
+}
+// three arguments
+template<typename F0, typename F1, typename F2>
+void parallel_invoke(const F0& f0, const F1& f1, const F2& f2) {
+    task_group_context context;
+    parallel_invoke<F0, F1, F2>(f0, f1, f2, context);
+}
+// four arguments
+template<typename F0, typename F1, typename F2, typename F3 >
+void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3) {
+    task_group_context context;
+    parallel_invoke<F0, F1, F2, F3>(f0, f1, f2, f3, context);
+}
+// five arguments
+template<typename F0, typename F1, typename F2, typename F3, typename F4>
+void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4) {
+    task_group_context context;
+    parallel_invoke<F0, F1, F2, F3, F4>(f0, f1, f2, f3, f4, context);
+}
+// six arguments
+template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5>
+void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4, const F5& f5) {
+    task_group_context context;
+    parallel_invoke<F0, F1, F2, F3, F4, F5>(f0, f1, f2, f3, f4, f5, context);
+}
+// seven arguments
+template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6>
+void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
+                     const F5& f5, const F6& f6)
+{
+    task_group_context context;
+    parallel_invoke<F0, F1, F2, F3, F4, F5, F6>(f0, f1, f2, f3, f4, f5, f6, context);
+}
+// eight arguments
+template<typename F0, typename F1, typename F2, typename F3, typename F4, 
+         typename F5, typename F6, typename F7>
+void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
+                     const F5& f5, const F6& f6, const F7& f7)
+{
+    task_group_context context;
+    parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7>(f0, f1, f2, f3, f4, f5, f6, f7, context);
+}
+// nine arguments
+template<typename F0, typename F1, typename F2, typename F3, typename F4,
+         typename F5, typename F6, typename F7, typename F8>
+void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
+                     const F5& f5, const F6& f6, const F7& f7, const F8& f8)
+{
+    task_group_context context;
+    parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7, F8>(f0, f1, f2, f3, f4, f5, f6, f7, f8, context);
+}
+// ten arguments
+template<typename F0, typename F1, typename F2, typename F3, typename F4,
+         typename F5, typename F6, typename F7, typename F8, typename F9>
+void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
+                     const F5& f5, const F6& f6, const F7& f7, const F8& f8, const F9& f9)
+{
+    task_group_context context;
+    parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7, F8, F9>(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, context);
+}
+
+//@}
+
+} // namespace tbb
+
+#endif /* __TBB_parallel_invoke_H */
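A usage sketch (not from this commit) for parallel_invoke above: it runs the supplied callables in parallel and returns when all of them have finished. The update_* functions and the globals they touch are illustrative assumptions; C++0x lambdas can be passed the same way on compilers that support them:

    #include <cmath>
    #include "tbb/parallel_invoke.h"

    double a = 1.0, b = 2.0, c = 3.0;   // illustrative independent state

    void update_a() { a = std::sqrt(a) + 1.0; }
    void update_b() { b = std::sqrt(b) + 2.0; }
    void update_c() { c = std::sqrt(c) + 3.0; }

    void update_all() {
        // The three calls run in parallel; update_all returns when all are done.
        tbb::parallel_invoke( update_a, update_b, update_c );
    }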
diff --git a/tbb/include/tbb/parallel_reduce.h b/tbb/include/tbb/parallel_reduce.h
new file mode 100644 (file)
index 0000000..bef9d6c
--- /dev/null
@@ -0,0 +1,365 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_parallel_reduce_H
+#define __TBB_parallel_reduce_H
+
+#include "task.h"
+#include "aligned_space.h"
+#include "partitioner.h"
+#include "tbb_profiling.h"
+#include <new>
+
+namespace tbb {
+
+//! @cond INTERNAL
+namespace internal {
+    //! 0 if root, 1 if a left child, 2 if a right child.
+    /** Represented as a char, not enum, for compactness. */
+    typedef char reduction_context;
+
+    //! Task type used to combine the partial results of parallel_reduce.
+    /** @ingroup algorithms */
+    template<typename Body>
+    class finish_reduce: public task {
+        //! Pointer to body, or NULL if the left child has not yet finished. 
+        Body* my_body;
+        bool has_right_zombie;
+        const reduction_context my_context;
+        aligned_space<Body,1> zombie_space;
+        finish_reduce( reduction_context context_ ) : 
+            my_body(NULL),
+            has_right_zombie(false),
+            my_context(context_)
+        {
+        }
+        task* execute() {
+            if( has_right_zombie ) {
+                // Right child was stolen.
+                Body* s = zombie_space.begin();
+                my_body->join( *s );
+                s->~Body();
+            }
+            if( my_context==1 )  // left child
+                itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
+            return NULL;
+        }
+        template<typename Range,typename Body_, typename Partitioner>
+        friend class start_reduce;
+    };
+
+    //! Task type used to split the work of parallel_reduce.
+    /** @ingroup algorithms */
+    template<typename Range, typename Body, typename Partitioner>
+    class start_reduce: public task {
+        typedef finish_reduce<Body> finish_type;
+        Body* my_body;
+        Range my_range;
+        typename Partitioner::partition_type my_partition;
+        reduction_context my_context;
+        /*override*/ task* execute();
+        template<typename Body_>
+        friend class finish_reduce;
+    
+        //! Constructor used for root task
+        start_reduce( const Range& range, Body* body, Partitioner& partitioner ) :
+            my_body(body),
+            my_range(range),
+            my_partition(partitioner),
+            my_context(0)
+        {
+        }
+        //! Splitting constructor used to generate children.
+        /** parent_ becomes the left child. The newly constructed object is the right child. */
+        start_reduce( start_reduce& parent_, split ) :
+            my_body(parent_.my_body),
+            my_range(parent_.my_range,split()),
+            my_partition(parent_.my_partition,split()),
+            my_context(2)
+        {
+            my_partition.set_affinity(*this);
+            parent_.my_context = 1;
+        }
+        //! Update affinity info, if any
+        /*override*/ void note_affinity( affinity_id id ) {
+            my_partition.note_affinity( id );
+        }
+
+public:
+        static void run( const Range& range, Body& body, Partitioner& partitioner ) {
+            if( !range.empty() ) {
+#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
+                task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );
+#else
+                // A bound context prevents exceptions thrown from the body from affecting nested or sibling algorithms,
+                // and allows users to handle exceptions safely by wrapping parallel_reduce in a try-block.
+                task_group_context context;
+                task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
+#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
+            }
+        }
+#if __TBB_TASK_GROUP_CONTEXT
+        static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
+            if( !range.empty() ) 
+                task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
+        }
+#endif /* __TBB_TASK_GROUP_CONTEXT */
+    };
+
+    template<typename Range, typename Body, typename Partitioner>
+    task* start_reduce<Range,Body,Partitioner>::execute() {
+        if( my_context==2 ) { // right child
+            finish_type* p = static_cast<finish_type*>(parent());
+            if( !itt_load_word_with_acquire(p->my_body) ) {
+                my_body = new( p->zombie_space.begin() ) Body(*my_body,split());
+                p->has_right_zombie = true;
+            }
+        }
+        if( !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
+            (*my_body)( my_range );
+            if( my_context==1 ) 
+                itt_store_word_with_release(static_cast<finish_type*>(parent())->my_body, my_body );
+            return my_partition.continue_after_execute_range();
+        } else {
+            finish_type& c = *new( allocate_continuation()) finish_type(my_context);
+            recycle_as_child_of(c);
+            c.set_ref_count(2);    
+            bool delay = my_partition.decide_whether_to_delay();
+            start_reduce& b = *new( c.allocate_child() ) start_reduce(*this,split());
+            my_partition.spawn_or_delay(delay,b);
+            return this;
+        }
+    }
+
+    //! Auxiliary class for parallel_reduce; for internal use only.
+    /** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body"
+        using given \ref parallel_reduce_lambda_req "anonymous function objects".
+     **/
+    /** @ingroup algorithms */
+    template<typename Range, typename Value, typename RealBody, typename Reduction>
+    class lambda_reduce_body {
+
+//FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced
+//       (might require some performance measurements)
+
+        const Value&     identity_element;
+        const RealBody&  my_real_body;
+        const Reduction& my_reduction;
+        Value            my_value;
+        lambda_reduce_body& operator= ( const lambda_reduce_body& other );
+    public:
+        lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
+            : identity_element(identity)
+            , my_real_body(body)
+            , my_reduction(reduction)
+            , my_value(identity)
+        { }
+        lambda_reduce_body( const lambda_reduce_body& other )
+            : identity_element(other.identity_element)
+            , my_real_body(other.my_real_body)
+            , my_reduction(other.my_reduction)
+            , my_value(other.my_value)
+        { }
+        lambda_reduce_body( lambda_reduce_body& other, tbb::split )
+            : identity_element(other.identity_element)
+            , my_real_body(other.my_real_body)
+            , my_reduction(other.my_reduction)
+            , my_value(other.identity_element)
+        { }
+        void operator()(Range& range) {
+            my_value = my_real_body(range, const_cast<const Value&>(my_value));
+        }
+        void join( lambda_reduce_body& rhs ) {
+            my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value));
+        }
+        Value result() const {
+            return my_value;
+        }
+    };
+
+} // namespace internal
+//! @endcond
+
+// Requirements on Range concept are documented in blocked_range.h
+
+/** \page parallel_reduce_body_req Requirements on parallel_reduce body
+    Class \c Body implementing the concept of parallel_reduce body must define:
+    - \code Body::Body( Body&, split ); \endcode        Splitting constructor.
+                                                        Must be able to run concurrently with operator() and method \c join
+    - \code Body::~Body(); \endcode                     Destructor
+    - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r
+                                                        and accumulating the result
+    - \code void Body::join( Body& b ); \endcode        Join results. 
+                                                        The result in \c b should be merged into the result of \c this
+**/
+
+/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions)
+    TO BE DOCUMENTED
+**/
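A usage sketch (not from this commit) of a Body satisfying the parallel_reduce requirements listed above: a splittable functor that sums a float array, with partial results merged in join(). The sum_body name and the array are illustrative assumptions; the lambda-style overloads further below express the same reduction through an identity value, a range functor and a reduction functor:

    #include "tbb/blocked_range.h"
    #include "tbb/parallel_reduce.h"

    // Splitting constructor, accumulating operator() and join(), as required above.
    struct sum_body {
        const float* a;
        float sum;
        explicit sum_body( const float* a_ ) : a(a_), sum(0.0f) {}
        sum_body( sum_body& other, tbb::split ) : a(other.a), sum(0.0f) {}
        void operator()( const tbb::blocked_range<size_t>& r ) {
            for( size_t i = r.begin(); i != r.end(); ++i )
                sum += a[i];
        }
        void join( const sum_body& rhs ) { sum += rhs.sum; }
    };

    float parallel_sum( const float a[], size_t n ) {
        sum_body body(a);
        tbb::parallel_reduce( tbb::blocked_range<size_t>(0, n), body );
        return body.sum;
    }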
+
+/** \name parallel_reduce
+    See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/
+//@{
+
+//! Parallel iteration with reduction and default partitioner.
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_reduce( const Range& range, Body& body ) {
+    internal::start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
+}
+
+//! Parallel iteration with reduction and simple_partitioner
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
+    internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
+}
+
+//! Parallel iteration with reduction and auto_partitioner
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
+    internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
+}
+
+//! Parallel iteration with reduction and affinity_partitioner
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
+    internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
+}
+
+#if __TBB_TASK_GROUP_CONTEXT
+//! Parallel iteration with reduction, simple partitioner and user-supplied context.
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
+    internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
+}
+
+//! Parallel iteration with reduction, auto_partitioner and user-supplied context
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
+    internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
+}
+
+//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
+    internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
+}
+#endif /* __TBB_TASK_GROUP_CONTEXT */
+
+/** parallel_reduce overloads that work with anonymous function objects
+    (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
+
+//! Parallel iteration with reduction and default partitioner.
+/** @ingroup algorithms **/
+template<typename Range, typename Value, typename RealBody, typename Reduction>
+Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
+    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
+    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
+                          ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
+    return body.result();
+}
+
+//! Parallel iteration with reduction and simple_partitioner.
+/** @ingroup algorithms **/
+template<typename Range, typename Value, typename RealBody, typename Reduction>
+Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
+                       const simple_partitioner& partitioner ) {
+    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
+    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
+                          ::run(range, body, partitioner );
+    return body.result();
+}
+
+//! Parallel iteration with reduction and auto_partitioner
+/** @ingroup algorithms **/
+template<typename Range, typename Value, typename RealBody, typename Reduction>
+Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
+                       const auto_partitioner& partitioner ) {
+    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
+    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
+                          ::run( range, body, partitioner );
+    return body.result();
+}
+
+//! Parallel iteration with reduction and affinity_partitioner
+/** @ingroup algorithms **/
+template<typename Range, typename Value, typename RealBody, typename Reduction>
+Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
+                       affinity_partitioner& partitioner ) {
+    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
+    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
+                                        ::run( range, body, partitioner );
+    return body.result();
+}
+
+#if __TBB_TASK_GROUP_CONTEXT
+//! Parallel iteration with reduction, simple partitioner and user-supplied context.
+/** @ingroup algorithms **/
+template<typename Range, typename Value, typename RealBody, typename Reduction>
+Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
+                       const simple_partitioner& partitioner, task_group_context& context ) {
+    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
+    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
+                          ::run( range, body, partitioner, context );
+    return body.result();
+}
+
+//! Parallel iteration with reduction, auto_partitioner and user-supplied context
+/** @ingroup algorithms **/
+template<typename Range, typename Value, typename RealBody, typename Reduction>
+Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
+                       const auto_partitioner& partitioner, task_group_context& context ) {
+    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
+    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
+                          ::run( range, body, partitioner, context );
+    return body.result();
+}
+
+//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
+/** @ingroup algorithms **/
+template<typename Range, typename Value, typename RealBody, typename Reduction>
+Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
+                       affinity_partitioner& partitioner, task_group_context& context ) {
+    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
+    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
+                                        ::run( range, body, partitioner, context );
+    return body.result();
+}
+#endif /* __TBB_TASK_GROUP_CONTEXT */
+//@}
+
+} // namespace tbb
+
+#endif /* __TBB_parallel_reduce_H */
+
diff --git a/tbb/include/tbb/parallel_scan.h b/tbb/include/tbb/parallel_scan.h
new file mode 100644 (file)
index 0000000..a4a02cb
--- /dev/null
@@ -0,0 +1,351 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_parallel_scan_H
+#define __TBB_parallel_scan_H
+
+#include "task.h"
+#include "aligned_space.h"
+#include <new>
+#include "partitioner.h"
+
+namespace tbb {
+
+//! Used to indicate that the initial scan is being performed.
+/** @ingroup algorithms */
+struct pre_scan_tag {
+    static bool is_final_scan() {return false;}
+};
+
+//! Used to indicate that the final scan is being performed.
+/** @ingroup algorithms */
+struct final_scan_tag {
+    static bool is_final_scan() {return true;}
+};
+
+//! @cond INTERNAL
+namespace internal {
+
+    //! Performs final scan for a leaf 
+    /** @ingroup algorithms */
+    template<typename Range, typename Body>
+    class final_sum: public task {
+    public:
+        Body body;
+    private:
+        aligned_space<Range,1> range;
+        //! Where to put result of last subrange, or NULL if not last subrange.
+        Body* stuff_last;
+    public:
+        final_sum( Body& body_ ) :
+            body(body_,split())
+        {
+            poison_pointer(stuff_last);
+        }
+        ~final_sum() {
+            range.begin()->~Range();
+        }     
+        void finish_construction( const Range& range_, Body* stuff_last_ ) {
+            new( range.begin() ) Range(range_);
+            stuff_last = stuff_last_;
+        }
+    private:
+        /*override*/ task* execute() {
+            body( *range.begin(), final_scan_tag() );
+            if( stuff_last )
+                stuff_last->assign(body);
+            return NULL;
+        }
+    };       
+
+    //! Split work to be done in the scan.
+    /** @ingroup algorithms */
+    template<typename Range, typename Body>
+    class sum_node: public task {
+        typedef final_sum<Range,Body> final_sum_type;
+    public:
+        final_sum_type *incoming; 
+        final_sum_type *body;
+        Body *stuff_last;
+    private:
+        final_sum_type *left_sum;
+        sum_node *left;
+        sum_node *right;     
+        bool left_is_final;
+        Range range;
+        sum_node( const Range range_, bool left_is_final_ ) : 
+            left_sum(NULL), 
+            left(NULL), 
+            right(NULL), 
+            left_is_final(left_is_final_), 
+            range(range_)
+        {
+            // Poison fields that will be set by second pass.
+            poison_pointer(body);
+            poison_pointer(incoming);
+        }
+        task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
+            if( !n ) {
+                f.recycle_as_child_of( *this );
+                f.finish_construction( range_, stuff_last_ );
+                return &f;
+            } else {
+                n->body = &f;
+                n->incoming = incoming_;
+                n->stuff_last = stuff_last_;
+                return n;
+            }
+        }
+        /*override*/ task* execute() {
+            if( body ) {
+                if( incoming )
+                    left_sum->body.reverse_join( incoming->body );
+                recycle_as_continuation();
+                sum_node& c = *this;
+                task* b = c.create_child(Range(range,split()),*left_sum,right,left_sum,stuff_last);
+                task* a = left_is_final ? NULL : c.create_child(range,*body,left,incoming,NULL);
+                set_ref_count( (a!=NULL)+(b!=NULL) );
+                body = NULL; 
+                if( a ) spawn(*b);
+                else a = b;
+                return a;
+            } else {
+                return NULL;
+            }
+        }
+        template<typename Range_,typename Body_,typename Partitioner_>
+        friend class start_scan;
+
+        template<typename Range_,typename Body_>
+        friend class finish_scan;
+    };
+
+    //! Combine partial results
+    /** @ingroup algorithms */
+    template<typename Range, typename Body>
+    class finish_scan: public task {
+        typedef sum_node<Range,Body> sum_node_type;
+        typedef final_sum<Range,Body> final_sum_type;
+        final_sum_type** const sum;
+        sum_node_type*& return_slot;
+    public:
+        final_sum_type* right_zombie;
+        sum_node_type& result;
+
+        /*override*/ task* execute() {
+            __TBB_ASSERT( result.ref_count()==(result.left!=NULL)+(result.right!=NULL), NULL );
+            if( result.left )
+                result.left_is_final = false;
+            if( right_zombie && sum ) 
+                ((*sum)->body).reverse_join(result.left_sum->body);
+            __TBB_ASSERT( !return_slot, NULL );
+            if( right_zombie || result.right ) {
+                return_slot = &result;
+            } else {
+                destroy( result );
+            }
+            if( right_zombie && !sum && !result.right ) destroy(*right_zombie);
+            return NULL;
+        }
+
+        finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) : 
+            sum(sum_),
+            return_slot(return_slot_), 
+            right_zombie(NULL),
+            result(result_)
+        {
+            __TBB_ASSERT( !return_slot, NULL );
+        }
+    };
+
+    //! Initial task to split the work
+    /** @ingroup algorithms */
+    template<typename Range, typename Body, typename Partitioner=simple_partitioner>
+    class start_scan: public task {
+        typedef sum_node<Range,Body> sum_node_type;
+        typedef final_sum<Range,Body> final_sum_type;
+        final_sum_type* body;
+        /** Non-null if caller is requesting total. */
+        final_sum_type** sum; 
+        sum_node_type** return_slot;
+        /** Null if computing root. */
+        sum_node_type* parent_sum;
+        bool is_final;
+        bool is_right_child;
+        Range range;
+        typename Partitioner::partition_type partition;
+        /*override*/ task* execute();
+    public:
+        start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
+            body(parent_.body),
+            sum(parent_.sum),
+            return_slot(&return_slot_),
+            parent_sum(parent_sum_),
+            is_final(parent_.is_final),
+            is_right_child(false),
+            range(parent_.range,split()),
+            partition(parent_.partition,split())
+        {
+            __TBB_ASSERT( !*return_slot, NULL );
+        }
+
+        start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
+            body(&body_),
+            sum(NULL),
+            return_slot(&return_slot_),
+            parent_sum(NULL),
+            is_final(true),
+            is_right_child(false),
+            range(range_),
+            partition(partitioner_)
+        {
+            __TBB_ASSERT( !*return_slot, NULL );
+        }
+
+        static void run(  const Range& range, Body& body, const Partitioner& partitioner ) {
+            if( !range.empty() ) {
+                typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
+                internal::sum_node<Range,Body>* root = NULL;
+                typedef internal::final_sum<Range,Body> final_sum_type;
+                final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body );
+                start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
+                    /*return_slot=*/root,
+                    range,
+                    *temp_body,
+                    partitioner );
+                task::spawn_root_and_wait( pass1 );
+                if( root ) {
+                    root->body = temp_body;
+                    root->incoming = NULL;
+                    root->stuff_last = &body;
+                    task::spawn_root_and_wait( *root );
+                } else {
+                    body.assign(temp_body->body);
+                    temp_body->finish_construction( range, NULL );
+                    temp_body->destroy(*temp_body);
+                }
+            }
+        }
+    };
+
+    template<typename Range, typename Body, typename Partitioner>
+    task* start_scan<Range,Body,Partitioner>::execute() {
+        typedef internal::finish_scan<Range,Body> finish_pass1_type;
+        finish_pass1_type* p = parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
+        // Inspecting p->result.left_sum would ordinarily be a race condition.
+        // But we inspect it only if we are not a stolen task, in which case we
+        // know that task assigning to p->result.left_sum has completed.
+        bool treat_as_stolen = is_right_child && (is_stolen_task() || body!=p->result.left_sum);
+        if( treat_as_stolen ) {
+            // Invocation is for right child that has been really stolen or needs to be virtually stolen
+            p->right_zombie = body = new( allocate_root() ) final_sum_type(body->body);
+            is_final = false;
+        }
+        task* next_task = NULL;
+        if( (is_right_child && !treat_as_stolen) || !range.is_divisible() || partition.should_execute_range(*this) ) {
+            if( is_final )
+                (body->body)( range, final_scan_tag() );
+            else if( sum )
+                (body->body)( range, pre_scan_tag() );
+            if( sum ) 
+                *sum = body;
+            __TBB_ASSERT( !*return_slot, NULL );
+        } else {
+            sum_node_type* result;
+            if( parent_sum ) 
+                result = new(allocate_additional_child_of(*parent_sum)) sum_node_type(range,/*left_is_final=*/is_final);
+            else
+                result = new(task::allocate_root()) sum_node_type(range,/*left_is_final=*/is_final);
+            finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*return_slot,sum,*result);
+            // Split off right child
+            start_scan& b = *new( c.allocate_child() ) start_scan( /*return_slot=*/result->right, *this, result );
+            b.is_right_child = true;    
+            // Left child is recycling of *this.  Must recycle this before spawning b, 
+            // otherwise b might complete and decrement c.ref_count() to zero, which
+            // would cause c.execute() to run prematurely.
+            recycle_as_child_of(c);
+            c.set_ref_count(2);
+            c.spawn(b);
+            sum = &result->left_sum;
+            return_slot = &result->left;
+            is_right_child = false;
+            next_task = this;
+            parent_sum = result; 
+            __TBB_ASSERT( !*return_slot, NULL );
+        }
+        return next_task;
+    } 
+} // namespace internal
+//! @endcond
+
+// Requirements on Range concept are documented in blocked_range.h
+
+/** \page parallel_scan_body_req Requirements on parallel_scan body
+    Class \c Body implementing the concept of parallel_scan body must define:
+    - \code Body::Body( Body&, split ); \endcode    Splitting constructor.
+                                                    Split \c b so that \c this and \c b can accumulate separately
+    - \code Body::~Body(); \endcode                 Destructor
+    - \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode
+                                                    Preprocess iterations for range \c r
+    - \code void Body::operator()( const Range& r, final_scan_tag ); \endcode 
+                                                    Do final processing for iterations of range \c r
+    - \code void Body::reverse_join( Body& a ); \endcode
+                                                    Merge preprocessing state of \c a into \c this, where \c this was
+                                                    created earlier from \c a by a's splitting constructor
+**/
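A body computing a running (prefix) sum of x into y, sketched under those requirements (the raw pointers and int index type are illustrative choices; tbb/blocked_range.h is assumed):

    #include "tbb/parallel_scan.h"
    #include "tbb/blocked_range.h"

    struct PrefixSumBody {
        float sum;
        float* const y;
        const float* const x;
        PrefixSumBody( float y_[], const float x_[] ) : sum(0), y(y_), x(x_) {}
        // One templated operator() serves both the pre-scan and the final-scan pass.
        template<typename Tag>
        void operator()( const tbb::blocked_range<int>& r, Tag ) {
            float temp = sum;
            for( int i = r.begin(); i < r.end(); ++i ) {
                temp += x[i];
                if( Tag::is_final_scan() )
                    y[i] = temp;
            }
            sum = temp;
        }
        PrefixSumBody( PrefixSumBody& b, tbb::split ) : sum(0), y(b.y), x(b.x) {}
        void reverse_join( PrefixSumBody& a ) { sum = a.sum + sum; }
        void assign( PrefixSumBody& b ) { sum = b.sum; }
    };

    // Usage:
    //   PrefixSumBody body(y, x);
    //   tbb::parallel_scan( tbb::blocked_range<int>(0, n), body );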
+
+/** \name parallel_scan
+    See also requirements on \ref range_req "Range" and \ref parallel_scan_body_req "parallel_scan Body". **/
+//@{
+
+//! Parallel prefix with default partitioner
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_scan( const Range& range, Body& body ) {
+    internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
+}
+
+//! Parallel prefix with simple_partitioner
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
+    internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner);
+}
+
+//! Parallel prefix with auto_partitioner
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
+    internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner);
+}
+//@}
+
+} // namespace tbb
+
+#endif /* __TBB_parallel_scan_H */
+
diff --git a/tbb/include/tbb/parallel_sort.h b/tbb/include/tbb/parallel_sort.h
new file mode 100644 (file)
index 0000000..0050046
--- /dev/null
@@ -0,0 +1,227 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_parallel_sort_H
+#define __TBB_parallel_sort_H
+
+#include "parallel_for.h"
+#include "blocked_range.h"
+#include <algorithm>
+#include <iterator>
+#include <functional>
+
+namespace tbb {
+
+//! @cond INTERNAL
+namespace internal {
+
+//! Range used in quicksort to split elements into subranges based on a value.
+/** The split operation selects a splitter and places all elements less than or equal 
+    to the value in the first range and the remaining elements in the second range.
+    @ingroup algorithms */
+template<typename RandomAccessIterator, typename Compare>
+class quick_sort_range: private no_assign {
+
+    inline size_t median_of_three(const RandomAccessIterator &array, size_t l, size_t m, size_t r) const {
+        return comp(array[l], array[m]) ? ( comp(array[m], array[r]) ? m : ( comp( array[l], array[r]) ? r : l ) ) 
+                                        : ( comp(array[r], array[m]) ? m : ( comp( array[r], array[l] ) ? r : l ) );
+    }
+
+    inline size_t pseudo_median_of_nine( const RandomAccessIterator &array, const quick_sort_range &range ) const {
+        size_t offset = range.size/8u;
+        return median_of_three(array, 
+                               median_of_three(array, 0, offset, offset*2),
+                               median_of_three(array, offset*3, offset*4, offset*5),
+                               median_of_three(array, offset*6, offset*7, range.size - 1) );
+
+    }
+
+public:
+
+    static const size_t grainsize = 500;
+    const Compare &comp;
+    RandomAccessIterator begin;
+    size_t size;
+
+    quick_sort_range( RandomAccessIterator begin_, size_t size_, const Compare &comp_ ) :
+        comp(comp_), begin(begin_), size(size_) {}
+
+    bool empty() const {return size==0;}
+    bool is_divisible() const {return size>=grainsize;}
+
+    quick_sort_range( quick_sort_range& range, split ) : comp(range.comp) {
+        RandomAccessIterator array = range.begin;
+        RandomAccessIterator key0 = range.begin; 
+        size_t m = pseudo_median_of_nine(array, range);
+        if (m) std::swap ( array[0], array[m] );
+
+        size_t i=0;
+        size_t j=range.size;
+        // Partition interval [i+1,j-1] with key *key0.
+        for(;;) {
+            __TBB_ASSERT( i<j, NULL );
+            // Loop must terminate since array[l]==*key0.
+            do {
+                --j;
+                __TBB_ASSERT( i<=j, "bad ordering relation?" );
+            } while( comp( *key0, array[j] ));
+            do {
+                __TBB_ASSERT( i<=j, NULL );
+                if( i==j ) goto partition;
+                ++i;
+            } while( comp( array[i],*key0 ));
+            if( i==j ) goto partition;
+            std::swap( array[i], array[j] );
+        }
+partition:
+        // Put the partition key where it belongs
+        std::swap( array[j], *key0 );
+        // array[l..j) is less or equal to key.
+        // array(j..r) is greater or equal to key.
+        // array[j] is equal to key
+        i=j+1;
+        begin = array+i;
+        size = range.size-i;
+        range.size = j;
+    }
+};
+
+//! Body class used to test if elements in a range are presorted
+/** @ingroup algorithms */
+template<typename RandomAccessIterator, typename Compare>
+class quick_sort_pretest_body : internal::no_assign {
+    const Compare &comp;
+
+public:
+    quick_sort_pretest_body(const Compare &_comp) : comp(_comp) {}
+
+    void operator()( const blocked_range<RandomAccessIterator>& range ) const {
+        task &my_task = task::self();
+        RandomAccessIterator my_end = range.end();
+
+        int i = 0;
+        for (RandomAccessIterator k = range.begin(); k != my_end; ++k, ++i) {
+            if ( i%64 == 0 && my_task.is_cancelled() ) break;
+          
+            // The k-1 is never out-of-range because the first chunk starts at begin+serial_cutoff+1
+            if ( comp( *(k), *(k-1) ) ) {
+                my_task.cancel_group_execution();
+                break;
+            }
+        }
+    }
+
+};
+
+//! Body class used to sort elements in a range that is smaller than the grainsize.
+/** @ingroup algorithms */
+template<typename RandomAccessIterator, typename Compare>
+struct quick_sort_body {
+    void operator()( const quick_sort_range<RandomAccessIterator,Compare>& range ) const {
+        //SerialQuickSort( range.begin, range.size, range.comp );
+        std::sort( range.begin, range.begin + range.size, range.comp );
+    }
+};
+
+//! Wrapper method to initiate the sort by calling parallel_for.
+/** @ingroup algorithms */
+template<typename RandomAccessIterator, typename Compare>
+void parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) {
+    task_group_context my_context;
+    const int serial_cutoff = 9;
+
+    __TBB_ASSERT( begin + serial_cutoff < end, "min_parallel_size is smaller than serial cutoff?" );
+    RandomAccessIterator k;
+    for ( k = begin ; k != begin + serial_cutoff; ++k ) {
+        if ( comp( *(k+1), *k ) ) {
+            goto do_parallel_quick_sort;
+        }
+    }
+
+    parallel_for( blocked_range<RandomAccessIterator>(k+1, end),
+                  quick_sort_pretest_body<RandomAccessIterator,Compare>(comp),
+                  auto_partitioner(),
+                  my_context);
+
+    if (my_context.is_group_execution_cancelled())
+do_parallel_quick_sort:
+        parallel_for( quick_sort_range<RandomAccessIterator,Compare>(begin, end-begin, comp ), 
+                      quick_sort_body<RandomAccessIterator,Compare>(),
+                      auto_partitioner() );
+}
+
+} // namespace internal
+//! @endcond
+
+/** \page parallel_sort_iter_req Requirements on iterators for parallel_sort
+    Requirements on value type \c T of \c RandomAccessIterator for \c parallel_sort:
+    - \code void swap( T& x, T& y ) \endcode        Swaps \c x and \c y
+    - \code bool Compare::operator()( const T& x, const T& y ) \endcode
+                                                    True if x comes before y;
+**/
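Typical calls, sketched with std::vector and an illustrative comparator type:

    #include "tbb/parallel_sort.h"
    #include <vector>

    struct Descending {
        bool operator()( int x, int y ) const { return x > y; }   // true if x comes before y
    };

    void sort_examples( std::vector<int>& v ) {
        tbb::parallel_sort( v.begin(), v.end() );                  // default std::less ordering
        tbb::parallel_sort( v.begin(), v.end(), Descending() );    // custom ordering
    }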
+
+/** \name parallel_sort
+    See also requirements on \ref parallel_sort_iter_req "iterators for parallel_sort". **/
+//@{
+
+//! Sorts the data in [begin,end) using the given comparator 
+/** The compare function object is used for all comparisons between elements during sorting.
+    The compare object must define a bool operator() function.
+    @ingroup algorithms **/
+template<typename RandomAccessIterator, typename Compare>
+void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp) { 
+    const int min_parallel_size = 500; 
+    if( end > begin ) {
+        if (end - begin < min_parallel_size) { 
+            std::sort(begin, end, comp);
+        } else {
+            internal::parallel_quick_sort(begin, end, comp);
+        }
+    }
+}
+
+//! Sorts the data in [begin,end) with the default comparator \c std::less over the iterators' value type
+/** @ingroup algorithms **/
+template<typename RandomAccessIterator>
+inline void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end ) { 
+    parallel_sort( begin, end, std::less< typename std::iterator_traits<RandomAccessIterator>::value_type >() );
+}
+
+//! Sorts the data in the range \c [begin,end) with a default comparator \c std::less<T>
+/** @ingroup algorithms **/
+template<typename T>
+inline void parallel_sort( T * begin, T * end ) {
+    parallel_sort( begin, end, std::less< T >() );
+}   
+//@}
+
+
+} // namespace tbb
+
+#endif
+
diff --git a/tbb/include/tbb/parallel_while.h b/tbb/include/tbb/parallel_while.h
new file mode 100644 (file)
index 0000000..94f9795
--- /dev/null
@@ -0,0 +1,194 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_parallel_while
+#define __TBB_parallel_while
+
+#include "task.h"
+#include <new>
+
+namespace tbb {
+
+template<typename Body>
+class parallel_while;
+
+//! @cond INTERNAL
+namespace internal {
+
+    template<typename Stream, typename Body> class while_task;
+
+    //! For internal use only.
+    /** Executes one iteration of a while.
+        @ingroup algorithms */
+    template<typename Body>
+    class while_iteration_task: public task {
+        const Body& my_body;
+        typename Body::argument_type my_value;
+        /*override*/ task* execute() {
+            my_body(my_value); 
+            return NULL;
+        }
+        while_iteration_task( const typename Body::argument_type& value, const Body& body ) : 
+            my_body(body), my_value(value)
+        {}
+        template<typename Body_> friend class while_group_task;
+        friend class tbb::parallel_while<Body>;
+    };
+
+    //! For internal use only
+    /** Unpacks a block of iterations.
+        @ingroup algorithms */
+    template<typename Body>
+    class while_group_task: public task {
+        static const size_t max_arg_size = 4;         
+        const Body& my_body;
+        size_t size;
+        typename Body::argument_type my_arg[max_arg_size];
+        while_group_task( const Body& body ) : my_body(body), size(0) {} 
+        /*override*/ task* execute() {
+            typedef while_iteration_task<Body> iteration_type;
+            __TBB_ASSERT( size>0, NULL );
+            task_list list;
+            task* t; 
+            size_t k=0; 
+            for(;;) {
+                t = new( allocate_child() ) iteration_type(my_arg[k],my_body); 
+                if( ++k==size ) break;
+                list.push_back(*t);
+            }
+            set_ref_count(int(k+1));
+            spawn(list);
+            spawn_and_wait_for_all(*t);
+            return NULL;
+        }
+        template<typename Stream, typename Body_> friend class while_task;
+    };
+    
+    //! For internal use only.
+    /** Gets block of iterations from a stream and packages them into a while_group_task.
+        @ingroup algorithms */
+    template<typename Stream, typename Body>
+    class while_task: public task {
+        Stream& my_stream;
+        const Body& my_body;
+        empty_task& my_barrier;
+        /*override*/ task* execute() {
+            typedef while_group_task<Body> block_type;
+            block_type& t = *new( allocate_additional_child_of(my_barrier) ) block_type(my_body);
+            size_t k=0; 
+            while( my_stream.pop_if_present(t.my_arg[k]) ) {
+                if( ++k==block_type::max_arg_size ) {
+                    // There might be more iterations.
+                    recycle_to_reexecute();
+                    break;
+                }
+            }
+            if( k==0 ) {
+                destroy(t);
+                return NULL;
+            } else {
+                t.size = k;
+                return &t;
+            }
+        }
+        while_task( Stream& stream, const Body& body, empty_task& barrier ) : 
+            my_stream(stream),
+            my_body(body),
+            my_barrier(barrier)
+        {} 
+        friend class tbb::parallel_while<Body>;
+    };
+
+} // namespace internal
+//! @endcond
+
+//! Parallel iteration over a stream, with optional addition of more work.
+/** The Body \c b must define: \n
+        "b(v)"                      \n
+        "Body::argument_type"       \n
+    where \c v is of type \c Body::argument_type
+    @ingroup algorithms */
+template<typename Body>
+class parallel_while: internal::no_copy {
+public:
+    //! Construct empty non-running parallel while.
+    parallel_while() : my_body(NULL), my_barrier(NULL) {}
+
+    //! Destructor cleans up data members before returning.
+    ~parallel_while() {
+        if( my_barrier ) {
+            my_barrier->destroy(*my_barrier);    
+            my_barrier = NULL;
+        }
+    }
+
+    //! Type of items
+    typedef typename Body::argument_type value_type;
+
+    //! Apply the body to each item in the stream.
+    /** A Stream \c s must define \n
+         "Stream::value_type"           \n
+         "s.pop_if_present(value)", convertible to bool. */
+    template<typename Stream>
+    void run( Stream& stream, const Body& body );
+
+    //! Add a work item while running.
+    /** Should be invoked only from within the body's operator(), or from a task spawned by it. */
+    void add( const value_type& item );
+
+private:
+    const Body* my_body;
+    empty_task* my_barrier;
+};
+
+template<typename Body>
+template<typename Stream>
+void parallel_while<Body>::run( Stream& stream, const Body& body ) {
+    using namespace internal;
+    empty_task& barrier = *new( task::allocate_root() ) empty_task();
+    my_body = &body;
+    my_barrier = &barrier;
+    my_barrier->set_ref_count(2);
+    while_task<Stream,Body>& w = *new( my_barrier->allocate_child() ) while_task<Stream,Body>( stream, body, barrier );
+    my_barrier->spawn_and_wait_for_all(w);
+    my_barrier->destroy(*my_barrier);
+    my_barrier = NULL;
+    my_body = NULL;
+}
+
+template<typename Body>
+void parallel_while<Body>::add( const value_type& item ) {
+    __TBB_ASSERT(my_barrier,"attempt to add to parallel_while that is not running");
+    typedef internal::while_iteration_task<Body> iteration_type;
+    iteration_type& i = *new( task::allocate_additional_child_of(*my_barrier) ) iteration_type(item,*my_body);
+    task::self().spawn( i );
+}
+
+} // namespace tbb
+
+#endif /* __TBB_parallel_while */
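A usage sketch of the class above (the CountingStream and ProcessItem types are illustrative, not part of the header):

    #include "tbb/parallel_while.h"

    // Hands out the integers [0, limit). parallel_while drives pop_if_present
    // from a single while_task at a time (see while_task above), so no locking
    // is needed in this sketch.
    struct CountingStream {
        int next, limit;
        CountingStream( int limit_ ) : next(0), limit(limit_) {}
        bool pop_if_present( int& item ) {
            if( next == limit ) return false;
            item = next++;
            return true;
        }
    };

    struct ProcessItem {
        typedef int argument_type;               // required by parallel_while
        void operator()( int item ) const {
            /* ... process item; parallel_while::add could enqueue more work ... */
            (void)item;
        }
    };

    void process_all() {
        CountingStream stream(1000);
        tbb::parallel_while<ProcessItem> w;
        w.run( stream, ProcessItem() );
    }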
diff --git a/tbb/include/tbb/partitioner.h b/tbb/include/tbb/partitioner.h
new file mode 100644 (file)
index 0000000..eaa95c6
--- /dev/null
@@ -0,0 +1,226 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_partitioner_H
+#define __TBB_partitioner_H
+
+#include "task.h"
+
+namespace tbb {
+class affinity_partitioner;
+
+//! @cond INTERNAL
+namespace internal {
+size_t __TBB_EXPORTED_FUNC get_initial_auto_partitioner_divisor();
+
+//! Defines entry points into tbb run-time library;
+/** The entry points are the constructor and destructor. */
+class affinity_partitioner_base_v3: no_copy {
+    friend class tbb::affinity_partitioner;
+    //! Array that remembers affinities of tree positions to affinity_id.
+    /** NULL if my_size==0. */
+    affinity_id* my_array;
+    //! Number of elements in my_array.
+    size_t my_size;
+    //! Zeros the fields.
+    affinity_partitioner_base_v3() : my_array(NULL), my_size(0) {}
+    //! Deallocates my_array.
+    ~affinity_partitioner_base_v3() {resize(0);}
+    //! Resize my_array.
+    /** Retains values if resulting size is the same. */
+    void __TBB_EXPORTED_METHOD resize( unsigned factor );
+    friend class affinity_partition_type;
+};
+
+//! Provides default methods for partition objects without affinity.
+class partition_type_base {
+public:
+    void set_affinity( task & ) {}
+    void note_affinity( task::affinity_id ) {}
+    task* continue_after_execute_range() {return NULL;}
+    bool decide_whether_to_delay() {return false;}
+    void spawn_or_delay( bool, task& b ) {
+        task::spawn(b);
+    }
+};
+
+class affinity_partition_type;
+
+template<typename Range, typename Body, typename Partitioner> class start_for;
+template<typename Range, typename Body, typename Partitioner> class start_reduce;
+template<typename Range, typename Body, typename Partitioner> class start_scan;
+
+} // namespace internal
+//! @endcond
+
+//! A simple partitioner 
+/** Divides the range until the range is not divisible. 
+    @ingroup algorithms */
+class simple_partitioner {
+public:
+    simple_partitioner() {}
+private:
+    template<typename Range, typename Body, typename Partitioner> friend class internal::start_for;
+    template<typename Range, typename Body, typename Partitioner> friend class internal::start_reduce;
+    template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
+
+    class partition_type: public internal::partition_type_base {
+    public:
+        bool should_execute_range(const task& ) {return false;}
+        partition_type( const simple_partitioner& ) {}
+        partition_type( const partition_type&, split ) {}
+    };
+};
+
+//! An auto partitioner 
+/** The range is initially divided into several large chunks.
+    Chunks are further subdivided into VICTIM_CHUNKS pieces if they are stolen and divisible.
+    @ingroup algorithms */
+class auto_partitioner {
+public:
+    auto_partitioner() {}
+
+private:
+    template<typename Range, typename Body, typename Partitioner> friend class internal::start_for;
+    template<typename Range, typename Body, typename Partitioner> friend class internal::start_reduce;
+    template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
+
+    class partition_type: public internal::partition_type_base {
+        size_t num_chunks;
+        static const size_t VICTIM_CHUNKS = 4;
+public:
+        bool should_execute_range(const task &t) {
+            if( num_chunks<VICTIM_CHUNKS && t.is_stolen_task() )
+                num_chunks = VICTIM_CHUNKS;
+            return num_chunks==1;
+        }
+        partition_type( const auto_partitioner& ) : num_chunks(internal::get_initial_auto_partitioner_divisor()) {}
+        partition_type( partition_type& pt, split ) {
+            num_chunks = pt.num_chunks /= 2u;
+        }
+    };
+};
+
+//! An affinity partitioner
+class affinity_partitioner: internal::affinity_partitioner_base_v3 {
+public:
+    affinity_partitioner() {}
+
+private:
+    template<typename Range, typename Body, typename Partitioner> friend class internal::start_for;
+    template<typename Range, typename Body, typename Partitioner> friend class internal::start_reduce;
+    template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
+
+    typedef internal::affinity_partition_type partition_type;
+    friend class internal::affinity_partition_type;
+};
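For context, a sketch of how these partitioners are passed to the loop templates (the Scale functor is illustrative); note that affinity_partitioner is taken by non-const reference and pays off when the same instance is reused over the same data:

    #include "tbb/parallel_for.h"
    #include "tbb/blocked_range.h"

    struct Scale {
        float* v;
        float  k;
        void operator()( const tbb::blocked_range<size_t>& r ) const {
            for( size_t i = r.begin(); i != r.end(); ++i )
                v[i] *= k;
        }
    };

    void scale_repeatedly( float* v, size_t n, float k, int passes ) {
        Scale body = { v, k };
        tbb::affinity_partitioner ap;            // reused so the affinity map is remembered
        for( int p = 0; p < passes; ++p )
            tbb::parallel_for( tbb::blocked_range<size_t>(0,n), body, ap );

        // One-off loops typically use auto_partitioner (or simple_partitioner with a grainsize):
        tbb::parallel_for( tbb::blocked_range<size_t>(0,n), body, tbb::auto_partitioner() );
    }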
+
+//! @cond INTERNAL
+namespace internal {
+
+class affinity_partition_type: public no_copy {
+    //! Must be power of two
+    static const unsigned factor = 16;
+    static const size_t VICTIM_CHUNKS = 4;
+
+    internal::affinity_id* my_array;
+    task_list delay_list;
+    unsigned map_begin, map_end;
+    size_t num_chunks;
+public:
+    affinity_partition_type( affinity_partitioner& ap ) {
+        __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" ); 
+        ap.resize(factor);
+        my_array = ap.my_array;
+        map_begin = 0;
+        map_end = unsigned(ap.my_size);
+        num_chunks = internal::get_initial_auto_partitioner_divisor();
+    }
+    affinity_partition_type(affinity_partition_type& p, split) : my_array(p.my_array) {
+        __TBB_ASSERT( p.map_end-p.map_begin<factor || (p.map_end-p.map_begin)%factor==0, NULL );
+        num_chunks = p.num_chunks /= 2;
+        unsigned e = p.map_end;
+        unsigned d = (e - p.map_begin)/2;
+        if( d>factor ) 
+            d &= 0u-factor;
+        map_end = e;
+        map_begin = p.map_end = e-d;
+    }
+
+    bool should_execute_range(const task &t) {
+        if( num_chunks < VICTIM_CHUNKS && t.is_stolen_task() )
+            num_chunks = VICTIM_CHUNKS;
+        return num_chunks == 1;
+    }
+
+    void set_affinity( task &t ) {
+        if( map_begin<map_end )
+            t.set_affinity( my_array[map_begin] );
+    }
+    void note_affinity( task::affinity_id id ) {
+        if( map_begin<map_end ) 
+            my_array[map_begin] = id;
+    }
+    task* continue_after_execute_range() {
+        task* first = NULL;
+        if( !delay_list.empty() ) {
+            first = &delay_list.pop_front();
+            while( !delay_list.empty() ) {
+                task::spawn(*first);
+                first = &delay_list.pop_front();
+            }
+        }
+        return first;
+    }
+    bool decide_whether_to_delay() {
+        // The possible underflow caused by "-1u" is deliberate
+        return (map_begin&(factor-1))==0 && map_end-map_begin-1u<factor;
+    }
+    void spawn_or_delay( bool delay, task& b ) {
+        if( delay )  
+            delay_list.push_back(b);
+        else 
+            task::spawn(b);
+    }
+
+    ~affinity_partition_type() {
+        // The delay_list can be non-empty if an exception is thrown.
+        while( !delay_list.empty() ) {
+            task& t = delay_list.pop_front();
+            t.destroy(t);
+        } 
+    }
+};
+
+} // namespace internal
+//! @endcond
+
+
+} // namespace tbb
+
+#endif /* __TBB_partitioner_H */
diff --git a/tbb/include/tbb/pipeline.h b/tbb/include/tbb/pipeline.h
new file mode 100644 (file)
index 0000000..60fe09b
--- /dev/null
@@ -0,0 +1,635 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_pipeline_H 
+#define __TBB_pipeline_H 
+
+#include "atomic.h"
+#include "task.h"
+#include "tbb_allocator.h"
+#include <cstddef>
+
+namespace tbb {
+
+class pipeline;
+class filter;
+
+//! @cond INTERNAL
+namespace internal {
+
+// The argument for PIPELINE_VERSION should be an integer between 2 and 9
+#define __TBB_PIPELINE_VERSION(x) (unsigned char)(x-2)<<1
+
+typedef unsigned long Token;
+typedef long tokendiff_t;
+class stage_task;
+class input_buffer;
+class pipeline_root_task;
+class pipeline_cleaner;
+
+} // namespace internal
+
+namespace interface6 {
+    template<typename T, typename U> class filter_t;
+
+    namespace internal {
+        class pipeline_proxy;
+    }
+}
+
+//! @endcond
+
+//! A stage in a pipeline.
+/** @ingroup algorithms */
+class filter: internal::no_copy {
+private:
+    //! Value used to mark "not in pipeline"
+    static filter* not_in_pipeline() {return reinterpret_cast<filter*>(intptr_t(-1));}
+protected:    
+    //! The lowest bit 0 is for parallel vs. serial
+    static const unsigned char filter_is_serial = 0x1; 
+
+    //! 4th bit distinguishes ordered vs unordered filters.
+    /** The bit was not set for parallel filters in TBB 2.1 and earlier,
+        but the is_ordered() function always treats parallel filters as out of order. */
+    static const unsigned char filter_is_out_of_order = 0x1<<4;  
+
+    //! 5th bit distinguishes thread-bound and regular filters.
+    static const unsigned char filter_is_bound = 0x1<<5;  
+
+    //! 6th bit marks input filters that may emit NULL
+    static const unsigned char filter_may_emit_null = 0x1<<6;
+
+    //! 7th bit defines exception propagation mode expected by the application.
+    static const unsigned char exact_exception_propagation =
+#if TBB_USE_CAPTURED_EXCEPTION
+            0x0;
+#else
+            0x1<<7;
+#endif /* TBB_USE_CAPTURED_EXCEPTION */
+
+    static const unsigned char current_version = __TBB_PIPELINE_VERSION(5);
+    static const unsigned char version_mask = 0x7<<1; // bits 1-3 are for version
+public:
+    enum mode {
+        //! processes multiple items in parallel and in no particular order
+        parallel = current_version | filter_is_out_of_order, 
+        //! processes items one at a time; all such filters process items in the same order
+        serial_in_order = current_version | filter_is_serial,
+        //! processes items one at a time and in no particular order
+        serial_out_of_order = current_version | filter_is_serial | filter_is_out_of_order,
+        //! @deprecated use serial_in_order instead
+        serial = serial_in_order
+    };
+protected:
+    filter( bool is_serial_ ) : 
+        next_filter_in_pipeline(not_in_pipeline()),
+        my_input_buffer(NULL),
+        my_filter_mode(static_cast<unsigned char>((is_serial_ ? serial : parallel) | exact_exception_propagation)),
+        prev_filter_in_pipeline(not_in_pipeline()),
+        my_pipeline(NULL),
+        next_segment(NULL)
+    {}
+    
+    filter( mode filter_mode ) :
+        next_filter_in_pipeline(not_in_pipeline()),
+        my_input_buffer(NULL),
+        my_filter_mode(static_cast<unsigned char>(filter_mode | exact_exception_propagation)),
+        prev_filter_in_pipeline(not_in_pipeline()),
+        my_pipeline(NULL),
+        next_segment(NULL)
+    {}
+
+    // signal end-of-input for concrete_filters
+    void __TBB_EXPORTED_METHOD set_end_of_input();
+
+public:
+    //! True if filter is serial.
+    bool is_serial() const {
+        return bool( my_filter_mode & filter_is_serial );
+    }  
+    
+    //! True if filter must receive stream in order.
+    bool is_ordered() const {
+        return (my_filter_mode & (filter_is_out_of_order|filter_is_serial))==filter_is_serial;
+    }
+
+    //! True if filter is thread-bound.
+    bool is_bound() const {
+        return ( my_filter_mode & filter_is_bound )==filter_is_bound;
+    }
+
+    //! true if an input filter can emit null
+    bool object_may_be_null() { 
+        return ( my_filter_mode & filter_may_emit_null ) == filter_may_emit_null;
+    }
+
+    //! Operate on an item from the input stream, and return item for output stream.
+    /** Returns NULL if filter is a sink. */
+    virtual void* operator()( void* item ) = 0;
+
+    //! Destroy filter.  
+    /** If the filter was added to a pipeline, the pipeline must be destroyed first. */
+    virtual __TBB_EXPORTED_METHOD ~filter();
+
+#if __TBB_TASK_GROUP_CONTEXT
+    //! Destroys item if pipeline was cancelled.
+    /** Required to prevent memory leaks.
+        Note it can be called concurrently even for serial filters.*/
+    virtual void finalize( void* /*item*/ ) {};
+#endif
+
+private:
+    //! Pointer to next filter in the pipeline.
+    filter* next_filter_in_pipeline;
+
+    //! has the filter not yet processed all the tokens it will ever see?  
+    //  (pipeline has not yet reached end_of_input or this filter has not yet
+    //  seen the last token produced by input_filter)
+    bool has_more_work();
+
+    //! Buffer for incoming tokens, or NULL if not required.
+    /** The buffer is required if the filter is serial or follows a thread-bound one. */
+    internal::input_buffer* my_input_buffer;
+
+    friend class internal::stage_task;
+    friend class internal::pipeline_root_task;
+    friend class pipeline;
+    friend class thread_bound_filter;
+
+    //! Storage for filter mode and dynamically checked implementation version.
+    const unsigned char my_filter_mode;
+
+    //! Pointer to previous filter in the pipeline.
+    filter* prev_filter_in_pipeline;
+
+    //! Pointer to the pipeline.
+    pipeline* my_pipeline;
+
+    //! Pointer to the next "segment" of filters, or NULL if not required.
+    /** In each segment, the first filter is not thread-bound but follows a thread-bound one. */
+    filter* next_segment;
+};
+
+//! A stage in a pipeline served by a user thread.
+/** @ingroup algorithms */
+class thread_bound_filter: public filter {
+public:
+    enum result_type {
+        // item was processed
+        success,
+        // item is currently not available
+        item_not_available,
+        // there are no more items to process
+        end_of_stream
+    };
+protected:
+    thread_bound_filter(mode filter_mode): 
+         filter(static_cast<mode>(filter_mode | filter::filter_is_bound))
+    {}
+public:
+    //! If a data item is available, invoke operator() on that item.  
+    /** This interface is non-blocking.
+        Returns 'success' if an item was processed.
+        Returns 'item_not_available' if no item can be processed now 
+        but more may arrive in the future, or if token limit is reached. 
+        Returns 'end_of_stream' if there are no more items to process. */
+    result_type __TBB_EXPORTED_METHOD try_process_item(); 
+
+    //! Wait until a data item becomes available, and invoke operator() on that item.
+    /** This interface is blocking.
+        Returns 'success' if an item was processed.
+        Returns 'end_of_stream' if there are no more items to process.
+        Never returns 'item_not_available', as it blocks until another return condition applies. */
+    result_type __TBB_EXPORTED_METHOD process_item();
+
+private:
+    //! Internal routine for item processing
+    result_type internal_process_item(bool is_blocking);
+};
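A thread-bound filter is serviced explicitly by a user thread, and the pipeline must be run on a different thread from the one calling process_item. A sketch of that pattern (the WriteOutput filter and the use of std::thread are illustrative assumptions, not part of this header):

    #include "tbb/pipeline.h"
    #include <thread>                    // any user-managed thread would do

    class WriteOutput : public tbb::thread_bound_filter {
    public:
        WriteOutput() : tbb::thread_bound_filter(tbb::filter::serial_in_order) {}
        /*override*/ void* operator()( void* item ) {
            /* ... consume item on the servicing (user) thread ... */
            (void)item;
            return NULL;
        }
    };

    void run_with_bound_sink( tbb::pipeline& pipe, WriteOutput& sink ) {
        pipe.add_filter( sink );         // earlier stages are assumed to be added already
        std::thread runner( [&pipe] { pipe.run( /*max_number_of_live_tokens=*/8 ); } );
        while( sink.process_item() != tbb::thread_bound_filter::end_of_stream )
            continue;                    // service the bound filter from this thread
        runner.join();
    }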
+
+//! A processing pipeline that applies filters to items.
+/** @ingroup algorithms */
+class pipeline {
+public:
+    //! Construct empty pipeline.
+    __TBB_EXPORTED_METHOD pipeline();
+
+    /** Though the current implementation declares the destructor virtual, do not rely on this 
+        detail.  The virtualness is deprecated and may disappear in future versions of TBB. */
+    virtual __TBB_EXPORTED_METHOD ~pipeline();
+
+    //! Add filter to end of pipeline.
+    void __TBB_EXPORTED_METHOD add_filter( filter& filter_ );
+
+    //! Run the pipeline to completion.
+    void __TBB_EXPORTED_METHOD run( size_t max_number_of_live_tokens );
+
+#if __TBB_TASK_GROUP_CONTEXT
+    //! Run the pipeline to completion with user-supplied context.
+    void __TBB_EXPORTED_METHOD run( size_t max_number_of_live_tokens, tbb::task_group_context& context );
+#endif
+
+    //! Remove all filters from the pipeline.
+    void __TBB_EXPORTED_METHOD clear();
+
+private:
+    friend class internal::stage_task;
+    friend class internal::pipeline_root_task;
+    friend class filter;
+    friend class thread_bound_filter;
+    friend class internal::pipeline_cleaner;
+    friend class tbb::interface6::internal::pipeline_proxy;
+
+    //! Pointer to first filter in the pipeline.
+    filter* filter_list;
+
+    //! Pointer to location where address of next filter to be added should be stored.
+    filter* filter_end;
+
+    //! Task whose reference count is used to determine when all stages are done.
+    task* end_counter;
+
+    //! Number of idle tokens waiting for input stage.
+    atomic<internal::Token> input_tokens;
+
+    //! Global counter of tokens 
+    atomic<internal::Token> token_counter;
+
+    //! False until fetch_input returns NULL.
+    bool end_of_input;
+
+    //! True if the pipeline contains a thread-bound filter; false otherwise.
+    bool has_thread_bound_filters;
+
+    //! Remove filter from pipeline.
+    void remove_filter( filter& filter_ );
+
+    //! Not used, but retained to satisfy old export files.
+    void __TBB_EXPORTED_METHOD inject_token( task& self );
+
+#if __TBB_TASK_GROUP_CONTEXT
+    //! Does cleanup if the pipeline is cancelled or an exception occurred
+    void clear_filters();
+#endif
+};
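A sketch of the classic usage: derive the stages from filter, chain them with add_filter, and let the first stage end the stream by returning NULL (the three stage classes here are illustrative):

    #include "tbb/pipeline.h"
    #include <cstdio>

    class ProduceInts : public tbb::filter {
        int my_next, my_limit;
    public:
        ProduceInts( int limit ) : tbb::filter(tbb::filter::serial_in_order), my_next(0), my_limit(limit) {}
        /*override*/ void* operator()( void* ) {
            if( my_next == my_limit ) return NULL;     // NULL from the first stage ends the stream
            return new int( my_next++ );
        }
    };

    class SquareInt : public tbb::filter {
    public:
        SquareInt() : tbb::filter(tbb::filter::parallel) {}
        /*override*/ void* operator()( void* item ) {
            int* p = static_cast<int*>(item);
            *p = *p * *p;
            return p;
        }
    };

    class PrintInt : public tbb::filter {
    public:
        PrintInt() : tbb::filter(tbb::filter::serial_in_order) {}
        /*override*/ void* operator()( void* item ) {
            int* p = static_cast<int*>(item);
            std::printf( "%d\n", *p );
            delete p;
            return NULL;                               // sink: nothing passed downstream
        }
    };

    void run_square_pipeline() {
        ProduceInts produce(100);
        SquareInt   square;
        PrintInt    print;
        tbb::pipeline pipe;
        pipe.add_filter( produce );
        pipe.add_filter( square );
        pipe.add_filter( print );
        pipe.run( /*max_number_of_live_tokens=*/16 );
        pipe.clear();
    }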
+
+//------------------------------------------------------------------------
+// Support for lambda-friendly parallel_pipeline interface
+//------------------------------------------------------------------------
+
+namespace interface6 {
+
+namespace internal {
+    template<typename T, typename U, typename Body> class concrete_filter;
+}
+
+//! input_filter control to signal end-of-input for parallel_pipeline
+class flow_control {
+    bool is_pipeline_stopped;
+    flow_control() { is_pipeline_stopped = false; }
+    template<typename T, typename U, typename Body> friend class internal::concrete_filter;
+public:
+    void stop() { is_pipeline_stopped = true; }
+};
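The same pipeline in the lambda-friendly form that flow_control exists for; this sketch assumes compiler lambda support and relies on parallel_pipeline, make_filter and filter_t, which appear later in this header:

    #include "tbb/pipeline.h"
    #include <cstdio>

    void run_square_pipeline_lambda( int limit ) {
        int next = 0;
        tbb::parallel_pipeline( /*max_number_of_live_tokens=*/16,
            tbb::make_filter<void,int>( tbb::filter::serial_in_order,
                [&next,limit]( tbb::flow_control& fc ) -> int {
                    if( next == limit ) { fc.stop(); return 0; }   // stop() signals end of input
                    return next++;
                } )
          & tbb::make_filter<int,int>( tbb::filter::parallel,
                []( int x ) { return x * x; } )
          & tbb::make_filter<int,void>( tbb::filter::serial_in_order,
                []( int x ) { std::printf( "%d\n", x ); } ) );
    }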
+
+//! @cond INTERNAL
+namespace internal {
+
+template<typename T> struct is_large_object { enum { r = sizeof(T) > sizeof(void *) }; };
+
+template<typename T, bool> class token_helper;
+
+// large object helper (uses tbb_allocator)
+template<typename T>
+class token_helper<T, true> {
+    public:
+    typedef typename tbb::tbb_allocator<T> allocator;
+    typedef T* pointer;
+    typedef T value_type;
+    static pointer create_token(const value_type & source) {
+        pointer output_t = allocator().allocate(1);
+        return new (output_t) T(source);
+    }
+    static value_type & token(pointer & t) { return *t;}
+    static void * cast_to_void_ptr(pointer ref) { return (void *) ref; }
+    static pointer cast_from_void_ptr(void * ref) { return (pointer)ref; }
+    static void destroy_token(pointer token) {
+        allocator().destroy(token);
+        allocator().deallocate(token,1);
+    }
+};
+
+// pointer specialization
+template<typename T>
+class token_helper<T*, false > {
+    public:
+    typedef T* pointer;
+    typedef T* value_type;
+    static pointer create_token(const value_type & source) { return source; }
+    static value_type & token(pointer & t) { return t;}
+    static void * cast_to_void_ptr(pointer ref) { return (void *)ref; }
+    static pointer cast_from_void_ptr(void * ref) { return (pointer)ref; }
+    static void destroy_token( pointer /*token*/) {}
+};
+
+// small object specialization (converts void* to the correct type, passes objects directly.)
+template<typename T>
+class token_helper<T, false> {
+    typedef union {
+        T actual_value;
+        void * void_overlay;
+    } type_to_void_ptr_map;
+    public:
+    typedef T pointer;  // not really a pointer in this case.
+    typedef T value_type;
+    static pointer create_token(const value_type & source) {
+        return source; }
+    static value_type & token(pointer & t) { return t;}
+    static void * cast_to_void_ptr(pointer ref) { 
+        type_to_void_ptr_map mymap; 
+        mymap.void_overlay = NULL;
+        mymap.actual_value = ref; 
+        return mymap.void_overlay; 
+    }
+    static pointer cast_from_void_ptr(void * ref) { 
+        type_to_void_ptr_map mymap;
+        mymap.void_overlay = ref;
+        return mymap.actual_value;
+    }
+    static void destroy_token( pointer /*token*/) {}
+};
+
+template<typename T, typename U, typename Body>
+class concrete_filter: public tbb::filter {
+    const Body& my_body;
+    typedef token_helper<T,is_large_object<T>::r > t_helper;
+    typedef typename t_helper::pointer t_pointer;
+    typedef token_helper<U,is_large_object<U>::r > u_helper;
+    typedef typename u_helper::pointer u_pointer;
+
+    /*override*/ void* operator()(void* input) {
+        t_pointer temp_input = t_helper::cast_from_void_ptr(input);
+        u_pointer output_u = u_helper::create_token(my_body(t_helper::token(temp_input)));
+        t_helper::destroy_token(temp_input);
+        return u_helper::cast_to_void_ptr(output_u);
+    }
+
+public:
+    concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {}
+};
+
+// input 
+template<typename U, typename Body>
+class concrete_filter<void,U,Body>: public filter {
+    const Body& my_body;
+    typedef token_helper<U, is_large_object<U>::r > u_helper;
+    typedef typename u_helper::pointer u_pointer;
+
+    /*override*/void* operator()(void*) {
+        flow_control control;
+        u_pointer output_u = u_helper::create_token(my_body(control));
+        if(control.is_pipeline_stopped) {
+            u_helper::destroy_token(output_u);
+            set_end_of_input();
+            return NULL;
+        }
+        return u_helper::cast_to_void_ptr(output_u);
+    }
+
+public:
+    concrete_filter(tbb::filter::mode filter_mode, const Body& body) : 
+        filter(static_cast<tbb::filter::mode>(filter_mode | filter_may_emit_null)),
+        my_body(body)
+    {}
+};
+
+template<typename T, typename Body>
+class concrete_filter<T,void,Body>: public filter {
+    const Body& my_body;
+    typedef token_helper<T, is_large_object<T>::r > t_helper;
+    typedef typename t_helper::pointer t_pointer;
+   
+    /*override*/ void* operator()(void* input) {
+        t_pointer temp_input = t_helper::cast_from_void_ptr(input);
+        my_body(t_helper::token(temp_input));
+        t_helper::destroy_token(temp_input);
+        return NULL;
+    }
+public:
+    concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {}
+};
+
+template<typename Body>
+class concrete_filter<void,void,Body>: public filter {
+    const Body& my_body;
+    
+    /** Override privately because it is always called virtually */
+    /*override*/ void* operator()(void*) {
+        flow_control control;
+        my_body(control);
+        void* output = control.is_pipeline_stopped ? NULL : (void*)(intptr_t)-1; 
+        return output;
+    }
+public:
+    concrete_filter(filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {}
+};
+
+//! The class that represents an object of the pipeline for parallel_pipeline().
+/** It primarily serves as RAII class that deletes heap-allocated filter instances. */
+class pipeline_proxy {
+    tbb::pipeline my_pipe;
+public:
+    pipeline_proxy( const filter_t<void,void>& filter_chain );
+    ~pipeline_proxy() {
+        while( filter* f = my_pipe.filter_list ) 
+            delete f; // filter destructor removes it from the pipeline
+    }
+    tbb::pipeline* operator->() { return &my_pipe; }
+};
+
+//! Abstract base class that represents a node in a parse tree underlying a filter_t.
+/** These nodes are always heap-allocated and can be shared by filter_t objects. */
+class filter_node: tbb::internal::no_copy {
+    /** Count must be atomic because it is hidden state for the user, but might be shared by threads. */
+    tbb::atomic<intptr_t> ref_count;
+protected:
+    filter_node() {
+        ref_count = 0;
+#ifdef __TBB_TEST_FILTER_NODE_COUNT
+        ++(__TBB_TEST_FILTER_NODE_COUNT);
+#endif
+    }
+public:
+    //! Add concrete_filter to pipeline 
+    virtual void add_to( pipeline& ) = 0;
+    //! Increment reference count
+    void add_ref() {++ref_count;}
+    //! Decrement reference count and delete if it becomes zero.
+    void remove_ref() {
+        __TBB_ASSERT(ref_count>0,"ref_count underflow");
+        if( --ref_count==0 ) 
+            delete this;
+    }
+    virtual ~filter_node() {
+#ifdef __TBB_TEST_FILTER_NODE_COUNT
+        --(__TBB_TEST_FILTER_NODE_COUNT);
+#endif
+    }
+};
+
+//! Node in parse tree representing result of make_filter.
+template<typename T, typename U, typename Body>
+class filter_node_leaf: public filter_node  {
+    const tbb::filter::mode mode;
+    const Body body;
+    /*override*/void add_to( pipeline& p ) {
+        concrete_filter<T,U,Body>* f = new concrete_filter<T,U,Body>(mode,body);
+        p.add_filter( *f );
+    }
+public:
+    filter_node_leaf( tbb::filter::mode m, const Body& b ) : mode(m), body(b) {}
+};
+
+//! Node in parse tree representing join of two filters.
+class filter_node_join: public filter_node {
+    friend class filter_node; // to suppress GCC 3.2 warnings
+    filter_node& left;
+    filter_node& right;
+    /*override*/~filter_node_join() {
+       left.remove_ref();
+       right.remove_ref();
+    }
+    /*override*/void add_to( pipeline& p ) {
+        left.add_to(p);
+        right.add_to(p);
+    }
+public:
+    filter_node_join( filter_node& x, filter_node& y ) : left(x), right(y) {
+       left.add_ref();
+       right.add_ref();
+    }
+};
+
+} // namespace internal
+//! @endcond
+
+//! Create a filter to participate in parallel_pipeline
+template<typename T, typename U, typename Body>
+filter_t<T,U> make_filter(tbb::filter::mode mode, const Body& body) {
+    return new internal::filter_node_leaf<T,U,Body>(mode, body);
+}
+
+template<typename T, typename V, typename U>
+filter_t<T,U> operator& (const filter_t<T,V>& left, const filter_t<V,U>& right) {
+    __TBB_ASSERT(left.root,"cannot use default-constructed filter_t as left argument of '&'");
+    __TBB_ASSERT(right.root,"cannot use default-constructed filter_t as right argument of '&'");
+    return new internal::filter_node_join(*left.root,*right.root);
+}
+
+//! Class representing a chain of type-safe pipeline filters
+template<typename T, typename U>
+class filter_t {
+    typedef internal::filter_node filter_node;
+    filter_node* root;
+    filter_t( filter_node* root_ ) : root(root_) {
+        root->add_ref();
+    }
+    friend class internal::pipeline_proxy;
+    template<typename T_, typename U_, typename Body>
+    friend filter_t<T_,U_> make_filter(tbb::filter::mode, const Body& );
+    template<typename T_, typename V_, typename U_>
+    friend filter_t<T_,U_> operator& (const filter_t<T_,V_>& , const filter_t<V_,U_>& );
+public:
+    filter_t() : root(NULL) {}
+    filter_t( const filter_t<T,U>& rhs ) : root(rhs.root) {
+        if( root ) root->add_ref();
+    }
+    template<typename Body>
+    filter_t( tbb::filter::mode mode, const Body& body ) :
+        root( new internal::filter_node_leaf<T,U,Body>(mode, body) ) {
+        root->add_ref();
+    }
+
+    void operator=( const filter_t<T,U>& rhs ) {
+        // Order of operations below carefully chosen so that reference counts remain correct
+        // in unlikely event that remove_ref throws exception.
+        filter_node* old = root;
+        root = rhs.root; 
+        if( root ) root->add_ref();
+        if( old ) old->remove_ref();
+    }
+    ~filter_t() {
+        if( root ) root->remove_ref();
+    }
+    void clear() {
+        // Like operator= with filter_t() on right side.
+        if( root ) {
+            filter_node* old = root;
+            root = NULL;
+            old->remove_ref();
+        }
+    }
+};
+
+inline internal::pipeline_proxy::pipeline_proxy( const filter_t<void,void>& filter_chain ) : my_pipe() {
+    __TBB_ASSERT( filter_chain.root, "cannot apply parallel_pipeline to default-constructed filter_t"  );
+    filter_chain.root->add_to(my_pipe);
+}
+
+inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter_t<void,void>& filter_chain
+#if __TBB_TASK_GROUP_CONTEXT
+    , tbb::task_group_context& context
+#endif
+    ) {
+    internal::pipeline_proxy pipe(filter_chain);
+    // tbb::pipeline::run() is called via the proxy
+    pipe->run(max_number_of_live_tokens
+#if __TBB_TASK_GROUP_CONTEXT
+              , context
+#endif
+    );
+}
+
+#if __TBB_TASK_GROUP_CONTEXT
+inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter_t<void,void>& filter_chain) {
+    tbb::task_group_context context;
+    parallel_pipeline(max_number_of_live_tokens, filter_chain, context);
+}
+#endif // __TBB_TASK_GROUP_CONTEXT
+
+} // interface6
+
+using interface6::flow_control;
+using interface6::filter_t;
+using interface6::make_filter;
+using interface6::parallel_pipeline;
+
+} // tbb
+
+#endif /* __TBB_pipeline_H */
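
For orientation, a minimal usage sketch of the lambda-friendly interface declared above (make_filter, operator& and parallel_pipeline), assuming a compiler with lambda support; with C++03 the stage bodies would be hand-written function objects. The stage bodies, the item type and the token limit of 4 are illustrative.

    #include "tbb/pipeline.h"
    #include <cstdio>

    int main() {
        int counter = 0;
        tbb::parallel_pipeline( /*max_number_of_live_tokens=*/4,
            // Serial input stage: emits the integers 0..9, then signals end-of-input.
            tbb::make_filter<void,int>( tbb::filter::serial_in_order,
                [&counter]( tbb::flow_control& fc ) -> int {
                    if( counter == 10 ) {
                        fc.stop();      // tell the pipeline no more input is coming
                        return 0;       // the value returned after stop() is discarded
                    }
                    return counter++;
                } )
            &
            // Parallel middle stage: squares each token; may run out of order.
            tbb::make_filter<int,int>( tbb::filter::parallel,
                []( int x ) { return x * x; } )
            &
            // Serial output stage: consumes results in input order.
            tbb::make_filter<int,void>( tbb::filter::serial_in_order,
                []( int x ) { std::printf( "%d\n", x ); } ) );
        return 0;
    }
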
diff --git a/tbb/include/tbb/queuing_mutex.h b/tbb/include/tbb/queuing_mutex.h
new file mode 100644 (file)
index 0000000..fe0d5d9
--- /dev/null
@@ -0,0 +1,131 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_queuing_mutex_H
+#define __TBB_queuing_mutex_H
+
+#include "tbb_config.h"
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
+    #pragma warning (push)
+    #pragma warning (disable: 4530)
+#endif
+
+#include <cstring>
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    #pragma warning (pop)
+#endif
+
+#include "atomic.h"
+#include "tbb_profiling.h"
+
+namespace tbb {
+
+//! Queuing lock with local-only spinning.
+/** @ingroup synchronization */
+class queuing_mutex {
+public:
+    //! Construct unacquired mutex.
+    queuing_mutex() {
+        q_tail = NULL;
+#if TBB_USE_THREADING_TOOLS
+        internal_construct();
+#endif
+    }
+
+    //! The scoped locking pattern
+    /** It helps to avoid the common problem of forgetting to release the lock.
+        It also nicely provides the "node" for queuing locks. */
+    class scoped_lock: internal::no_copy {
+        //! Initialize fields to mean "no lock held".
+        void initialize() {
+            mutex = NULL;
+#if TBB_USE_ASSERT
+            internal::poison_pointer(next);
+#endif /* TBB_USE_ASSERT */
+        }
+    public:
+        //! Construct lock that has not acquired a mutex.
+        /** Equivalent to zero-initialization of *this. */
+        scoped_lock() {initialize();}
+
+        //! Acquire lock on given mutex.
+        scoped_lock( queuing_mutex& m ) {
+            initialize();
+            acquire(m);
+        }
+
+        //! Release lock (if lock is held).
+        ~scoped_lock() {
+            if( mutex ) release();
+        }
+
+        //! Acquire lock on given mutex.
+        void __TBB_EXPORTED_METHOD acquire( queuing_mutex& m );
+
+        //! Acquire lock on given mutex if free (i.e. non-blocking)
+        bool __TBB_EXPORTED_METHOD try_acquire( queuing_mutex& m );
+
+        //! Release lock.
+        void __TBB_EXPORTED_METHOD release();
+
+    private:
+        //! The pointer to the mutex owned, or NULL if not holding a mutex.
+        queuing_mutex* mutex;
+
+        //! The pointer to the next competitor for a mutex
+        scoped_lock *next;
+
+        //! The local spin-wait variable
+        /** Inverted (0 - blocked, 1 - acquired the mutex) for the sake of 
+            zero-initialization.  Defining it as an entire word instead of
+            a byte seems to help performance slightly. */
+        uintptr_t going;
+    };
+
+    void __TBB_EXPORTED_METHOD internal_construct();
+
+    // Mutex traits
+    static const bool is_rw_mutex = false;
+    static const bool is_recursive_mutex = false;
+    static const bool is_fair_mutex = true;
+
+    friend class scoped_lock;
+private:
+    //! The last competitor requesting the lock
+    atomic<scoped_lock*> q_tail;
+
+};
+
+__TBB_DEFINE_PROFILING_SET_NAME(queuing_mutex)
+
+} // namespace tbb
+
+#endif /* __TBB_queuing_mutex_H */
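
A minimal sketch of the scoped-lock pattern declared above; the counter and the function name are illustrative.

    #include "tbb/queuing_mutex.h"

    long event_count = 0;
    tbb::queuing_mutex count_mutex;

    void record_event() {
        // The constructor acquires the mutex; the destructor releases it when
        // the lock goes out of scope, even on an early return or exception.
        tbb::queuing_mutex::scoped_lock lock( count_mutex );
        ++event_count;
    }
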
diff --git a/tbb/include/tbb/queuing_rw_mutex.h b/tbb/include/tbb/queuing_rw_mutex.h
new file mode 100644 (file)
index 0000000..3c76332
--- /dev/null
@@ -0,0 +1,173 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_queuing_rw_mutex_H
+#define __TBB_queuing_rw_mutex_H
+
+#include "tbb_config.h"
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
+    #pragma warning (push)
+    #pragma warning (disable: 4530)
+#endif
+
+#include <cstring>
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    #pragma warning (pop)
+#endif
+
+#include "atomic.h"
+#include "tbb_profiling.h"
+
+namespace tbb {
+
+//! Reader-writer lock with local-only spinning.
+/** Adapted from Krieger, Stumm, et al. pseudocode at
+    http://www.eecg.toronto.edu/parallel/pubs_abs.html#Krieger_etal_ICPP93
+    @ingroup synchronization */
+class queuing_rw_mutex {
+public:
+    //! Construct unacquired mutex.
+    queuing_rw_mutex() {
+        q_tail = NULL;
+#if TBB_USE_THREADING_TOOLS
+        internal_construct();
+#endif
+    }
+
+    //! Destructor asserts if the mutex is acquired, i.e. q_tail is non-NULL
+    ~queuing_rw_mutex() {
+#if TBB_USE_ASSERT
+        __TBB_ASSERT( !q_tail, "destruction of an acquired mutex");
+#endif
+    }
+
+    class scoped_lock;
+    friend class scoped_lock;
+
+    //! The scoped locking pattern
+    /** It helps to avoid the common problem of forgetting to release the lock.
+        It also nicely provides the "node" for queuing locks. */
+    class scoped_lock: internal::no_copy {
+        //! Initialize fields
+        void initialize() {
+            mutex = NULL;
+#if TBB_USE_ASSERT
+            state = 0xFF; // Set to invalid state
+            internal::poison_pointer(next);
+            internal::poison_pointer(prev);
+#endif /* TBB_USE_ASSERT */
+        }
+    public:
+        //! Construct lock that has not acquired a mutex.
+        /** Equivalent to zero-initialization of *this. */
+        scoped_lock() {initialize();}
+
+        //! Acquire lock on given mutex.
+        scoped_lock( queuing_rw_mutex& m, bool write=true ) {
+            initialize();
+            acquire(m,write);
+        }
+
+        //! Release lock (if lock is held).
+        ~scoped_lock() {
+            if( mutex ) release();
+        }
+
+        //! Acquire lock on given mutex.
+        void acquire( queuing_rw_mutex& m, bool write=true );
+
+        //! Try acquire lock on given mutex.
+        bool try_acquire( queuing_rw_mutex& m, bool write=true );
+
+        //! Release lock.
+        void release();
+
+        //! Upgrade reader to become a writer.
+        /** Returns true if the upgrade happened without releasing and re-acquiring the lock, and false otherwise. */
+        bool upgrade_to_writer();
+
+        //! Downgrade writer to become a reader.
+        bool downgrade_to_reader();
+
+    private:
+        //! The pointer to the current mutex to work
+        queuing_rw_mutex* mutex;
+
+        //! The pointer to the previous and next competitors for a mutex
+        scoped_lock * prev, * next;
+
+        typedef unsigned char state_t;
+
+        //! State of the request: reader, writer, active reader, other service states
+        atomic<state_t> state;
+
+        //! The local spin-wait variable
+        /** Corresponds to "spin" in the pseudocode but inverted for the sake of zero-initialization */
+        unsigned char going;
+
+        //! A tiny internal lock
+        unsigned char internal_lock;
+
+        //! Acquire the internal lock
+        void acquire_internal_lock();
+
+        //! Try to acquire the internal lock
+        /** Returns true if lock was successfully acquired. */
+        bool try_acquire_internal_lock();
+
+        //! Release the internal lock
+        void release_internal_lock();
+
+        //! Wait for internal lock to be released
+        void wait_for_release_of_internal_lock();
+
+        //! A helper function
+        void unblock_or_wait_on_internal_lock( uintptr_t );
+    };
+
+    void __TBB_EXPORTED_METHOD internal_construct();
+
+    // Mutex traits
+    static const bool is_rw_mutex = true;
+    static const bool is_recursive_mutex = false;
+    static const bool is_fair_mutex = true;
+
+private:
+    //! The last competitor requesting the lock
+    atomic<scoped_lock*> q_tail;
+
+};
+
+__TBB_DEFINE_PROFILING_SET_NAME(queuing_rw_mutex)
+
+} // namespace tbb
+
+#endif /* __TBB_queuing_rw_mutex_H */
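
A minimal sketch of reader/writer usage with the class above, including the upgrade path; the shared value and the threshold logic are illustrative.

    #include "tbb/queuing_rw_mutex.h"

    tbb::queuing_rw_mutex value_mutex;
    int shared_value = 0;

    int read_value() {
        // Acquire as a reader (write=false); readers may hold the lock concurrently.
        tbb::queuing_rw_mutex::scoped_lock lock( value_mutex, /*write=*/false );
        return shared_value;
    }

    void raise_value( int candidate ) {
        tbb::queuing_rw_mutex::scoped_lock lock( value_mutex, /*write=*/false );
        if( candidate > shared_value ) {
            // upgrade_to_writer() may release and re-acquire the lock;
            // when it returns false, the condition must be re-checked.
            if( !lock.upgrade_to_writer() && candidate <= shared_value )
                return;
            shared_value = candidate;
        }
    }   // lock released when it goes out of scope
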
diff --git a/tbb/include/tbb/reader_writer_lock.h b/tbb/include/tbb/reader_writer_lock.h
new file mode 100644 (file)
index 0000000..a5cace9
--- /dev/null
@@ -0,0 +1,240 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_reader_writer_lock_H
+#define __TBB_reader_writer_lock_H
+
+#include "tbb_thread.h"
+#include "tbb_allocator.h"
+#include "atomic.h"
+
+namespace tbb {
+namespace interface5 {
+//! Writer-preference reader-writer lock with local-only spinning on readers.
+/** Loosely adapted from Mellor-Crummey and Scott pseudocode at
+    http://www.cs.rochester.edu/research/synchronization/pseudocode/rw.html#s_wp
+    @ingroup synchronization */
+    class reader_writer_lock : tbb::internal::no_copy {
+ public:
+    friend class scoped_lock;
+    friend class scoped_lock_read;
+    //! Status type for nodes associated with lock instances
+    /** waiting_nonblocking: the wait state for nonblocking lock
+          instances; for writes, these transition straight to active
+          states; for reads, these are unused.
+
+        waiting: the start and spin state for all lock instances; these will
+          transition to active state when appropriate.  Non-blocking write locks
+          transition from this state to waiting_nonblocking immediately.
+
+        active: the active state means that the lock instance holds
+          the lock; it will transition to invalid state during node deletion
+
+        invalid: the end state for all nodes; this is set in the
+          destructor so if we encounter this state, we are looking at
+          memory that has already been freed
+        
+        The state diagrams below describe the status transitions.
+        Single arrows indicate that the thread that owns the node is
+        responsible for the transition; double arrows indicate that
+        any thread could make the transition.
+
+        State diagram for scoped_lock status:
+
+        waiting ----------> waiting_nonblocking
+          |     _____________/       |
+          V    V                     V
+        active -----------------> invalid
+  
+        State diagram for scoped_lock_read status:
+
+        waiting 
+          |                        
+          V                        
+        active -----------------> invalid
+
+    */
+    enum status_t { waiting_nonblocking, waiting, active, invalid };
+
+    //! Constructs a new reader_writer_lock
+    reader_writer_lock() {
+        internal_construct();
+    }
+
+    //! Destructs a reader_writer_lock object
+    ~reader_writer_lock() {
+        internal_destroy();
+    }
+
+    //! The scoped lock pattern for write locks
+    /** Scoped locks help avoid the common problem of forgetting to release the lock.
+        This type also serves as the node for queuing locks. */
+    class scoped_lock : tbb::internal::no_copy {
+    public:
+        friend class reader_writer_lock;
+        //! Construct with blocking attempt to acquire write lock on the passed-in lock 
+        scoped_lock(reader_writer_lock& lock) {
+            internal_construct(lock);
+        }
+        
+        //! Destructor, releases the write lock
+        ~scoped_lock() {
+            internal_destroy();
+        }
+
+        void* operator new(size_t s) {
+            return tbb::internal::allocate_via_handler_v3(s);
+        }
+        void operator delete(void* p) {
+            tbb::internal::deallocate_via_handler_v3(p);
+        }
+
+    private:
+        //! The pointer to the mutex to lock
+        reader_writer_lock *mutex;
+        //! The next queued competitor for the mutex
+        scoped_lock* next;
+        //! Status flag of the thread associated with this node
+        atomic<status_t> status;
+
+        //! Construct scoped_lock that is not holding lock
+        scoped_lock();
+
+        void __TBB_EXPORTED_METHOD internal_construct(reader_writer_lock&);
+        void __TBB_EXPORTED_METHOD internal_destroy();
+   };
+
+    //! The scoped lock pattern for read locks
+    class scoped_lock_read : tbb::internal::no_copy {
+    public:
+        friend class reader_writer_lock;
+
+        //! Construct with blocking attempt to acquire read lock on the passed-in lock 
+        scoped_lock_read(reader_writer_lock& lock) {
+            internal_construct(lock);
+        }
+
+        //! Destructor, releases the read lock
+        ~scoped_lock_read() { 
+            internal_destroy();
+        }
+        
+        void* operator new(size_t s) {
+            return tbb::internal::allocate_via_handler_v3(s);
+        }
+        void operator delete(void* p) {
+            tbb::internal::deallocate_via_handler_v3(p);
+        }
+
+    private:
+        //! The pointer to the mutex to lock
+        reader_writer_lock *mutex;
+        //! The next queued competitor for the mutex
+        scoped_lock_read *next;
+        //! Status flag of the thread associated with this node
+        atomic<status_t> status;
+
+        //! Construct scoped_lock_read that is not holding lock
+        scoped_lock_read();
+
+        void __TBB_EXPORTED_METHOD internal_construct(reader_writer_lock&);
+        void __TBB_EXPORTED_METHOD internal_destroy();
+    };
+    
+    //! Acquires the reader_writer_lock for write.  
+    /** If the lock is currently held in write mode by another
+        context, the writer blocks by spinning on a local variable.
+        Throws improper_lock if the context tries to acquire a
+        reader_writer_lock that it already has write ownership of. */
+    void __TBB_EXPORTED_METHOD lock();
+
+    //! Tries to acquire the reader_writer_lock for write.   
+    /** This function does not block.  Return Value: True or false,
+        depending on whether the lock is acquired or not.  If the lock
+        is already held by this acquiring context, try_lock() returns
+        false. */
+    bool __TBB_EXPORTED_METHOD try_lock();
+
+    //! Acquires the reader_writer_lock for read.    
+    /** If the lock is currently held by a writer, this reader blocks
+        and waits until the writers are done.  Throws improper_lock if
+        the context tries to acquire a reader_writer_lock that it
+        already has write ownership of. */
+    void __TBB_EXPORTED_METHOD lock_read(); 
+
+    //! Tries to acquire the reader_writer_lock for read.  
+    /** This function does not block.  Return Value: True or false,
+        depending on whether the lock is acquired or not.  */
+    bool __TBB_EXPORTED_METHOD try_lock_read();
+
+    //! Releases the reader_writer_lock
+    void __TBB_EXPORTED_METHOD unlock();
+
+ private:
+    void __TBB_EXPORTED_METHOD internal_construct();
+    void __TBB_EXPORTED_METHOD internal_destroy();
+
+    //! Attempts to acquire write lock
+    /** If unavailable, spins in blocking case, returns false in non-blocking case. */
+    bool start_write(scoped_lock *);
+    //! Sets writer_head to w and attempts to unblock
+    void set_next_writer(scoped_lock *w);
+    //! Relinquishes write lock to next waiting writer or group of readers 
+    void end_write(scoped_lock *);
+    //! Checks if current thread holds write lock
+    bool is_current_writer();
+
+    //! Attempts to acquire read lock
+    /** If unavailable, spins in blocking case, returns false in non-blocking case. */
+    void start_read(scoped_lock_read *);
+    //! Unblocks pending readers
+    void unblock_readers();
+    //! Relinquishes read lock by decrementing counter; last reader wakes pending writer
+    void end_read();
+
+    //! The list of pending readers
+    atomic<scoped_lock_read*> reader_head;
+    //! The list of pending writers
+    atomic<scoped_lock*> writer_head;
+    //! The last node in the list of pending writers
+    atomic<scoped_lock*> writer_tail;
+    //! Thread id of the writer that owns the mutex; a default-constructed tbb_thread::id() if there is no writer.
+    tbb_thread::id my_current_writer;
+    //! Status of mutex
+    atomic<unsigned> rdr_count_and_flags;
+};
+
+} // namespace interface5
+
+using interface5::reader_writer_lock;
+
+} // namespace tbb
+
+#endif /* __TBB_reader_writer_lock_H */
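
A minimal sketch of the interface above; the guarded value and the function names are illustrative.

    #include "tbb/reader_writer_lock.h"

    tbb::reader_writer_lock config_lock;
    int config_value = 0;

    int read_config() {
        // Read lock: multiple readers may proceed concurrently.
        tbb::reader_writer_lock::scoped_lock_read guard( config_lock );
        return config_value;
    }

    void write_config( int v ) {
        // Write lock: exclusive; the scoped form cannot leak the lock on an early return.
        tbb::reader_writer_lock::scoped_lock guard( config_lock );
        config_value = v;
    }
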
diff --git a/tbb/include/tbb/recursive_mutex.h b/tbb/include/tbb/recursive_mutex.h
new file mode 100644 (file)
index 0000000..185fd19
--- /dev/null
@@ -0,0 +1,240 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_recursive_mutex_H
+#define __TBB_recursive_mutex_H
+
+#if _WIN32||_WIN64
+#include "machine/windows_api.h"
+#else
+#include <pthread.h>
+#endif /* _WIN32||_WIN64 */
+
+#include <new>
+#include "aligned_space.h"
+#include "tbb_stddef.h"
+#include "tbb_profiling.h"
+
+namespace tbb {
+//! Mutex that allows recursive mutex acquisition.
+/** Mutex that allows recursive mutex acquisition.
+    @ingroup synchronization */
+class recursive_mutex {
+public:
+    //! Construct unacquired recursive_mutex.
+    recursive_mutex() {
+#if TBB_USE_ASSERT || TBB_USE_THREADING_TOOLS
+        internal_construct();
+#else
+  #if _WIN32||_WIN64
+        InitializeCriticalSection(&impl);
+  #else
+        pthread_mutexattr_t mtx_attr;
+        int error_code = pthread_mutexattr_init( &mtx_attr );
+        if( error_code )
+            tbb::internal::handle_perror(error_code,"recursive_mutex: pthread_mutexattr_init failed");
+
+        pthread_mutexattr_settype( &mtx_attr, PTHREAD_MUTEX_RECURSIVE );
+        error_code = pthread_mutex_init( &impl, &mtx_attr );
+        if( error_code )
+            tbb::internal::handle_perror(error_code,"recursive_mutex: pthread_mutex_init failed");
+
+        pthread_mutexattr_destroy( &mtx_attr );
+  #endif /* _WIN32||_WIN64*/
+#endif /* TBB_USE_ASSERT */
+    };
+
+    ~recursive_mutex() {
+#if TBB_USE_ASSERT
+        internal_destroy();
+#else
+  #if _WIN32||_WIN64
+        DeleteCriticalSection(&impl);
+  #else
+        pthread_mutex_destroy(&impl); 
+
+  #endif /* _WIN32||_WIN64 */
+#endif /* TBB_USE_ASSERT */
+    };
+
+    class scoped_lock;
+    friend class scoped_lock;
+
+    //! The scoped locking pattern
+    /** It helps to avoid the common problem of forgetting to release the lock.
+        It also nicely provides the "node" for queuing locks. */
+    class scoped_lock: internal::no_copy {
+    public:
+        //! Construct lock that has not acquired a recursive_mutex. 
+        scoped_lock() : my_mutex(NULL) {};
+
+        //! Acquire lock on given mutex.
+        scoped_lock( recursive_mutex& mutex ) {
+#if TBB_USE_ASSERT
+            my_mutex = &mutex; 
+#endif /* TBB_USE_ASSERT */
+            acquire( mutex );
+        }
+
+        //! Release lock (if lock is held).
+        ~scoped_lock() {
+            if( my_mutex ) 
+                release();
+        }
+
+        //! Acquire lock on given mutex.
+        void acquire( recursive_mutex& mutex ) {
+#if TBB_USE_ASSERT
+            internal_acquire( mutex );
+#else
+            my_mutex = &mutex;
+            mutex.lock();
+#endif /* TBB_USE_ASSERT */
+        }
+
+        //! Try acquire lock on given recursive_mutex.
+        bool try_acquire( recursive_mutex& mutex ) {
+#if TBB_USE_ASSERT
+            return internal_try_acquire( mutex );
+#else
+            bool result = mutex.try_lock();
+            if( result )
+                my_mutex = &mutex;
+            return result;
+#endif /* TBB_USE_ASSERT */
+        }
+
+        //! Release lock
+        void release() {
+#if TBB_USE_ASSERT
+            internal_release();
+#else
+            my_mutex->unlock();
+            my_mutex = NULL;
+#endif /* TBB_USE_ASSERT */
+        }
+
+    private:
+        //! The pointer to the current recursive_mutex to work
+        recursive_mutex* my_mutex;
+
+        //! All checks from acquire using mutex.state were moved here
+        void __TBB_EXPORTED_METHOD internal_acquire( recursive_mutex& m );
+
+        //! All checks from try_acquire using mutex.state were moved here
+        bool __TBB_EXPORTED_METHOD internal_try_acquire( recursive_mutex& m );
+
+        //! All checks from release using mutex.state were moved here
+        void __TBB_EXPORTED_METHOD internal_release();
+
+        friend class recursive_mutex;
+    };
+
+    // Mutex traits
+    static const bool is_rw_mutex = false;
+    static const bool is_recursive_mutex = true;
+    static const bool is_fair_mutex = false;
+
+    // C++0x compatibility interface
+    
+    //! Acquire lock
+    void lock() {
+#if TBB_USE_ASSERT
+        aligned_space<scoped_lock,1> tmp;
+        new(tmp.begin()) scoped_lock(*this);
+#else
+  #if _WIN32||_WIN64
+        EnterCriticalSection(&impl);
+  #else
+        pthread_mutex_lock(&impl);
+  #endif /* _WIN32||_WIN64 */
+#endif /* TBB_USE_ASSERT */
+    }
+
+    //! Try acquiring lock (non-blocking)
+    /** Return true if lock acquired; false otherwise. */
+    bool try_lock() {
+#if TBB_USE_ASSERT
+        aligned_space<scoped_lock,1> tmp;
+        return (new(tmp.begin()) scoped_lock)->internal_try_acquire(*this);
+#else        
+  #if _WIN32||_WIN64
+        return TryEnterCriticalSection(&impl)!=0;
+  #else
+        return pthread_mutex_trylock(&impl)==0;
+  #endif /* _WIN32||_WIN64 */
+#endif /* TBB_USE_ASSERT */
+    }
+
+    //! Release lock
+    void unlock() {
+#if TBB_USE_ASSERT
+        aligned_space<scoped_lock,1> tmp;
+        scoped_lock& s = *tmp.begin();
+        s.my_mutex = this;
+        s.internal_release();
+#else
+  #if _WIN32||_WIN64
+        LeaveCriticalSection(&impl);
+  #else
+        pthread_mutex_unlock(&impl);
+  #endif /* _WIN32||_WIN64 */
+#endif /* TBB_USE_ASSERT */
+    }
+
+    //! Return native_handle
+  #if _WIN32||_WIN64
+    typedef LPCRITICAL_SECTION native_handle_type;
+  #else
+    typedef pthread_mutex_t* native_handle_type;
+  #endif
+    native_handle_type native_handle() { return (native_handle_type) &impl; }
+
+private:
+#if _WIN32||_WIN64
+    CRITICAL_SECTION impl;
+    enum state_t {
+        INITIALIZED=0x1234,
+        DESTROYED=0x789A,
+    } state;
+#else
+    pthread_mutex_t impl;
+#endif /* _WIN32||_WIN64 */
+
+    //! All checks from mutex constructor using mutex.state were moved here
+    void __TBB_EXPORTED_METHOD internal_construct();
+
+    //! All checks from mutex destructor using mutex.state were moved here
+    void __TBB_EXPORTED_METHOD internal_destroy();
+};
+
+__TBB_DEFINE_PROFILING_SET_NAME(recursive_mutex)
+
+} // namespace tbb 
+
+#endif /* __TBB_recursive_mutex_H */
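
A minimal sketch showing that the same thread may re-acquire a recursive_mutex it already holds; the recursion itself is illustrative.

    #include "tbb/recursive_mutex.h"

    tbb::recursive_mutex tree_mutex;
    int max_depth = 0;

    void descend( int depth ) {
        // Each nested call takes the mutex again; each scoped_lock releases
        // one level of ownership when it goes out of scope.
        tbb::recursive_mutex::scoped_lock lock( tree_mutex );
        if( depth > max_depth )
            max_depth = depth;
        if( depth < 3 )
            descend( depth + 1 );   // re-enters with the mutex already held
    }
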
diff --git a/tbb/include/tbb/scalable_allocator.h b/tbb/include/tbb/scalable_allocator.h
new file mode 100644 (file)
index 0000000..65f80b5
--- /dev/null
@@ -0,0 +1,205 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_scalable_allocator_H
+#define __TBB_scalable_allocator_H
+/** @file */
+
+#include <stddef.h> /* Need ptrdiff_t and size_t from here. */
+
+#if !defined(__cplusplus) && __ICC==1100
+    #pragma warning (push)
+    #pragma warning (disable: 991)
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#if _MSC_VER >= 1400
+#define __TBB_EXPORTED_FUNC   __cdecl
+#else
+#define __TBB_EXPORTED_FUNC
+#endif
+
+/** The "malloc" analogue to allocate block of memory of size bytes.
+  * @ingroup memory_allocation */
+void * __TBB_EXPORTED_FUNC scalable_malloc (size_t size);
+
+/** The "free" analogue to discard a previously allocated piece of memory.
+    @ingroup memory_allocation */
+void   __TBB_EXPORTED_FUNC scalable_free (void* ptr);
+
+/** The "realloc" analogue complementing scalable_malloc.
+    @ingroup memory_allocation */
+void * __TBB_EXPORTED_FUNC scalable_realloc (void* ptr, size_t size);
+
+/** The "calloc" analogue complementing scalable_malloc.
+    @ingroup memory_allocation */
+void * __TBB_EXPORTED_FUNC scalable_calloc (size_t nobj, size_t size);
+
+/** The "posix_memalign" analogue.
+    @ingroup memory_allocation */
+int __TBB_EXPORTED_FUNC scalable_posix_memalign (void** memptr, size_t alignment, size_t size);
+
+/** The "_aligned_malloc" analogue.
+    @ingroup memory_allocation */
+void * __TBB_EXPORTED_FUNC scalable_aligned_malloc (size_t size, size_t alignment);
+
+/** The "_aligned_realloc" analogue.
+    @ingroup memory_allocation */
+void * __TBB_EXPORTED_FUNC scalable_aligned_realloc (void* ptr, size_t size, size_t alignment);
+
+/** The "_aligned_free" analogue.
+    @ingroup memory_allocation */
+void __TBB_EXPORTED_FUNC scalable_aligned_free (void* ptr);
+
+/** The analogue of _msize/malloc_size/malloc_usable_size.
+    Returns the usable size of a memory block previously allocated by scalable_*,
+    or 0 (zero) if ptr does not point to such a block.
+    @ingroup memory_allocation */
+size_t __TBB_EXPORTED_FUNC scalable_msize (void* ptr);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#ifdef __cplusplus
+
+#include <new>      /* To use new with the placement argument */
+
+/* Ensure that including this header does not cause implicit linkage with TBB */
+#ifndef __TBB_NO_IMPLICIT_LINKAGE
+    #define __TBB_NO_IMPLICIT_LINKAGE 1
+    #include "tbb_stddef.h"
+    #undef  __TBB_NO_IMPLICIT_LINKAGE
+#else
+    #include "tbb_stddef.h"
+#endif
+
+
+namespace tbb {
+
+#if _MSC_VER && !defined(__INTEL_COMPILER)
+    // Workaround for erroneous "unreferenced parameter" warning in method destroy.
+    #pragma warning (push)
+    #pragma warning (disable: 4100)
+#endif
+
+//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5
+/** The members are ordered the same way they are in section 20.4.1
+    of the ISO C++ standard.
+    @ingroup memory_allocation */
+template<typename T>
+class scalable_allocator {
+public:
+    typedef typename internal::allocator_type<T>::value_type value_type;
+    typedef value_type* pointer;
+    typedef const value_type* const_pointer;
+    typedef value_type& reference;
+    typedef const value_type& const_reference;
+    typedef size_t size_type;
+    typedef ptrdiff_t difference_type;
+    template<class U> struct rebind {
+        typedef scalable_allocator<U> other;
+    };
+
+    scalable_allocator() throw() {}
+    scalable_allocator( const scalable_allocator& ) throw() {}
+    template<typename U> scalable_allocator(const scalable_allocator<U>&) throw() {}
+
+    pointer address(reference x) const {return &x;}
+    const_pointer address(const_reference x) const {return &x;}
+
+    //! Allocate space for n objects.
+    pointer allocate( size_type n, const void* /*hint*/ =0 ) {
+        return static_cast<pointer>( scalable_malloc( n * sizeof(value_type) ) );
+    }
+
+    //! Free previously allocated block of memory
+    void deallocate( pointer p, size_type ) {
+        scalable_free( p );
+    }
+
+    //! Largest value for which method allocate might succeed.
+    size_type max_size() const throw() {
+        size_type absolutemax = static_cast<size_type>(-1) / sizeof (value_type);
+        return (absolutemax > 0 ? absolutemax : 1);
+    }
+    void construct( pointer p, const value_type& value ) {::new((void*)(p)) value_type(value);}
+    void destroy( pointer p ) {p->~value_type();}
+};
+
+#if _MSC_VER && !defined(__INTEL_COMPILER)
+    #pragma warning (pop)
+#endif // warning 4100 is back
+
+//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1
+/** @ingroup memory_allocation */
+template<>
+class scalable_allocator<void> {
+public:
+    typedef void* pointer;
+    typedef const void* const_pointer;
+    typedef void value_type;
+    template<class U> struct rebind {
+        typedef scalable_allocator<U> other;
+    };
+};
+
+template<typename T, typename U>
+inline bool operator==( const scalable_allocator<T>&, const scalable_allocator<U>& ) {return true;}
+
+template<typename T, typename U>
+inline bool operator!=( const scalable_allocator<T>&, const scalable_allocator<U>& ) {return false;}
+
+} // namespace tbb
+
+#if _MSC_VER
+    #if __TBB_BUILD && !defined(__TBBMALLOC_NO_IMPLICIT_LINKAGE)
+        #define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1
+    #endif
+
+    #if !__TBBMALLOC_NO_IMPLICIT_LINKAGE
+        #ifdef _DEBUG
+            #pragma comment(lib, "tbbmalloc_debug.lib")
+        #else
+            #pragma comment(lib, "tbbmalloc.lib")
+        #endif
+    #endif
+
+
+#endif
+
+#endif /* __cplusplus */
+
+#if !defined(__cplusplus) && __ICC==1100
+    #pragma warning (pop)
+#endif // ICC 11.0 warning 991 is back
+
+#endif /* __TBB_scalable_allocator_H */
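
A minimal sketch of both interfaces declared above: the C-style scalable_malloc/scalable_free pair and the STL-compatible scalable_allocator (which, per the pragmas above, implies linking against tbbmalloc on MSVC). The container and sizes are illustrative.

    #include "tbb/scalable_allocator.h"
    #include <vector>

    int main() {
        // C interface: drop-in analogues of malloc/free.
        void* raw = scalable_malloc( 1024 );
        scalable_free( raw );

        // C++ interface: a standard-conforming allocator for STL containers.
        std::vector<int, tbb::scalable_allocator<int> > values;
        for( int i = 0; i < 100; ++i )
            values.push_back( i );
        return 0;
    }
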
diff --git a/tbb/include/tbb/spin_mutex.h b/tbb/include/tbb/spin_mutex.h
new file mode 100644 (file)
index 0000000..140c6e9
--- /dev/null
@@ -0,0 +1,192 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_spin_mutex_H
+#define __TBB_spin_mutex_H
+
+#include <cstddef>
+#include <new>
+#include "aligned_space.h"
+#include "tbb_stddef.h"
+#include "tbb_machine.h"
+#include "tbb_profiling.h"
+
+namespace tbb {
+
+//! A lock that occupies a single byte.
+/** A spin_mutex is a spinning lock that fits in a single byte.
+    It should be used only for locking short critical sections 
+    (typically less than 20 instructions) when fairness is not an issue.  
+    If zero-initialized, the mutex is considered unheld.
+    @ingroup synchronization */
+class spin_mutex {
+    //! 0 if lock is released, 1 if lock is acquired.
+    __TBB_Byte flag;
+
+public:
+    //! Construct unacquired lock.
+    /** Equivalent to zero-initialization of *this. */
+    spin_mutex() : flag(0) {
+#if TBB_USE_THREADING_TOOLS
+        internal_construct();
+#endif
+    }
+
+    //! Represents acquisition of a mutex.
+    class scoped_lock : internal::no_copy {
+    private:
+        //! Points to currently held mutex, or NULL if no lock is held.
+        spin_mutex* my_mutex; 
+
+        //! Value to store into spin_mutex::flag to unlock the mutex.
+        uintptr_t my_unlock_value;
+
+        //! Like acquire, but with ITT instrumentation.
+        void __TBB_EXPORTED_METHOD internal_acquire( spin_mutex& m );
+
+        //! Like try_acquire, but with ITT instrumentation.
+        bool __TBB_EXPORTED_METHOD internal_try_acquire( spin_mutex& m );
+
+        //! Like release, but with ITT instrumentation.
+        void __TBB_EXPORTED_METHOD internal_release();
+
+        friend class spin_mutex;
+
+    public:
+        //! Construct without acquiring a mutex.
+        scoped_lock() : my_mutex(NULL), my_unlock_value(0) {}
+
+        //! Construct and acquire lock on a mutex.
+        scoped_lock( spin_mutex& m ) { 
+#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
+            my_mutex=NULL;
+            internal_acquire(m);
+#else
+            my_unlock_value = __TBB_LockByte(m.flag);
+            my_mutex=&m;
+#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT*/
+        }
+
+        //! Acquire lock.
+        void acquire( spin_mutex& m ) {
+#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
+            internal_acquire(m);
+#else
+            my_unlock_value = __TBB_LockByte(m.flag);
+            my_mutex = &m;
+#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT*/
+        }
+
+        //! Try acquiring lock (non-blocking)
+        /** Return true if lock acquired; false otherwise. */
+        bool try_acquire( spin_mutex& m ) {
+#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
+            return internal_try_acquire(m);
+#else
+            bool result = __TBB_TryLockByte(m.flag);
+            if( result ) {
+                my_unlock_value = 0;
+                my_mutex = &m;
+            }
+            return result;
+#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT*/
+        }
+
+        //! Release lock
+        void release() {
+#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
+            internal_release();
+#else
+            __TBB_UnlockByte(my_mutex->flag, static_cast<__TBB_Byte>(my_unlock_value));
+            my_mutex = NULL;
+#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
+        }
+
+        //! Destroy lock.  If holding a lock, releases the lock first.
+        ~scoped_lock() {
+            if( my_mutex ) {
+#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
+                internal_release();
+#else
+                __TBB_UnlockByte(my_mutex->flag, static_cast<__TBB_Byte>(my_unlock_value));
+#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
+            }
+        }
+    };
+
+    void __TBB_EXPORTED_METHOD internal_construct();
+
+    // Mutex traits
+    static const bool is_rw_mutex = false;
+    static const bool is_recursive_mutex = false;
+    static const bool is_fair_mutex = false;
+
+    // ISO C++0x compatibility methods
+
+    //! Acquire lock
+    void lock() {
+#if TBB_USE_THREADING_TOOLS
+        aligned_space<scoped_lock,1> tmp;
+        new(tmp.begin()) scoped_lock(*this);
+#else
+        __TBB_LockByte(flag);
+#endif /* TBB_USE_THREADING_TOOLS*/
+    }
+
+    //! Try acquiring lock (non-blocking)
+    /** Return true if lock acquired; false otherwise. */
+    bool try_lock() {
+#if TBB_USE_THREADING_TOOLS
+        aligned_space<scoped_lock,1> tmp;
+        return (new(tmp.begin()) scoped_lock)->internal_try_acquire(*this);
+#else
+        return __TBB_TryLockByte(flag);
+#endif /* TBB_USE_THREADING_TOOLS*/
+    }
+
+    //! Release lock
+    void unlock() {
+#if TBB_USE_THREADING_TOOLS
+        aligned_space<scoped_lock,1> tmp;
+        scoped_lock& s = *tmp.begin();
+        s.my_mutex = this;
+        s.my_unlock_value = 0;
+        s.internal_release();
+#else
+        __TBB_store_with_release(flag, 0);
+#endif /* TBB_USE_THREADING_TOOLS */
+    }
+
+    friend class scoped_lock;
+};
+
+__TBB_DEFINE_PROFILING_SET_NAME(spin_mutex)
+
+} // namespace tbb
+
+#endif /* __TBB_spin_mutex_H */
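
A minimal sketch of the byte-sized spin lock above, guarding a very short critical section; the counter is illustrative.

    #include "tbb/spin_mutex.h"

    tbb::spin_mutex stats_mutex;     // zero-initialized state means "unheld"
    unsigned long sample_count = 0;

    void add_sample() {
        // Keep the critical section to a handful of instructions:
        // waiters spin rather than block.
        tbb::spin_mutex::scoped_lock lock( stats_mutex );
        ++sample_count;
    }
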
diff --git a/tbb/include/tbb/spin_rw_mutex.h b/tbb/include/tbb/spin_rw_mutex.h
new file mode 100644 (file)
index 0000000..cfe806e
--- /dev/null
@@ -0,0 +1,228 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_spin_rw_mutex_H
+#define __TBB_spin_rw_mutex_H
+
+#include "tbb_stddef.h"
+#include "tbb_machine.h"
+#include "tbb_profiling.h"
+
+namespace tbb {
+
+class spin_rw_mutex_v3;
+typedef spin_rw_mutex_v3 spin_rw_mutex;
+
+//! Fast, unfair, spinning reader-writer lock with backoff and writer-preference
+/** @ingroup synchronization */
+class spin_rw_mutex_v3 {
+    //! @cond INTERNAL
+
+    //! Internal acquire write lock.
+    bool __TBB_EXPORTED_METHOD internal_acquire_writer();
+
+    //! Out of line code for releasing a write lock.  
+    /** This code has debug checking and instrumentation for Intel(R) Thread Checker and Intel(R) Thread Profiler. */
+    void __TBB_EXPORTED_METHOD internal_release_writer();
+
+    //! Internal acquire read lock.
+    void __TBB_EXPORTED_METHOD internal_acquire_reader();
+
+    //! Internal upgrade reader to become a writer.
+    bool __TBB_EXPORTED_METHOD internal_upgrade();
+
+    //! Out of line code for downgrading a writer to a reader.   
+    /** This code has debug checking and instrumentation for Intel(R) Thread Checker and Intel(R) Thread Profiler. */
+    void __TBB_EXPORTED_METHOD internal_downgrade();
+
+    //! Internal release read lock.
+    void __TBB_EXPORTED_METHOD internal_release_reader();
+
+    //! Internal try_acquire write lock.
+    bool __TBB_EXPORTED_METHOD internal_try_acquire_writer();
+
+    //! Internal try_acquire read lock.
+    bool __TBB_EXPORTED_METHOD internal_try_acquire_reader();
+
+    //! @endcond
+public:
+    //! Construct unacquired mutex.
+    spin_rw_mutex_v3() : state(0) {
+#if TBB_USE_THREADING_TOOLS
+        internal_construct();
+#endif
+    }
+
+#if TBB_USE_ASSERT
+    //! Destructor asserts if the mutex is still acquired, i.e. state is non-zero.
+    ~spin_rw_mutex_v3() {
+        __TBB_ASSERT( !state, "destruction of an acquired mutex");
+    };
+#endif /* TBB_USE_ASSERT */
+
+    //! The scoped locking pattern
+    /** It helps to avoid the common problem of forgetting to release the lock.
+        It also nicely provides the "node" for queuing locks. */
+    class scoped_lock : internal::no_copy {
+    public:
+        //! Construct lock that has not acquired a mutex.
+        /** Equivalent to zero-initialization of *this. */
+        scoped_lock() : mutex(NULL), is_writer(false) {}
+
+        //! Acquire lock on given mutex.
+        scoped_lock( spin_rw_mutex& m, bool write = true ) : mutex(NULL) {
+            acquire(m, write);
+        }
+
+        //! Release lock (if lock is held).
+        ~scoped_lock() {
+            if( mutex ) release();
+        }
+
+        //! Acquire lock on given mutex.
+        void acquire( spin_rw_mutex& m, bool write = true ) {
+            __TBB_ASSERT( !mutex, "holding mutex already" );
+            is_writer = write; 
+            mutex = &m;
+            if( write ) mutex->internal_acquire_writer();
+            else        mutex->internal_acquire_reader();
+        }
+
+        //! Upgrade reader to become a writer.
+        /** Returns true if the upgrade happened without releasing and re-acquiring the lock, and false otherwise. */
+        bool upgrade_to_writer() {
+            __TBB_ASSERT( mutex, "lock is not acquired" );
+            __TBB_ASSERT( !is_writer, "not a reader" );
+            is_writer = true; 
+            return mutex->internal_upgrade();
+        }
+
+        //! Release lock.
+        void release() {
+            __TBB_ASSERT( mutex, "lock is not acquired" );
+            spin_rw_mutex *m = mutex; 
+            mutex = NULL;
+#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
+            if( is_writer ) m->internal_release_writer();
+            else            m->internal_release_reader();
+#else
+            if( is_writer ) __TBB_AtomicAND( &m->state, READERS ); 
+            else            __TBB_FetchAndAddWrelease( &m->state, -(intptr_t)ONE_READER);
+#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
+        }
+
+        //! Downgrade writer to become a reader.
+        bool downgrade_to_reader() {
+#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
+            __TBB_ASSERT( mutex, "lock is not acquired" );
+            __TBB_ASSERT( is_writer, "not a writer" );
+            mutex->internal_downgrade();
+#else
+            __TBB_FetchAndAddW( &mutex->state, ((intptr_t)ONE_READER-WRITER));
+#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
+            is_writer = false;
+
+            return true;
+        }
+
+        //! Try acquire lock on given mutex.
+        bool try_acquire( spin_rw_mutex& m, bool write = true ) {
+            __TBB_ASSERT( !mutex, "holding mutex already" );
+            bool result;
+            is_writer = write; 
+            result = write? m.internal_try_acquire_writer()
+                          : m.internal_try_acquire_reader();
+            if( result ) 
+                mutex = &m;
+            return result;
+        }
+
+    protected:
+        //! The pointer to the current mutex that is held, or NULL if no mutex is held.
+        spin_rw_mutex* mutex;
+
+        //! If mutex!=NULL, then is_writer is true if holding a writer lock, false if holding a reader lock.
+        /** Not defined if not holding a lock. */
+        bool is_writer;
+    };
+
+    // Mutex traits
+    static const bool is_rw_mutex = true;
+    static const bool is_recursive_mutex = false;
+    static const bool is_fair_mutex = false;
+
+    // ISO C++0x compatibility methods
+
+    //! Acquire writer lock
+    void lock() {internal_acquire_writer();}
+
+    //! Try acquiring writer lock (non-blocking)
+    /** Return true if lock acquired; false otherwise. */
+    bool try_lock() {return internal_try_acquire_writer();}
+
+    //! Release lock
+    void unlock() {
+#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
+        if( state&WRITER ) internal_release_writer();
+        else               internal_release_reader();
+#else
+        if( state&WRITER ) __TBB_AtomicAND( &state, READERS ); 
+        else               __TBB_FetchAndAddWrelease( &state, -(intptr_t)ONE_READER);
+#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
+    }
+
+    // Methods for reader locks that resemble ISO C++0x compatibility methods.
+
+    //! Acquire reader lock
+    void lock_read() {internal_acquire_reader();}
+
+    //! Try acquiring reader lock (non-blocking)
+    /** Return true if reader lock acquired; false otherwise. */
+    bool try_lock_read() {return internal_try_acquire_reader();}
+
+private:
+    typedef intptr_t state_t;
+    static const state_t WRITER = 1;
+    static const state_t WRITER_PENDING = 2;
+    static const state_t READERS = ~(WRITER | WRITER_PENDING);
+    static const state_t ONE_READER = 4;
+    static const state_t BUSY = WRITER | READERS;
+    //! State of lock
+    /** Bit 0 = writer is holding lock
+        Bit 1 = request by a writer to acquire lock (hint to readers to wait)
+        Bit 2..N = number of readers holding lock */
+    state_t state;
+
+    void __TBB_EXPORTED_METHOD internal_construct();
+};
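+
+// A minimal usage sketch of the scoped locking pattern above (illustrative only;
+// the protected data and the surrounding function are assumptions, not part of
+// this header):
+//
+//     tbb::spin_rw_mutex my_mutex;
+//     int shared_value = 0;
+//
+//     void read_then_maybe_write() {
+//         tbb::spin_rw_mutex::scoped_lock lock( my_mutex, /*write=*/false );  // reader
+//         if( shared_value == 0 ) {
+//             // May temporarily release the lock; re-check the condition if
+//             // upgrade_to_writer() returns false.
+//             lock.upgrade_to_writer();
+//             if( shared_value == 0 )
+//                 shared_value = 42;
+//         }
+//     }   // the scoped_lock destructor releases the lock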
+
+__TBB_DEFINE_PROFILING_SET_NAME(spin_rw_mutex)
+
+} // namespace tbb
+
+#endif /* __TBB_spin_rw_mutex_H */
diff --git a/tbb/include/tbb/task.h b/tbb/include/tbb/task.h
new file mode 100644 (file)
index 0000000..7b8dab8
--- /dev/null
@@ -0,0 +1,947 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_task_H
+#define __TBB_task_H
+
+#include "tbb_stddef.h"
+#include "tbb_machine.h"
+#include <climits>
+
+typedef struct ___itt_caller *__itt_caller;
+
+namespace tbb {
+
+class task;
+class task_list;
+
+#if __TBB_TASK_GROUP_CONTEXT
+class task_group_context;
+#endif /* __TBB_TASK_GROUP_CONTEXT */
+
+// MSVC does not allow taking the address of a member that was defined 
+// privately in task_base and made public in class task via a using declaration.
+#if _MSC_VER || (__GNUC__==3 && __GNUC_MINOR__<3)
+#define __TBB_TASK_BASE_ACCESS public
+#else
+#define __TBB_TASK_BASE_ACCESS private
+#endif
+
+namespace internal {
+
+    class allocate_additional_child_of_proxy: no_assign {
+        //! No longer used, but retained for binary layout compatibility.  Always NULL.
+        task* self;
+        task& parent;
+    public:
+        explicit allocate_additional_child_of_proxy( task& parent_ ) : self(NULL), parent(parent_) {}
+        task& __TBB_EXPORTED_METHOD allocate( size_t size ) const;
+        void __TBB_EXPORTED_METHOD free( task& ) const;
+    };
+
+}
+
+namespace interface5 {
+    namespace internal {
+        //! Base class for methods that became static in TBB 3.0.
+        /** TBB's evolution caused the "this" argument for several methods to become obsolete.
+            However, for backwards binary compatibility, the new methods need distinct names,
+            otherwise the One Definition Rule would be broken.  Hence the new methods are 
+            defined in this private base class, and then exposed in class task via 
+            using declarations. */
+        class task_base: tbb::internal::no_copy {
+        __TBB_TASK_BASE_ACCESS:
+            friend class tbb::task;
+
+            //! Schedule task for execution when a worker becomes available.
+            static void spawn( task& t );
+            //! Spawn multiple tasks and clear list.
+            static void spawn( task_list& list );
+
+            //! Like allocate_child, except that task's parent becomes "t", not this.
+            /** Typically used in conjunction with recycle_to_reexecute to implement while loops.
+               Atomically increments the reference count of t.parent() */
+            static tbb::internal::allocate_additional_child_of_proxy allocate_additional_child_of( task& t ) {
+                return tbb::internal::allocate_additional_child_of_proxy(t);
+            }
+
+            //! Destroy a task.
+            /** Usually, calling this method is unnecessary, because a task is
+                implicitly deleted after its execute() method runs.  However,
+                sometimes a task needs to be explicitly deallocated, such as
+                when a root task is used as the parent in spawn_and_wait_for_all. */
+            static void __TBB_EXPORTED_FUNC destroy( task& victim );
+        }; 
+    } // internal
+} // interface5
+
+//! @cond INTERNAL
+namespace internal {
+
+    class scheduler: no_copy {
+    public:
+        //! For internal use only
+        virtual void spawn( task& first, task*& next ) = 0;
+
+        //! For internal use only
+        virtual void wait_for_all( task& parent, task* child ) = 0;
+
+        //! For internal use only
+        virtual void spawn_root_and_wait( task& first, task*& next ) = 0;
+
+        //! Pure virtual destructor.
+        //  Needed only to silence overzealous compiler warnings.
+        virtual ~scheduler() = 0;
+
+        //! For internal use only
+        virtual void enqueue( task& t, void* reserved ) = 0;
+    };
+
+    //! A reference count
+    /** Should always be non-negative.  A signed type is used so that underflow can be detected. */
+    typedef intptr_t reference_count;
+
+    //! An id as used for specifying affinity.
+    typedef unsigned short affinity_id;
+
+#if __TBB_TASK_GROUP_CONTEXT
+    class generic_scheduler;
+
+    struct context_list_node_t {
+        context_list_node_t *my_prev,
+                            *my_next;
+    };
+
+    class allocate_root_with_context_proxy: no_assign {
+        task_group_context& my_context;
+    public:
+        allocate_root_with_context_proxy ( task_group_context& ctx ) : my_context(ctx) {}
+        task& __TBB_EXPORTED_METHOD allocate( size_t size ) const;
+        void __TBB_EXPORTED_METHOD free( task& ) const;
+    };
+#endif /* __TBB_TASK_GROUP_CONTEXT */
+
+    class allocate_root_proxy: no_assign {
+    public:
+        static task& __TBB_EXPORTED_FUNC allocate( size_t size );
+        static void __TBB_EXPORTED_FUNC free( task& );
+    };
+
+    class allocate_continuation_proxy: no_assign {
+    public:
+        task& __TBB_EXPORTED_METHOD allocate( size_t size ) const;
+        void __TBB_EXPORTED_METHOD free( task& ) const;
+    };
+
+    class allocate_child_proxy: no_assign {
+    public:
+        task& __TBB_EXPORTED_METHOD allocate( size_t size ) const;
+        void __TBB_EXPORTED_METHOD free( task& ) const;
+    };
+
+    //! Memory prefix to a task object.
+    /** This class is internal to the library.
+        Do not reference it directly, except within the library itself.
+        Fields are ordered in a way that preserves backwards compatibility and yields 
+        good packing on typical 32-bit and 64-bit platforms.
+        @ingroup task_scheduling */
+    class task_prefix {
+    private:
+        friend class tbb::task;
+        friend class tbb::interface5::internal::task_base;
+        friend class tbb::task_list;
+        friend class internal::scheduler;
+        friend class internal::allocate_root_proxy;
+        friend class internal::allocate_child_proxy;
+        friend class internal::allocate_continuation_proxy;
+        friend class internal::allocate_additional_child_of_proxy;
+
+#if __TBB_TASK_GROUP_CONTEXT
+        //! Shared context that is used to communicate asynchronous state changes
+        /** Currently it is used to broadcast cancellation requests generated both 
+            by users and as the result of unhandled exceptions in the task::execute()
+            methods. */
+        task_group_context  *context;
+#endif /* __TBB_TASK_GROUP_CONTEXT */
+        
+        //! The scheduler that allocated the task, or NULL if the task is big.
+        /** Small tasks are pooled by the scheduler that allocated the task.
+            If a scheduler needs to free a small task allocated by another scheduler,
+            it returns the task to that other scheduler.  This policy avoids
+            memory space blowup issues for memory allocators that allocate from 
+            thread-specific pools. */
+        scheduler* origin;
+
+#if TBB_PREVIEW_TASK_PRIORITY
+        union {
+#endif /* TBB_PREVIEW_TASK_PRIORITY */
+        //! Obsolete. The scheduler that owns the task.
+        /** Retained only for the sake of backward binary compatibility. 
+            Still used by inline methods in the task.h header. **/
+        scheduler* owner;
+
+#if TBB_PREVIEW_TASK_PRIORITY
+        //! Pointer to the next offloaded lower priority task.
+        /** Used to maintain a list of offloaded tasks inside the scheduler. **/
+        task* next_offloaded;
+        };
+#endif /* TBB_PREVIEW_TASK_PRIORITY */
+
+        //! The task whose reference count includes me.
+        /** In the "blocking style" of programming, this field points to the parent task.
+            In the "continuation-passing style" of programming, this field points to the
+            continuation of the parent. */
+        tbb::task* parent;
+
+        //! Reference count used for synchronization.
+        /** In the "continuation-passing style" of programming, this field is
+            the difference of the number of allocated children minus the
+            number of children that have completed.
+            In the "blocking style" of programming, this field is one more than the difference. */
+        reference_count ref_count;
+
+        //! Obsolete. Used to be scheduling depth before TBB 2.2
+        /** Retained only for the sake of backward binary compatibility.
+            Not used by TBB anymore. **/
+        int depth;
+
+        //! A task::state_type, stored as a byte for compactness.
+        /** This state is exposed to users via method task::state(). */
+        unsigned char state;
+
+        //! Miscellaneous state that is not directly visible to users, stored as a byte for compactness.
+        /** 0x0 -> version 1.0 task
+            0x1 -> version >=2.1 task
+            0x20 -> task_proxy
+            0x40 -> task has live ref_count
+            0x80 -> a stolen task */
+        unsigned char extra_state;
+
+        affinity_id affinity;
+
+        //! "next" field for list of tasks
+        tbb::task* next;
+
+        //! The task corresponding to this task_prefix.
+        tbb::task& task() {return *reinterpret_cast<tbb::task*>(this+1);}
+    };
+
+} // namespace internal
+//! @endcond
+
+#if __TBB_TASK_GROUP_CONTEXT
+
+#if TBB_PREVIEW_TASK_PRIORITY
+namespace internal {
+    static const int priority_stride_v4 = INT_MAX / 4;
+}
+
+enum priority_t {
+    priority_normal = internal::priority_stride_v4 * 2,
+    priority_low = priority_normal - internal::priority_stride_v4,
+    priority_high = priority_normal + internal::priority_stride_v4
+};
+
+#endif /* TBB_PREVIEW_TASK_PRIORITY */
+
+#if TBB_USE_CAPTURED_EXCEPTION
+    class tbb_exception;
+#else
+    namespace internal {
+        class tbb_exception_ptr;
+    }
+#endif /* !TBB_USE_CAPTURED_EXCEPTION */
+
+class task_scheduler_init;
+
+//! Used to form groups of tasks 
+/** @ingroup task_scheduling 
+    The context services explicit cancellation requests from user code, and unhandled 
+    exceptions intercepted during task execution. Intercepting an exception results 
+    in generating an internal cancellation request (which is processed in exactly the 
+    same way as an external one). 
+
+    The context is associated with one or more root tasks and defines the cancellation 
+    group that includes all the descendants of the corresponding root task(s). Association 
+    is established when a context object is passed as an argument to the task::allocate_root()
+    method. See task_group_context::task_group_context for more details.
+    
+    The context can be bound to another one, and other contexts can be bound to it,
+    forming a tree-like structure: parent -> this -> children. Arrows here designate
+    cancellation propagation direction. If a task in a cancellation group is canceled,
+    all the other tasks in this group and groups bound to it (as children) get canceled too.
+
+    IMPLEMENTATION NOTE: 
+    When adding new members to task_group_context or changing types of existing ones, 
+    update the size of both padding buffers (_leading_padding and _trailing_padding)
+    appropriately. See also VERSIONING NOTE at the constructor definition below. **/
+class task_group_context : internal::no_copy {
+private:
+    friend class internal::generic_scheduler;
+    friend class task_scheduler_init;
+
+#if TBB_USE_CAPTURED_EXCEPTION
+    typedef tbb_exception exception_container_type;
+#else
+    typedef internal::tbb_exception_ptr exception_container_type;
+#endif
+
+    enum version_traits_word_layout {
+        traits_offset = 16,
+        version_mask = 0xFFFF,
+        traits_mask = 0xFFFFul << traits_offset
+    };
+
+public:
+    enum kind_type {
+        isolated,
+        bound
+    };
+
+    enum traits_type {
+        exact_exception = 0x0001ul << traits_offset,
+        concurrent_wait = 0x0004ul << traits_offset,
+#if TBB_USE_CAPTURED_EXCEPTION
+        default_traits = 0
+#else
+        default_traits = exact_exception
+#endif /* !TBB_USE_CAPTURED_EXCEPTION */
+    };
+
+private:
+    enum state {
+        may_have_children = 1
+    };
+
+    union {
+        //! Flavor of this context: bound or isolated.
+        kind_type my_kind;
+        uintptr_t _my_kind_aligner;
+    };
+
+    //! Pointer to the context of the parent cancellation group. NULL for isolated contexts.
+    task_group_context *my_parent;
+
+    //! Used to form the thread specific list of contexts without additional memory allocation.
+    /** A context is added to the current thread's list when it is bound to its parent.
+        Any context can be present in the list of only one thread. **/
+    internal::context_list_node_t my_node;
+
+    //! Used to set and maintain stack stitching point for Intel Performance Tools.
+    __itt_caller itt_caller;
+
+    //! Leading padding protecting accesses to frequently used members from false sharing.
+    /** Read accesses to the field my_cancellation_requested are on the hot path inside
+        the scheduler. This padding ensures that this field never shares the same cache 
+        line with a local variable that is frequently written to. **/
+    char _leading_padding[internal::NFS_MaxLineSize
+                          - 2 * sizeof(uintptr_t)- sizeof(void*) - sizeof(internal::context_list_node_t)
+                          - sizeof(__itt_caller)];
+    
+    //! Specifies whether cancellation was requested for this task group.
+    uintptr_t my_cancellation_requested;
+    
+    //! Version for run-time checks and behavioral traits of the context.
+    /** Version occupies low 16 bits, and traits (zero or more ORed enumerators
+        from the traits_type enumerations) take the next 16 bits.
+        Original (zeroth) version of the context did not support any traits. **/
+    uintptr_t  my_version_and_traits;
+
+    //! Pointer to the container storing exception being propagated across this task group.
+    exception_container_type *my_exception;
+
+    //! Scheduler instance that registered this context in its thread specific list.
+    internal::generic_scheduler *my_owner;
+
+    //! Internal state (combination of state flags).
+    uintptr_t my_state;
+
+#if TBB_PREVIEW_TASK_PRIORITY
+    //! Priority level of the task group (in normalized representation)
+    intptr_t my_priority;
+#endif /* TBB_PREVIEW_TASK_PRIORITY */
+
+    //! Trailing padding protecting accesses to frequently used members from false sharing
+    /** \sa _leading_padding **/
+    char _trailing_padding[internal::NFS_MaxLineSize - 2 * sizeof(uintptr_t) - 2 * sizeof(void*)
+#if TBB_PREVIEW_TASK_PRIORITY
+                            - sizeof(intptr_t)
+#endif /* TBB_PREVIEW_TASK_PRIORITY */
+                          ];
+
+public:
+    //! Default & binding constructor.
+    /** By default a bound context is created. That is, this context will be bound 
+        (as a child) to the context of the task that calls the
+        task::allocate_root(this_context) method. Cancellation requests passed to the
+        parent context are propagated to all the contexts bound to it. Similarly,
+        priority changes are propagated from the parent context to its children.
+
+        If task_group_context::isolated is used as the argument, then the tasks associated
+        with this context will never be affected by events in any other context.
+        
+        Creating isolated contexts involves much less overhead, but they have limited
+        utility. Normally, when an exception occurs in an algorithm that has nested
+        ones running, it is desirable to have all the nested algorithms canceled 
+        as well. Such behavior requires nested algorithms to use bound contexts.
+        
+        One case where isolated contexts are beneficial is the master thread. That is,
+        if a particular algorithm is invoked directly from the master thread (not from
+        a TBB task), supplying it with an explicitly created isolated context results
+        in a faster algorithm startup.
+        
+        VERSIONING NOTE: 
+        Implementation(s) of task_group_context constructor(s) cannot be made 
+        entirely out-of-line because the run-time version must be set by the user 
+        code. This will become critically important for binary compatibility, if 
+        we ever have to change the size of the context object.
+
+        Boosting the runtime version will also be necessary if new data fields are 
+        introduced in the currently unused padding areas and these fields are updated 
+        by inline methods. **/
+    task_group_context ( kind_type relation_with_parent = bound,
+                         uintptr_t traits = default_traits )
+        : my_kind(relation_with_parent)
+        , my_version_and_traits(1 | traits)
+    {
+        init();
+    }
+
+    __TBB_EXPORTED_METHOD ~task_group_context ();
+
+    //! Forcefully reinitializes the context after the task tree it was associated with is completed.
+    /** Because the method assumes that all the tasks that used to be associated with 
+        this context have already finished, calling it while the context is still 
+        in use somewhere in the task hierarchy leads to undefined behavior.
+        
+        IMPORTANT: This method is not thread safe!
+
+        The method does not change the context's parent if it is set. **/ 
+    void __TBB_EXPORTED_METHOD reset ();
+
+    //! Initiates cancellation of all tasks in this cancellation group and its subordinate groups.
+    /** \return false if cancellation has already been requested, true otherwise. 
+
+        Note that canceling never fails. When false is returned, it just means that 
+        another thread (or this one) has already sent a cancellation request to this
+        context or to one of its ancestors (if this context is bound). It is guaranteed
+        that when this method is concurrently called on the same not yet cancelled 
+        context, true will be returned by one and only one invocation. **/
+    bool __TBB_EXPORTED_METHOD cancel_group_execution ();
+
+    //! Returns true if the context received cancellation request.
+    bool __TBB_EXPORTED_METHOD is_group_execution_cancelled () const;
+
+    //! Records the pending exception, and cancels the task group.
+    /** May be called only from inside a catch-block. If the context is already 
+        canceled, does nothing. 
+        The method brings the task group associated with this context exactly into 
+        the state it would be in, if one of its tasks threw the currently pending 
+        exception during its execution. In other words, it emulates the actions 
+        of the scheduler's dispatch loop exception handler. **/
+    void __TBB_EXPORTED_METHOD register_pending_exception ();
+
+#if TBB_PREVIEW_TASK_PRIORITY
+    //! Changes the priority of the task group.
+    void set_priority ( priority_t );
+
+    //! Retrieves the current priority of the task group.
+    priority_t priority () const;
+#endif /* TBB_PREVIEW_TASK_PRIORITY */
+
+protected:
+    //! Out-of-line part of the constructor. 
+    /** Singled out to ensure backward binary compatibility of future versions. **/
+    void __TBB_EXPORTED_METHOD init ();
+
+private:
+    friend class task;
+    friend class internal::allocate_root_with_context_proxy;
+
+    static const kind_type binding_required = bound;
+    static const kind_type binding_completed = kind_type(bound+1);
+    static const kind_type detached = kind_type(binding_completed+1);
+    static const kind_type dying = kind_type(detached+1);
+
+    //! Propagates state change (if any) from an ancestor
+    /** Checks if one of this object's ancestors is in a new state, and propagates 
+        the new state to all its descendants in this object's heritage line. **/
+    template <typename T>
+    void propagate_state_from_ancestors ( T task_group_context::*mptr_state, T new_state );
+
+    //! Makes sure that the context is registered with a scheduler instance.
+    inline void finish_initialization ( internal::generic_scheduler *local_sched );
+
+    //! Registers this context with the local scheduler and binds it to its parent context
+    void bind_to ( internal::generic_scheduler *local_sched );
+
+    //! Registers this context with the local scheduler
+    void register_with ( internal::generic_scheduler *local_sched );
+
+}; // class task_group_context
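+
+// A minimal cancellation sketch for the context described above (illustrative
+// only; MyRootTask is an assumed user-defined task type, not part of this header):
+//
+//     tbb::task_group_context ctx;
+//     tbb::task& root = *new( tbb::task::allocate_root(ctx) ) MyRootTask();
+//     tbb::task::spawn_root_and_wait( root );      // runs the whole task tree
+//     // Meanwhile, another thread may cancel the group:
+//     //     ctx.cancel_group_execution();
+//     // Running tasks can poll tbb::task::self().is_cancelled() and bail out early.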
+
+#endif /* __TBB_TASK_GROUP_CONTEXT */
+
+//! Base class for user-defined tasks.
+/** @ingroup task_scheduling */
+class task: __TBB_TASK_BASE_ACCESS interface5::internal::task_base {
+
+    //! Set reference count
+    void __TBB_EXPORTED_METHOD internal_set_ref_count( int count );
+
+    //! Decrement reference count and return its new value.
+    internal::reference_count __TBB_EXPORTED_METHOD internal_decrement_ref_count();
+
+protected:
+    //! Default constructor.
+    task() {prefix().extra_state=1;}
+
+public:
+    //! Destructor.
+    virtual ~task() {}
+
+    //! Should be overridden by derived classes.
+    virtual task* execute() = 0;
+
+    //! Enumeration of task states that the scheduler considers.
+    enum state_type {
+        //! task is running, and will be destroyed after method execute() completes.
+        executing,
+        //! task to be rescheduled.
+        reexecute,
+        //! task is in ready pool, or is going to be put there, or was just taken off.
+        ready,
+        //! task object is freshly allocated or recycled.
+        allocated,
+        //! task object is on free list, or is going to be put there, or was just taken off.
+        freed,
+        //! task to be recycled as continuation
+        recycle 
+    };
+
+    //------------------------------------------------------------------------
+    // Allocating tasks
+    //------------------------------------------------------------------------
+
+    //! Returns proxy for overloaded new that allocates a root task.
+    static internal::allocate_root_proxy allocate_root() {
+        return internal::allocate_root_proxy();
+    }
+
+#if __TBB_TASK_GROUP_CONTEXT
+    //! Returns proxy for overloaded new that allocates a root task associated with user supplied context.
+    static internal::allocate_root_with_context_proxy allocate_root( task_group_context& ctx ) {
+        return internal::allocate_root_with_context_proxy(ctx);
+    }
+#endif /* __TBB_TASK_GROUP_CONTEXT */
+
+    //! Returns proxy for overloaded new that allocates a continuation task of *this.
+    /** The continuation's parent becomes the parent of *this. */
+    internal::allocate_continuation_proxy& allocate_continuation() {
+        return *reinterpret_cast<internal::allocate_continuation_proxy*>(this);
+    }
+
+    //! Returns proxy for overloaded new that allocates a child task of *this.
+    internal::allocate_child_proxy& allocate_child() {
+        return *reinterpret_cast<internal::allocate_child_proxy*>(this);
+    }
+
+    //! Define recommended static form via import from base class.
+    using task_base::allocate_additional_child_of;
+
+#if __TBB_DEPRECATED_TASK_INTERFACE
+    //! Destroy a task.
+    /** Usually, calling this method is unnecessary, because a task is
+        implicitly deleted after its execute() method runs.  However,
+        sometimes a task needs to be explicitly deallocated, such as
+        when a root task is used as the parent in spawn_and_wait_for_all. */
+    void __TBB_EXPORTED_METHOD destroy( task& t );
+#else /* !__TBB_DEPRECATED_TASK_INTERFACE */
+    //! Define recommended static form via import from base class.
+    using task_base::destroy;
+#endif /* !__TBB_DEPRECATED_TASK_INTERFACE */
+
+    //------------------------------------------------------------------------
+    // Recycling of tasks
+    //------------------------------------------------------------------------
+
+    //! Change this to be a continuation of its former self.
+    /** The caller must guarantee that the task's refcount does not become zero until
+        after the method execute() returns.  Typically, this is done by having
+        method execute() return a pointer to a child of the task.  If the guarantee
+        cannot be made, use method recycle_as_safe_continuation instead. 
+       
+        Because of the hazard, this method may be deprecated in the future. */
+    void recycle_as_continuation() {
+        __TBB_ASSERT( prefix().state==executing, "execute not running?" );
+        prefix().state = allocated;
+    }
+
+    //! Recommended, safe variant of recycle_as_continuation.
+    /** For safety, it requires an additional increment of ref_count.
+        With no descendants and ref_count of 1, it has the semantics of recycle_to_reexecute. */
+    void recycle_as_safe_continuation() {
+        __TBB_ASSERT( prefix().state==executing, "execute not running?" );
+        prefix().state = recycle;
+    }
+
+    //! Change this to be a child of new_parent.
+    void recycle_as_child_of( task& new_parent ) {
+        internal::task_prefix& p = prefix();
+        __TBB_ASSERT( prefix().state==executing||prefix().state==allocated, "execute not running, or already recycled" );
+        __TBB_ASSERT( prefix().ref_count==0, "no child tasks allowed when recycled as a child" );
+        __TBB_ASSERT( p.parent==NULL, "parent must be null" );
+        __TBB_ASSERT( new_parent.prefix().state<=recycle, "corrupt parent's state" );
+        __TBB_ASSERT( new_parent.prefix().state!=freed, "parent already freed" );
+        p.state = allocated;
+        p.parent = &new_parent;
+#if __TBB_TASK_GROUP_CONTEXT
+        p.context = new_parent.prefix().context;
+#endif /* __TBB_TASK_GROUP_CONTEXT */
+    }
+
+    //! Schedule this for reexecution after current execute() returns.
+    /** Made obsolete by recycle_as_safe_continuation; may become deprecated. */
+    void recycle_to_reexecute() {
+        __TBB_ASSERT( prefix().state==executing, "execute not running, or already recycled" );
+        __TBB_ASSERT( prefix().ref_count==0, "no child tasks allowed when recycled for reexecution" );
+        prefix().state = reexecute;
+    }
+
+    // All depth-related methods are obsolete, and are retained for the sake 
+    // of backward source compatibility only
+    intptr_t depth() const {return 0;}
+    void set_depth( intptr_t ) {}
+    void add_to_depth( int ) {}
+
+
+    //------------------------------------------------------------------------
+    // Spawning and blocking
+    //------------------------------------------------------------------------
+
+    //! Set reference count
+    void set_ref_count( int count ) {
+#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
+        internal_set_ref_count(count);
+#else
+        prefix().ref_count = count;
+#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
+    }
+
+    //! Atomically increment reference count.
+    /** Has acquire semantics */  
+    void increment_ref_count() {
+        __TBB_FetchAndIncrementWacquire( &prefix().ref_count );
+    }
+
+    //! Atomically decrement reference count and returns its new value.
+    /** Has release semantics. */  
+    int decrement_ref_count() {
+#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
+        return int(internal_decrement_ref_count());
+#else
+        return int(__TBB_FetchAndDecrementWrelease( &prefix().ref_count ))-1;
+#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
+    }
+
+    //! Define recommended static forms via import from base class.
+    using task_base::spawn;
+
+    //! Similar to spawn followed by wait_for_all, but more efficient.
+    void spawn_and_wait_for_all( task& child ) {
+        prefix().owner->wait_for_all( *this, &child );
+    }
+
+    //! Similar to spawn followed by wait_for_all, but more efficient.
+    void __TBB_EXPORTED_METHOD spawn_and_wait_for_all( task_list& list );
+
+    //! Spawn task allocated by allocate_root, wait for it to complete, and deallocate it.
+    static void spawn_root_and_wait( task& root ) {
+        root.prefix().owner->spawn_root_and_wait( root, root.prefix().next );
+    }
+
+    //! Spawn root tasks on list and wait for all of them to finish.
+    /** If there are more tasks than worker threads, the tasks are spawned in
+        order of front to back. */
+    static void spawn_root_and_wait( task_list& root_list );
+
+    //! Wait for reference count to become one, and set reference count to zero.
+    /** Works on tasks while waiting. */
+    void wait_for_all() {
+        prefix().owner->wait_for_all( *this, NULL );
+    }
+
+    //! Enqueue task for starvation-resistant execution.
+#if TBB_PREVIEW_TASK_PRIORITY
+    /** The task will be enqueued on the normal priority level disregarding the
+        priority of its task group.
+        
+        The rationale for this semantics is that the priority of an enqueued task is
+        statically fixed at the moment of its enqueuing, while task group priority
+        is dynamic. Thus automatic priority inheritance would generally be subject
+        to a race, which may result in unexpected behavior. 
+        
+        Use enqueue() overload with explicit priority value and task::group_priority()
+        method to implement such priority inheritance when it is really necessary. **/
+#endif /* TBB_PREVIEW_TASK_PRIORITY */
+    static void enqueue( task& t ) {
+        t.prefix().owner->enqueue( t, NULL );
+    }
+
+#if TBB_PREVIEW_TASK_PRIORITY
+    //! Enqueue task for starvation-resistant execution on the specified priority level.
+    static void enqueue( task& t, priority_t p ) {
+        __TBB_ASSERT( p == priority_low || p == priority_normal || p == priority_high, "Invalid priority level value" );
+        t.prefix().owner->enqueue( t, (void*)p );
+    }
+#endif /* TBB_PREVIEW_TASK_PRIORITY */
+
+    //! The innermost task being executed or destroyed by the current thread at the moment.
+    static task& __TBB_EXPORTED_FUNC self();
+
+    //! task on whose behalf this task is working, or NULL if this is a root.
+    task* parent() const {return prefix().parent;}
+
+#if __TBB_TASK_GROUP_CONTEXT
+    //! This method is deprecated and will be removed in the future.
+    /** Use method group() instead. **/
+    task_group_context* context() {return prefix().context;}
+
+    //! Pointer to the task group descriptor.
+    task_group_context* group () { return prefix().context; }
+#endif /* __TBB_TASK_GROUP_CONTEXT */   
+
+    //! True if task was stolen from the task pool of another thread.
+    bool is_stolen_task() const {
+        return (prefix().extra_state & 0x80)!=0;
+    }
+
+    //------------------------------------------------------------------------
+    // Debugging
+    //------------------------------------------------------------------------
+
+    //! Current execution state
+    state_type state() const {return state_type(prefix().state);}
+
+    //! The internal reference count.
+    int ref_count() const {
+#if TBB_USE_ASSERT
+        internal::reference_count ref_count_ = prefix().ref_count;
+        __TBB_ASSERT( ref_count_==int(ref_count_), "integer overflow error");
+#endif
+        return int(prefix().ref_count);
+    }
+
+    //! Obsolete, and only retained for the sake of backward compatibility. Always returns true.
+    bool __TBB_EXPORTED_METHOD is_owned_by_current_thread() const;
+
+    //------------------------------------------------------------------------
+    // Affinity
+    //------------------------------------------------------------------------
+    //! An id as used for specifying affinity.
+    /** Guaranteed to be an integral type.  A value of 0 means no affinity. */
+    typedef internal::affinity_id affinity_id;
+
+    //! Set affinity for this task.
+    void set_affinity( affinity_id id ) {prefix().affinity = id;}
+
+    //! Current affinity of this task
+    affinity_id affinity() const {return prefix().affinity;}
+
+    //! Invoked by scheduler to notify task that it ran on unexpected thread.
+    /** Invoked before method execute() runs, if the task is stolen, or if the task 
+        has affinity but will be executed on another thread. 
+
+        The default action does nothing. */
+    virtual void __TBB_EXPORTED_METHOD note_affinity( affinity_id id );
+
+#if __TBB_TASK_GROUP_CONTEXT
+    //! Moves this task from its current group into another one.
+    /** Argument ctx specifies the new group.
+
+        The primary purpose of this method is to associate a unique task group context
+        with a task allocated for subsequent enqueuing. In contrast to spawned tasks,
+        enqueued ones normally outlive the scope where they were created. This makes
+        the traditional usage model, where task group contexts are allocated locally on
+        the stack, inapplicable. Dynamic allocation of context objects hurts performance.
+        Method change_group() makes it possible to make a task group context object
+        a member of the task class, and then associate it with its containing task 
+        object in the latter's constructor. **/
+    void __TBB_EXPORTED_METHOD change_group ( task_group_context& ctx );
+
+    //! Initiates cancellation of all tasks in this cancellation group and its subordinate groups.
+    /** \return false if cancellation has already been requested, true otherwise. **/
+    bool cancel_group_execution () { return prefix().context->cancel_group_execution(); }
+
+    //! Returns true if the context has received cancellation request.
+    bool is_cancelled () const { return prefix().context->is_group_execution_cancelled(); }
+#endif /* __TBB_TASK_GROUP_CONTEXT */
+
+#if TBB_PREVIEW_TASK_PRIORITY
+    //! Changes priority of the task group this task belongs to.
+    void set_group_priority ( priority_t p ) {  prefix().context->set_priority(p); }
+
+    //! Retrieves current priority of the task group this task belongs to.
+    priority_t group_priority () const { return prefix().context->priority(); }
+
+#endif /* TBB_PREVIEW_TASK_PRIORITY */
+
+private:
+    friend class interface5::internal::task_base;
+    friend class task_list;
+    friend class internal::scheduler;
+    friend class internal::allocate_root_proxy;
+#if __TBB_TASK_GROUP_CONTEXT
+    friend class internal::allocate_root_with_context_proxy;
+#endif /* __TBB_TASK_GROUP_CONTEXT */
+    friend class internal::allocate_continuation_proxy;
+    friend class internal::allocate_child_proxy;
+    friend class internal::allocate_additional_child_of_proxy;
+    
+    //! Get reference to corresponding task_prefix.
+    /** Version tag prevents loader on Linux from using the wrong symbol in debug builds. **/
+    internal::task_prefix& prefix( internal::version_tag* = NULL ) const {
+        return reinterpret_cast<internal::task_prefix*>(const_cast<task*>(this))[-1];
+    }
+}; // class task
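+
+// A minimal blocking-style sketch of the allocation and spawning methods above
+// (illustrative only; FibTask and the literal values are assumptions, not part of
+// this header):
+//
+//     struct FibTask : public tbb::task {
+//         const long n; long* const sum;
+//         FibTask( long n_, long* sum_ ) : n(n_), sum(sum_) {}
+//         /*override*/ tbb::task* execute() {
+//             if( n < 2 ) { *sum = n; return NULL; }
+//             long x, y;
+//             FibTask& a = *new( allocate_child() ) FibTask( n-1, &x );
+//             FibTask& b = *new( allocate_child() ) FibTask( n-2, &y );
+//             set_ref_count( 3 );              // two children + one for the wait
+//             spawn( b );
+//             spawn_and_wait_for_all( a );     // blocks until both children finish
+//             *sum = x + y;
+//             return NULL;
+//         }
+//     };
+//
+//     long result;
+//     FibTask& root = *new( tbb::task::allocate_root() ) FibTask( 20, &result );
+//     tbb::task::spawn_root_and_wait( root );  // runs the tree and frees root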
+
+//! task that does nothing.  Useful for synchronization.
+/** @ingroup task_scheduling */
+class empty_task: public task {
+    /*override*/ task* execute() {
+        return NULL;
+    }
+};
+
+//! A list of children.
+/** Used with methods that spawn or wait on multiple tasks, such as task::spawn(task_list&).
+    @ingroup task_scheduling */
+class task_list: internal::no_copy {
+private:
+    task* first;
+    task** next_ptr;
+    friend class task;
+    friend class interface5::internal::task_base;
+public:
+    //! Construct empty list
+    task_list() : first(NULL), next_ptr(&first) {}
+
+    //! Destroys the list, but does not destroy the task objects.
+    ~task_list() {}
+
+    //! True if list is empty; false otherwise.
+    bool empty() const {return !first;}
+
+    //! Push task onto back of list.
+    void push_back( task& task ) {
+        task.prefix().next = NULL;
+        *next_ptr = &task;
+        next_ptr = &task.prefix().next;
+    }
+
+    //! Pop the front task from the list.
+    task& pop_front() {
+        __TBB_ASSERT( !empty(), "attempt to pop item from empty task_list" );
+        task* result = first;
+        first = result->prefix().next;
+        if( !first ) next_ptr = &first;
+        return *result;
+    }
+
+    //! Clear the list
+    void clear() {
+        first=NULL;
+        next_ptr=&first;
+    }
+};
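+
+// A minimal task_list sketch (illustrative only; MyRootTask is an assumed root
+// task type, not part of this header):
+//
+//     tbb::task_list list;
+//     for( int i = 0; i < 4; ++i )
+//         list.push_back( *new( tbb::task::allocate_root() ) MyRootTask( i ) );
+//     tbb::task::spawn_root_and_wait( list );  // spawns all four roots and waits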
+
+inline void interface5::internal::task_base::spawn( task& t ) {
+    t.prefix().owner->spawn( t, t.prefix().next );
+}
+
+inline void interface5::internal::task_base::spawn( task_list& list ) {
+    if( task* t = list.first ) {
+        t->prefix().owner->spawn( *t, *list.next_ptr );
+        list.clear();
+    }
+}
+
+inline void task::spawn_root_and_wait( task_list& root_list ) {
+    if( task* t = root_list.first ) {
+        t->prefix().owner->spawn_root_and_wait( *t, *root_list.next_ptr );
+        root_list.clear();
+    }
+}
+
+} // namespace tbb
+
+inline void *operator new( size_t bytes, const tbb::internal::allocate_root_proxy& ) {
+    return &tbb::internal::allocate_root_proxy::allocate(bytes);
+}
+
+inline void operator delete( void* task, const tbb::internal::allocate_root_proxy& ) {
+    tbb::internal::allocate_root_proxy::free( *static_cast<tbb::task*>(task) );
+}
+
+#if __TBB_TASK_GROUP_CONTEXT
+inline void *operator new( size_t bytes, const tbb::internal::allocate_root_with_context_proxy& p ) {
+    return &p.allocate(bytes);
+}
+
+inline void operator delete( void* task, const tbb::internal::allocate_root_with_context_proxy& p ) {
+    p.free( *static_cast<tbb::task*>(task) );
+}
+#endif /* __TBB_TASK_GROUP_CONTEXT */
+
+inline void *operator new( size_t bytes, const tbb::internal::allocate_continuation_proxy& p ) {
+    return &p.allocate(bytes);
+}
+
+inline void operator delete( void* task, const tbb::internal::allocate_continuation_proxy& p ) {
+    p.free( *static_cast<tbb::task*>(task) );
+}
+
+inline void *operator new( size_t bytes, const tbb::internal::allocate_child_proxy& p ) {
+    return &p.allocate(bytes);
+}
+
+inline void operator delete( void* task, const tbb::internal::allocate_child_proxy& p ) {
+    p.free( *static_cast<tbb::task*>(task) );
+}
+
+inline void *operator new( size_t bytes, const tbb::internal::allocate_additional_child_of_proxy& p ) {
+    return &p.allocate(bytes);
+}
+
+inline void operator delete( void* task, const tbb::internal::allocate_additional_child_of_proxy& p ) {
+    p.free( *static_cast<tbb::task*>(task) );
+}
+
+#endif /* __TBB_task_H */
diff --git a/tbb/include/tbb/task_group.h b/tbb/include/tbb/task_group.h
new file mode 100644 (file)
index 0000000..fd4d552
--- /dev/null
@@ -0,0 +1,248 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_task_group_H
+#define __TBB_task_group_H
+
+#include "task.h"
+#include "tbb_exception.h"
+
+namespace tbb {
+
+namespace internal {
+    template<typename F> class task_handle_task;
+}
+
+template<typename F>
+class task_handle : internal::no_assign {
+    template<typename _F> friend class internal::task_handle_task;
+
+    static const intptr_t scheduled = 0x1;
+
+    F my_func;
+    intptr_t my_state;
+
+    void mark_scheduled () {
+        // The check here is intentionally lax to avoid the overhead of an interlocked operation
+        if ( my_state & scheduled )
+            internal::throw_exception( internal::eid_invalid_multiple_scheduling );
+        my_state |= scheduled;
+    }
+public:
+    task_handle( const F& f ) : my_func(f), my_state(0) {}
+
+    void operator() () const { my_func(); }
+};
+
+enum task_group_status {
+    not_complete,
+    complete,
+    canceled
+};
+
+namespace internal {
+
+// Suppress gratuitous warnings from icc 11.0 when lambda expressions are used in instances of function_task.
+//#pragma warning(disable: 588)
+
+template<typename F>
+class function_task : public task {
+    F my_func;
+    /*override*/ task* execute() {
+        my_func();
+        return NULL;
+    }
+public:
+    function_task( const F& f ) : my_func(f) {}
+};
+
+template<typename F>
+class task_handle_task : public task {
+    task_handle<F>& my_handle;
+    /*override*/ task* execute() {
+        my_handle();
+        return NULL;
+    }
+public:
+    task_handle_task( task_handle<F>& h ) : my_handle(h) { h.mark_scheduled(); }
+};
+
+class task_group_base : internal::no_copy {
+protected:
+    empty_task* my_root;
+    task_group_context my_context;
+
+    task& owner () { return *my_root; }
+
+    template<typename F>
+    task_group_status internal_run_and_wait( F& f ) {
+        __TBB_TRY {
+            if ( !my_context.is_group_execution_cancelled() )
+                f();
+        } __TBB_CATCH( ... ) {
+            my_context.register_pending_exception();
+        }
+        return wait();
+    }
+
+    template<typename F, typename Task>
+    void internal_run( F& f ) {
+        owner().spawn( *new( owner().allocate_additional_child_of(*my_root) ) Task(f) );
+    }
+
+public:
+    task_group_base( uintptr_t traits = 0 )
+        : my_context(task_group_context::bound, task_group_context::default_traits | traits)
+    {
+        my_root = new( task::allocate_root(my_context) ) empty_task;
+        my_root->set_ref_count(1);
+    }
+
+    ~task_group_base() {
+        if( my_root->ref_count() > 1 ) {
+            bool stack_unwinding_in_progress = std::uncaught_exception();
+            // Always attempt to do proper cleanup to avoid inevitable memory corruption 
+            // in case of missing wait (for the sake of better testability & debuggability)
+            if ( !is_canceling() )
+                cancel();
+            __TBB_TRY {
+                my_root->wait_for_all();
+            } __TBB_CATCH (...) {
+                task::destroy(*my_root);
+                __TBB_RETHROW();
+            }
+            task::destroy(*my_root);
+            if ( !stack_unwinding_in_progress )
+                internal::throw_exception( internal::eid_missing_wait );
+        }
+        else {
+            task::destroy(*my_root);
+        }
+    }
+
+    template<typename F>
+    void run( task_handle<F>& h ) {
+        internal_run< task_handle<F>, internal::task_handle_task<F> >( h );
+    }
+
+    task_group_status wait() {
+        __TBB_TRY {
+            my_root->wait_for_all();
+        } __TBB_CATCH( ... ) {
+            my_context.reset();
+            __TBB_RETHROW();
+        }
+        if ( my_context.is_group_execution_cancelled() ) {
+            my_context.reset();
+            return canceled;
+        }
+        return complete;
+    }
+
+    bool is_canceling() {
+        return my_context.is_group_execution_cancelled();
+    }
+
+    void cancel() {
+        my_context.cancel_group_execution();
+    }
+}; // class task_group_base
+
+} // namespace internal
+
+class task_group : public internal::task_group_base {
+public:
+    task_group () : task_group_base( task_group_context::concurrent_wait ) {}
+
+#if TBB_DEPRECATED
+    ~task_group() __TBB_TRY {
+        __TBB_ASSERT( my_root->ref_count() != 0, NULL );
+        if( my_root->ref_count() > 1 )
+            my_root->wait_for_all();
+    }
+#if TBB_USE_EXCEPTIONS
+    catch (...) {
+        // Have to destroy my_root here as the base class destructor won't be called
+        task::destroy(*my_root);
+        throw;
+    }
+#endif /* TBB_USE_EXCEPTIONS */
+#endif /* TBB_DEPRECATED */
+
+#if __SUNPRO_CC
+    template<typename F>
+    void run( task_handle<F>& h ) {
+        internal_run< task_handle<F>, internal::task_handle_task<F> >( h );
+    }
+#else
+    using task_group_base::run;
+#endif
+
+    template<typename F>
+    void run( const F& f ) {
+        internal_run< const F, internal::function_task<F> >( f );
+    }
+
+    template<typename F>
+    task_group_status run_and_wait( const F& f ) {
+        return internal_run_and_wait<const F>( f );
+    }
+
+    template<typename F>
+    task_group_status run_and_wait( task_handle<F>& h ) {
+      return internal_run_and_wait< task_handle<F> >( h );
+    }
+}; // class task_group
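+
+// A minimal task_group sketch (illustrative only; Body is an assumed functor
+// type, not part of this header):
+//
+//     struct Body { void operator()() const { /* do some work */ } };
+//     tbb::task_group g;
+//     g.run( Body() );                      // runs asynchronously
+//     g.run( Body() );
+//     if( g.wait() == tbb::canceled )
+//         /* the group was cancelled via g.cancel() or a pending exception */;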
+
+class structured_task_group : public internal::task_group_base {
+public:
+    template<typename F>
+    task_group_status run_and_wait ( task_handle<F>& h ) {
+        return internal_run_and_wait< task_handle<F> >( h );
+    }
+
+    task_group_status wait() {
+        task_group_status res = task_group_base::wait();
+        my_root->set_ref_count(1);
+        return res;
+    }
+}; // class structured_task_group
+
+inline 
+bool is_current_task_group_canceling() {
+    return task::self().is_cancelled();
+}
+
+template<class F>
+task_handle<F> make_task( const F& f ) {
+    return task_handle<F>( f );
+}
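+
+// A minimal task_handle sketch (illustrative only; Body is an assumed functor
+// type, not part of this header). A task_handle may be scheduled only once:
+//
+//     struct Body { void operator()() const { /* do some work */ } };
+//     tbb::task_handle<Body> h = tbb::make_task( Body() );
+//     tbb::structured_task_group g;
+//     g.run( h );           // scheduling h a second time throws
+//     g.wait();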
+
+} // namespace tbb
+
+#endif /* __TBB_task_group_H */
diff --git a/tbb/include/tbb/task_scheduler_init.h b/tbb/include/tbb/task_scheduler_init.h
new file mode 100644 (file)
index 0000000..2f8658e
--- /dev/null
@@ -0,0 +1,149 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_task_scheduler_init_H
+#define __TBB_task_scheduler_init_H
+
+#include "tbb_stddef.h"
+#include "limits.h"
+
+namespace tbb {
+
+typedef std::size_t stack_size_type;
+
+//! @cond INTERNAL
+namespace internal {
+    //! Internal to library. Should not be used by clients.
+    /** @ingroup task_scheduling */
+    class scheduler;
+} // namespace internal
+//! @endcond
+
+//! Class delimiting the scope of task scheduler activity.
+/** A thread can construct a task_scheduler_init object and keep it alive
+    while it uses TBB's tasking subsystem (including parallel algorithms).
+
+    This class allows some properties of the TBB task pool to be customized.
+    For example, it can limit the concurrency level of parallel work initiated by the
+    given thread. It can also be used to specify the stack size of the TBB worker threads,
+    though this setting is not effective if the thread pool has already been created.
+
+    If a parallel construct is used without a task_scheduler_init object having been
+    created previously, the scheduler is initialized automatically with default settings
+    and persists until this thread exits. The default concurrency level is defined
+    as described in task_scheduler_init::initialize().
+    @ingroup task_scheduling */
+class task_scheduler_init: internal::no_copy {
+#if TBB_USE_EXCEPTIONS
+    enum ExceptionPropagationMode {
+        propagation_mode_exact = 1u,
+        propagation_mode_captured = 2u,
+        propagation_mode_mask = propagation_mode_exact | propagation_mode_captured
+    };
+#endif /* TBB_USE_EXCEPTIONS */
+
+    /** NULL if not currently initialized. */
+    internal::scheduler* my_scheduler;
+public:
+
+    //! Special value for number_of_threads that requests automatic selection of the thread count.
+    static const int automatic = -1;
+
+    //! Argument to initialize() or constructor that causes initialization to be deferred.
+    static const int deferred = -2;
+
+    //! Ensure that scheduler exists for this thread
+    /** A value of -1 lets TBB decide on the number of threads, which is usually
+        the maximal hardware concurrency for this process, that is, the number of logical
+        CPUs on the machine (possibly limited by the processor affinity mask of this
+        process (Windows) or of this thread (Linux, FreeBSD)). This is the preferable option
+        for production code because it helps to avoid nasty surprises when several
+        TBB-based components run side-by-side or in a nested fashion inside the same
+        process.
+
+        The number_of_threads is ignored if any other task_scheduler_inits 
+        currently exist.  A thread may construct multiple task_scheduler_inits.  
+        Doing so does no harm because the underlying scheduler is reference counted. */
+    void __TBB_EXPORTED_METHOD initialize( int number_of_threads=automatic );
+
+    //! The overloaded method with stack size parameter
+    /** Overloading is necessary to preserve ABI compatibility */
+    void __TBB_EXPORTED_METHOD initialize( int number_of_threads, stack_size_type thread_stack_size );
+
+    //! Inverse of method initialize.
+    void __TBB_EXPORTED_METHOD terminate();
+
+    //! Shorthand for default constructor followed by call to initialize(number_of_threads).
+    task_scheduler_init( int number_of_threads=automatic, stack_size_type thread_stack_size=0 ) : my_scheduler(NULL)  {
+#if TBB_USE_EXCEPTIONS
+        // Take two lowest order bits of the stack size argument to communicate
+        // default exception propagation mode of the client to be used when the
+        // client manually creates tasks in the master thread and does not use
+        // explicit task group context object. This is necessary because newer 
+        // TBB binaries with exact propagation enabled by default may be used 
+        // by older clients that expect tbb::captured_exception wrapper.
+        // All zeros mean old client - no preference. 
+        __TBB_ASSERT( !(thread_stack_size & propagation_mode_mask), "Requested stack size is not aligned" );
+        thread_stack_size |= TBB_USE_CAPTURED_EXCEPTION ? propagation_mode_captured : propagation_mode_exact;
+#endif /* TBB_USE_EXCEPTIONS */
+        initialize( number_of_threads, thread_stack_size );
+    }
+
+    //! Destroy scheduler for this thread if thread has no other live task_scheduler_inits.
+    ~task_scheduler_init() {
+        if( my_scheduler ) 
+            terminate();
+        internal::poison_pointer( my_scheduler );
+    }
+    //! Returns the number of threads TBB scheduler would create if initialized by default.
+    /** The result returned by this method does not depend on whether the scheduler 
+        has already been initialized.
+        
+        Because TBB 2.0 does not yet support blocking tasks, you may use this method
+        to boost the number of threads in TBB's internal pool if your tasks 
+        perform I/O operations. The optimal number of additional threads depends on how
+        much time your tasks spend in the blocked state.
+        
+        Before TBB 3.0 U4 this method returned the number of logical CPUs in the
+        system. Currently on Windows, Linux and FreeBSD it returns the number of
+        logical CPUs available to the current process in accordance with its affinity
+        mask.
+        
+        NOTE: The return value of this method never changes after its first invocation. 
+        This means that changes in the process affinity mask that took place after
+        this method was first invoked will not affect the number of worker threads
+        in the TBB worker thread pool. */
+    static int __TBB_EXPORTED_FUNC default_num_threads ();
+
+    //! Returns true if scheduler is active (initialized); false otherwise
+    bool is_active() const { return my_scheduler != NULL; }
+};
+
+} // namespace tbb
+
+#endif /* __TBB_task_scheduler_init_H */
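For orientation, here is a minimal, hypothetical usage sketch of the task_scheduler_init API added above. It is not part of this commit and simply assumes the bundled headers and import libraries are on the include/link path.

// Hypothetical sketch, not part of this commit: explicit scheduler lifetime control.
#include "tbb/task_scheduler_init.h"
#include <cstdio>

int main() {
    // Deferred construction: no scheduler is created yet.
    tbb::task_scheduler_init init( tbb::task_scheduler_init::deferred );

    std::printf( "default_num_threads() = %d\n",
                 tbb::task_scheduler_init::default_num_threads() );

    init.initialize( 4 );      // explicit thread count; 'automatic' is the usual choice
    // ... run TBB algorithms here ...
    init.terminate();          // optional: the destructor would call this anyway
    return 0;
}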
diff --git a/tbb/include/tbb/task_scheduler_observer.h b/tbb/include/tbb/task_scheduler_observer.h
new file mode 100644 (file)
index 0000000..4c09863
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_task_scheduler_observer_H
+#define __TBB_task_scheduler_observer_H
+
+#include "atomic.h"
+
+#if __TBB_SCHEDULER_OBSERVER
+
+namespace tbb {
+
+namespace internal {
+
+class observer_proxy;
+
+class task_scheduler_observer_v3 {
+    friend class observer_proxy;
+    observer_proxy* my_proxy;
+    atomic<intptr_t> my_busy_count;
+public:
+    //! Enable or disable observation
+    void __TBB_EXPORTED_METHOD observe( bool state=true );
+
+    //! True if observation is enabled; false otherwise.
+    bool is_observing() const {return my_proxy!=NULL;}
+
+    //! Construct observer with observation disabled.
+    task_scheduler_observer_v3() : my_proxy(NULL) {my_busy_count=0;}
+
+    //! Called by a thread before its first steal since observation became enabled
+    virtual void on_scheduler_entry( bool /*is_worker*/ ) {} 
+
+    //! Called by a thread when it no longer takes part in task stealing.
+    virtual void on_scheduler_exit( bool /*is_worker*/ ) {}
+
+    //! Destructor
+    virtual ~task_scheduler_observer_v3() {observe(false);}
+};
+
+} // namespace internal
+
+typedef internal::task_scheduler_observer_v3 task_scheduler_observer;
+
+} // namespace tbb
+
+#endif /* __TBB_SCHEDULER_OBSERVER */
+
+#endif /* __TBB_task_scheduler_observer_H */
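A short, hypothetical sketch of how the observer interface above is typically used (not part of this commit): derive from tbb::task_scheduler_observer, enable observation with observe(true), and override the entry/exit callbacks.

// Hypothetical sketch, not part of this commit: count threads inside the TBB scheduler.
#include "tbb/task_scheduler_observer.h"
#include "tbb/atomic.h"

class thread_counter : public tbb::task_scheduler_observer {
public:
    thread_counter() { my_count = 0; observe( true ); }        // start observing
    /*override*/ void on_scheduler_entry( bool /*is_worker*/ ) { ++my_count; }
    /*override*/ void on_scheduler_exit( bool /*is_worker*/ )  { --my_count; }
    int threads_in_scheduler() const { return my_count; }
private:
    tbb::atomic<int> my_count;
};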
diff --git a/tbb/include/tbb/tbb.h b/tbb/include/tbb/tbb.h
new file mode 100644 (file)
index 0000000..259d1df
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_tbb_H
+#define __TBB_tbb_H
+
+/** 
+    This header bulk-includes declarations or definitions of all the functionality
+    provided by TBB (save for malloc-dependent headers).
+
+    If you use only a few TBB constructs, consider including specific headers only.
+    Any header listed below can be included independently of others.
+**/
+
+#include "aligned_space.h"
+#include "atomic.h"
+#include "blocked_range.h"
+#include "blocked_range2d.h"
+#include "blocked_range3d.h"
+#include "cache_aligned_allocator.h"
+#include "combinable.h"
+#include "concurrent_unordered_map.h"
+#include "concurrent_hash_map.h"
+#include "concurrent_queue.h"
+#include "concurrent_vector.h"
+#include "critical_section.h"
+#include "enumerable_thread_specific.h"
+#include "mutex.h"
+#include "null_mutex.h"
+#include "null_rw_mutex.h"
+#include "parallel_do.h"
+#include "parallel_for.h"
+#include "parallel_for_each.h"
+#include "parallel_invoke.h"
+#include "parallel_reduce.h"
+#include "parallel_scan.h"
+#include "parallel_sort.h"
+#include "partitioner.h"
+#include "pipeline.h"
+#include "queuing_mutex.h"
+#include "queuing_rw_mutex.h"
+#include "reader_writer_lock.h"
+#if TBB_PREVIEW_CONCURRENT_PRIORITY_QUEUE
+#include "concurrent_priority_queue.h"
+#endif
+#include "recursive_mutex.h"
+#include "spin_mutex.h"
+#include "spin_rw_mutex.h"
+#include "task.h"
+#include "task_group.h"
+#include "task_scheduler_init.h"
+#include "task_scheduler_observer.h"
+#include "tbb_allocator.h"
+#include "tbb_exception.h"
+#include "tbb_thread.h"
+#include "tick_count.h"
+
+#endif /* __TBB_tbb_H */
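As the header comment above recommends, a translation unit that uses only a couple of constructs can include just those headers instead of tbb.h. A hypothetical sketch (not part of this commit), assuming a lambda-capable compiler such as the VC10 toolchain these binaries target:

// Hypothetical sketch, not part of this commit: include only what is used.
#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"
#include <cstddef>

void scale( float* a, std::size_t n, float k ) {
    tbb::parallel_for( tbb::blocked_range<std::size_t>( 0, n ),
        [=]( const tbb::blocked_range<std::size_t>& r ) {
            for ( std::size_t i = r.begin(); i != r.end(); ++i )
                a[i] *= k;
        } );
}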
diff --git a/tbb/include/tbb/tbb_allocator.h b/tbb/include/tbb/tbb_allocator.h
new file mode 100644 (file)
index 0000000..bb4690e
--- /dev/null
@@ -0,0 +1,214 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_tbb_allocator_H
+#define __TBB_tbb_allocator_H
+
+#include "tbb_stddef.h"
+#include <new>
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
+    #pragma warning (push)
+    #pragma warning (disable: 4530)
+#endif
+
+#include <cstring>
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    #pragma warning (pop)
+#endif
+
+namespace tbb {
+
+//! @cond INTERNAL
+namespace internal {
+
+    //! Deallocates memory using FreeHandler
+    /** The function uses scalable_free if the scalable allocator is available and free if not. */
+    void __TBB_EXPORTED_FUNC deallocate_via_handler_v3( void *p );
+
+    //! Allocates memory using MallocHandler
+    /** The function uses scalable_malloc if the scalable allocator is available and malloc if not. */
+    void* __TBB_EXPORTED_FUNC allocate_via_handler_v3( size_t n );
+
+    //! Returns true if standard malloc/free are used to work with memory.
+    bool __TBB_EXPORTED_FUNC is_malloc_used_v3();
+}
+//! @endcond
+
+#if _MSC_VER && !defined(__INTEL_COMPILER)
+    // Workaround for erroneous "unreferenced parameter" warning in method destroy.
+    #pragma warning (push)
+    #pragma warning (disable: 4100)
+#endif
+
+//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5
+/** The class selects the best memory allocation mechanism available 
+    from scalable_malloc and standard malloc.
+    The members are ordered the same way they are in section 20.4.1
+    of the ISO C++ standard.
+    @ingroup memory_allocation */
+template<typename T>
+class tbb_allocator {
+public:
+    typedef typename internal::allocator_type<T>::value_type value_type;
+    typedef value_type* pointer;
+    typedef const value_type* const_pointer;
+    typedef value_type& reference;
+    typedef const value_type& const_reference;
+    typedef size_t size_type;
+    typedef ptrdiff_t difference_type;
+    template<typename U> struct rebind {
+        typedef tbb_allocator<U> other;
+    };
+
+    //! Specifies current allocator
+    enum malloc_type {
+        scalable, 
+        standard
+    };
+
+    tbb_allocator() throw() {}
+    tbb_allocator( const tbb_allocator& ) throw() {}
+    template<typename U> tbb_allocator(const tbb_allocator<U>&) throw() {}
+
+    pointer address(reference x) const {return &x;}
+    const_pointer address(const_reference x) const {return &x;}
+    
+    //! Allocate space for n objects.
+    pointer allocate( size_type n, const void* /*hint*/ = 0) {
+        return pointer(internal::allocate_via_handler_v3( n * sizeof(value_type) ));
+    }
+
+    //! Free previously allocated block of memory.
+    void deallocate( pointer p, size_type ) {
+        internal::deallocate_via_handler_v3(p);        
+    }
+
+    //! Largest value for which method allocate might succeed.
+    size_type max_size() const throw() {
+        size_type max = static_cast<size_type>(-1) / sizeof (value_type);
+        return (max > 0 ? max : 1);
+    }
+    
+    //! Copy-construct value at location pointed to by p.
+    void construct( pointer p, const value_type& value ) {::new((void*)(p)) value_type(value);}
+
+    //! Destroy value at location pointed to by p.
+    void destroy( pointer p ) {p->~value_type();}
+
+    //! Returns current allocator
+    static malloc_type allocator_type() {
+        return internal::is_malloc_used_v3() ? standard : scalable;
+    }
+};
+
+#if _MSC_VER && !defined(__INTEL_COMPILER)
+    #pragma warning (pop)
+#endif // warning 4100 is back
+
+//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1
+/** @ingroup memory_allocation */
+template<> 
+class tbb_allocator<void> {
+public:
+    typedef void* pointer;
+    typedef const void* const_pointer;
+    typedef void value_type;
+    template<typename U> struct rebind {
+        typedef tbb_allocator<U> other;
+    };
+};
+
+template<typename T, typename U>
+inline bool operator==( const tbb_allocator<T>&, const tbb_allocator<U>& ) {return true;}
+
+template<typename T, typename U>
+inline bool operator!=( const tbb_allocator<T>&, const tbb_allocator<U>& ) {return false;}
+
+//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5
+/** The class is an adapter over an actual allocator that zero-fills each
+    allocation using memset.
+    The members are ordered the same way they are in section 20.4.1
+    of the ISO C++ standard.
+    @ingroup memory_allocation */
+template <typename T, template<typename X> class Allocator = tbb_allocator>
+class zero_allocator : public Allocator<T>
+{
+public:
+    typedef Allocator<T> base_allocator_type;
+    typedef typename base_allocator_type::value_type value_type;
+    typedef typename base_allocator_type::pointer pointer;
+    typedef typename base_allocator_type::const_pointer const_pointer;
+    typedef typename base_allocator_type::reference reference;
+    typedef typename base_allocator_type::const_reference const_reference;
+    typedef typename base_allocator_type::size_type size_type;
+    typedef typename base_allocator_type::difference_type difference_type;
+    template<typename U> struct rebind {
+        typedef zero_allocator<U, Allocator> other;
+    };
+
+    zero_allocator() throw() { }
+    zero_allocator(const zero_allocator &a) throw() : base_allocator_type( a ) { }
+    template<typename U>
+    zero_allocator(const zero_allocator<U> &a) throw() : base_allocator_type( Allocator<U>( a ) ) { }
+
+    pointer allocate(const size_type n, const void *hint = 0 ) {
+        pointer ptr = base_allocator_type::allocate( n, hint );
+        std::memset( ptr, 0, n * sizeof(value_type) );
+        return ptr;
+    }
+};
+
+//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1
+/** @ingroup memory_allocation */
+template<template<typename T> class Allocator> 
+class zero_allocator<void, Allocator> : public Allocator<void> {
+public:
+    typedef Allocator<void> base_allocator_type;
+    typedef typename base_allocator_type::value_type value_type;
+    typedef typename base_allocator_type::pointer pointer;
+    typedef typename base_allocator_type::const_pointer const_pointer;
+    template<typename U> struct rebind {
+        typedef zero_allocator<U, Allocator> other;
+    };
+};
+
+template<typename T1, template<typename X1> class B1, typename T2, template<typename X2> class B2>
+inline bool operator==( const zero_allocator<T1,B1> &a, const zero_allocator<T2,B2> &b) {
+    return static_cast< B1<T1> >(a) == static_cast< B2<T2> >(b);
+}
+template<typename T1, template<typename X1> class B1, typename T2, template<typename X2> class B2>
+inline bool operator!=( const zero_allocator<T1,B1> &a, const zero_allocator<T2,B2> &b) {
+    return static_cast< B1<T1> >(a) != static_cast< B2<T2> >(b);
+}
+
+} // namespace tbb 
+
+#endif /* __TBB_tbb_allocator_H */
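A hypothetical usage sketch for the allocators above (not part of this commit): tbb_allocator plugs into standard containers, zero_allocator additionally zero-fills every allocation, and the static allocator_type() reports whether the scalable allocator was found at run time.

// Hypothetical sketch, not part of this commit.
#include "tbb/tbb_allocator.h"
#include <vector>
#include <cstdio>

int main() {
    // Elements come from scalable_malloc when tbbmalloc is available, from malloc otherwise.
    std::vector<int, tbb::tbb_allocator<int> > v( 16, 0 );

    // zero_allocator memsets every allocation to zero before returning it.
    std::vector<int, tbb::zero_allocator<int> > z;
    z.resize( 8 );

    bool scalable = tbb::tbb_allocator<int>::allocator_type()
                    == tbb::tbb_allocator<int>::scalable;
    std::printf( "scalable allocator in use: %s\n", scalable ? "yes" : "no" );
    return 0;
}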
diff --git a/tbb/include/tbb/tbb_config.h b/tbb/include/tbb/tbb_config.h
new file mode 100644 (file)
index 0000000..988035f
--- /dev/null
@@ -0,0 +1,253 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_tbb_config_H
+#define __TBB_tbb_config_H
+
+/** This header is supposed to contain macro definitions and C style comments only.
+    The macros defined here are intended to control such aspects of TBB build as 
+    - compilation modes
+    - feature sets
+    - workarounds presence 
+**/
+
+/** Compilation modes **/
+
+#ifndef TBB_USE_DEBUG
+#ifdef TBB_DO_ASSERT
+#define TBB_USE_DEBUG TBB_DO_ASSERT
+#else
+#define TBB_USE_DEBUG 0
+#endif /* TBB_DO_ASSERT */
+#else
+#define TBB_DO_ASSERT TBB_USE_DEBUG
+#endif /* TBB_USE_DEBUG */
+
+#ifndef TBB_USE_ASSERT
+#ifdef TBB_DO_ASSERT
+#define TBB_USE_ASSERT TBB_DO_ASSERT
+#else 
+#define TBB_USE_ASSERT TBB_USE_DEBUG
+#endif /* TBB_DO_ASSERT */
+#endif /* TBB_USE_ASSERT */
+
+#ifndef TBB_USE_THREADING_TOOLS
+#ifdef TBB_DO_THREADING_TOOLS
+#define TBB_USE_THREADING_TOOLS TBB_DO_THREADING_TOOLS
+#else 
+#define TBB_USE_THREADING_TOOLS TBB_USE_DEBUG
+#endif /* TBB_DO_THREADING_TOOLS */
+#endif /* TBB_USE_THREADING_TOOLS */
+
+#ifndef TBB_USE_PERFORMANCE_WARNINGS
+#ifdef TBB_PERFORMANCE_WARNINGS
+#define TBB_USE_PERFORMANCE_WARNINGS TBB_PERFORMANCE_WARNINGS
+#else 
+#define TBB_USE_PERFORMANCE_WARNINGS TBB_USE_DEBUG
+#endif /* TBB_PERFORMANCE_WARNINGS */
+#endif /* TBB_USE_PERFORMANCE_WARNINGS */
+
+#if !defined(__EXCEPTIONS) && !defined(_CPPUNWIND) && !defined(__SUNPRO_CC) || defined(_XBOX)
+    #if TBB_USE_EXCEPTIONS
+        #error Compilation settings do not support exception handling. Please do not set TBB_USE_EXCEPTIONS macro or set it to 0.
+    #elif !defined(TBB_USE_EXCEPTIONS)
+        #define TBB_USE_EXCEPTIONS 0
+    #endif
+#elif !defined(TBB_USE_EXCEPTIONS)
+    #define TBB_USE_EXCEPTIONS 1
+#endif
+
+#ifndef TBB_IMPLEMENT_CPP0X
+    /** By default, use the compiler-provided C++0x classes if available; otherwise let TBB implement them **/
+    #if __GNUC__==4 && __GNUC_MINOR__>=4 && __GXX_EXPERIMENTAL_CXX0X__
+        #define TBB_IMPLEMENT_CPP0X 0
+    #else
+        #define TBB_IMPLEMENT_CPP0X 1
+    #endif
+#endif /* TBB_IMPLEMENT_CPP0X */
+
+#ifndef __TBB_DYNAMIC_LOAD_ENABLED
+    #define __TBB_DYNAMIC_LOAD_ENABLED !__TBB_TASK_CPP_DIRECTLY_INCLUDED
+#elif !__TBB_DYNAMIC_LOAD_ENABLED
+    #if _WIN32||_WIN64
+        #define __TBB_NO_IMPLICIT_LINKAGE 1
+        #define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1
+    #else
+        #define __TBB_WEAK_SYMBOLS 1
+    #endif
+#endif
+
+/** Feature sets **/
+
+#ifndef __TBB_COUNT_TASK_NODES
+    #define __TBB_COUNT_TASK_NODES TBB_USE_ASSERT
+#endif
+
+#ifndef __TBB_TASK_GROUP_CONTEXT
+    #define __TBB_TASK_GROUP_CONTEXT 1
+#endif /* __TBB_TASK_GROUP_CONTEXT */
+
+#ifndef __TBB_SCHEDULER_OBSERVER
+    #define __TBB_SCHEDULER_OBSERVER 1
+#endif /* __TBB_SCHEDULER_OBSERVER */
+
+#ifndef __TBB_TASK_PRIORITY
+    #define __TBB_TASK_PRIORITY __TBB_CPF_BUILD
+#endif /* __TBB_TASK_PRIORITY */
+
+#if __TBB_TASK_PRIORITY && !__TBB_TASK_GROUP_CONTEXT
+    #error __TBB_TASK_PRIORITY requires __TBB_TASK_GROUP_CONTEXT to be enabled
+#endif
+
+#ifdef TBB_PREVIEW_TASK_PRIORITY
+    #if TBB_PREVIEW_TASK_PRIORITY
+        #define __TBB_NO_IMPLICIT_LINKAGE 1
+        #if __TBB_BUILD && !__TBB_TASK_PRIORITY
+            #error TBB_PREVIEW_TASK_PRIORITY requires __TBB_TASK_PRIORITY to be enabled during TBB build
+        #elif !__TBB_TASK_GROUP_CONTEXT
+            #error TBB_PREVIEW_TASK_PRIORITY requires __TBB_TASK_GROUP_CONTEXT to be enabled
+        #endif
+    #endif
+#else
+    #if __TBB_BUILD
+        #define TBB_PREVIEW_TASK_PRIORITY __TBB_TASK_PRIORITY
+    #endif
+#endif /* TBB_PREVIEW_TASK_PRIORITY */
+
+#if !defined(__TBB_SURVIVE_THREAD_SWITCH) && (_WIN32 || _WIN64 || __linux__)
+    #define __TBB_SURVIVE_THREAD_SWITCH 1
+#endif /* __TBB_SURVIVE_THREAD_SWITCH */
+
+
+/* TODO: The following condition should be extended as soon as new compilers/runtimes 
+         with std::exception_ptr support appear. */
+#define __TBB_EXCEPTION_PTR_PRESENT  (_MSC_VER >= 1600 || __GXX_EXPERIMENTAL_CXX0X__ && (__GNUC__==4 && __GNUC_MINOR__>=4))
+
+
+#ifndef TBB_USE_CAPTURED_EXCEPTION
+    #if __TBB_EXCEPTION_PTR_PRESENT
+        #define TBB_USE_CAPTURED_EXCEPTION 0
+    #else
+        #define TBB_USE_CAPTURED_EXCEPTION 1
+    #endif
+#else /* defined TBB_USE_CAPTURED_EXCEPTION */
+    #if !TBB_USE_CAPTURED_EXCEPTION && !__TBB_EXCEPTION_PTR_PRESENT
+        #error Current runtime does not support std::exception_ptr. Set TBB_USE_CAPTURED_EXCEPTION and make sure that your code is ready to catch tbb::captured_exception.
+    #endif
+#endif /* defined TBB_USE_CAPTURED_EXCEPTION */
+
+
+#ifndef __TBB_DEFAULT_PARTITIONER
+#if TBB_DEPRECATED
+/** Default partitioner for parallel loop templates in TBB 1.0-2.1 */
+#define __TBB_DEFAULT_PARTITIONER tbb::simple_partitioner
+#else
+/** Default partitioner for parallel loop templates in TBB 2.2 */
+#define __TBB_DEFAULT_PARTITIONER tbb::auto_partitioner
+#endif /* TBB_DEPRECATED */
+#endif /* !defined(__TBB_DEFAULT_PARTITIONER) */
+
+/** Workarounds presence **/
+
+#if __GNUC__==4 && __GNUC_MINOR__>=4 && !defined(__INTEL_COMPILER)
+    #define __TBB_GCC_WARNING_SUPPRESSION_ENABLED 1
+#endif
+
+/** Macros of the form __TBB_XXX_BROKEN denote known issues caused by bugs in
+    compilers, standard libraries, or OS-specific libraries. They should be
+    removed as soon as the corresponding bugs are fixed or the buggy OS/compiler
+    versions go out of the support list. 
+**/
+
+#if _MSC_VER && __INTEL_COMPILER && (__INTEL_COMPILER<1110 || __INTEL_COMPILER==1110 && __INTEL_COMPILER_BUILD_DATE < 20091012)
+    /** Necessary to avoid ICL error (or warning in non-strict mode): 
+        "exception specification for implicitly declared virtual destructor is 
+        incompatible with that of overridden one". **/
+    #define __TBB_DEFAULT_DTOR_THROW_SPEC_BROKEN 1
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER < 1500 && !defined(__INTEL_COMPILER)
+    /** VS2005 and earlier do not allow declaring template class as a friend 
+        of classes defined in other namespaces. **/
+    #define __TBB_TEMPLATE_FRIENDS_BROKEN 1
+#endif
+
+#if __GLIBC__==2 && __GLIBC_MINOR__==3 || __MINGW32__
+    //! Macro controlling EH usages in TBB tests
+    /** Some older versions of glibc crash when exception handling happens concurrently. **/
+    #define __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN 1
+#endif
+
+#if (_WIN32||_WIN64) && __INTEL_COMPILER == 1110
+    /** A bug in Intel compiler 11.1.044 (IA-32, Windows) leads to a worker thread crash on the thread's startup. **/
+    #define __TBB_ICL_11_1_CODE_GEN_BROKEN 1
+#endif
+
+#if __GNUC__==3 && __GNUC_MINOR__==3 && !defined(__INTEL_COMPILER)
+    /** A bug in GCC 3.3 with access to nested classes declared in a protected section */
+    #define __TBB_GCC_3_3_PROTECTED_BROKEN 1
+#endif
+
+#if __MINGW32__ && (__GNUC__<4 || __GNUC__==4 && __GNUC_MINOR__<2)
+    /** MinGW has a bug with stack alignment for routines invoked from MS RTLs.
+        Since GCC 4.2, the bug can be worked around via a special attribute. **/
+    #define __TBB_SSE_STACK_ALIGNMENT_BROKEN 1
+#endif
+
+#if __GNUC__==4 && __GNUC_MINOR__==3 && __GNUC_PATCHLEVEL__==0
+    // GCC of this version may rashly ignore control dependencies
+    #define __TBB_GCC_OPTIMIZER_ORDERING_BROKEN 1
+#endif
+
+#if __FreeBSD__
+    /** A bug in FreeBSD 8.0 results in a kernel panic when there is contention
+        on a mutex created with the priority-inheritance attribute. **/
+    #define __TBB_PRIO_INHERIT_BROKEN 1
+
+    /** A bug in FreeBSD 8.0 results in tests hanging when an exception occurs
+        during (possibly concurrent) object construction by means of the placement new operator. **/
+    #define __TBB_PLACEMENT_NEW_EXCEPTION_SAFETY_BROKEN 1
+#endif /* __FreeBSD__ */
+
+#if (__linux__ || __APPLE__) && __i386__ && defined(__INTEL_COMPILER)
+    /** The Intel compiler for IA-32 (Linux|Mac OS X) crashes or generates 
+        incorrect code when __asm__ arguments have a cast to volatile. **/
+    #define __TBB_ICC_ASM_VOLATILE_BROKEN 1
+#endif
+
+#define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+
+/* gcc more recent than 4.1.2 */
+#if (__TBB_GCC_VERSION > 40102 ) && !defined(__INTEL_COMPILER)
+    #define __TBB_GCC_BUILTIN_ATOMICS_PRESENT 1
+#endif
+
+#if (TBB_USE_GCC_BUILTINS && !__TBB_GCC_BUILTIN_ATOMICS_PRESENT)
+    #error "generic gcc port is not supported for this os/architecture."
+#endif
+#endif /* __TBB_tbb_config_H */
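The compilation-mode macros defined above are meant to be overridable by client code before any TBB header is pulled in. A hypothetical sketch (not part of this commit); the particular values are illustrative only:

// Hypothetical sketch, not part of this commit: override TBB compilation modes
// at the top of a translation unit (or via the compiler command line).
#define TBB_USE_DEBUG 1                  /* enable internal assertions */
#define TBB_USE_PERFORMANCE_WARNINGS 1   /* report performance hazards */
/* #define TBB_USE_EXCEPTIONS 0 */       /* only when the build really has EH disabled */

#include "tbb/tbb.h"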
diff --git a/tbb/include/tbb/tbb_exception.h b/tbb/include/tbb/tbb_exception.h
new file mode 100644 (file)
index 0000000..2346690
--- /dev/null
@@ -0,0 +1,360 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_exception_H
+#define __TBB_exception_H
+
+#include "tbb_stddef.h"
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
+    #pragma warning (push)
+    #pragma warning (disable: 4530)
+#endif
+
+#include <stdexcept>
+#include <string> // required to construct std exception classes
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    #pragma warning (pop)
+#endif
+
+namespace tbb {
+
+//! Exception for concurrent containers
+class bad_last_alloc : public std::bad_alloc {
+public:
+    /*override*/ const char* what() const throw();
+#if __TBB_DEFAULT_DTOR_THROW_SPEC_BROKEN
+    /*override*/ ~bad_last_alloc() throw() {}
+#endif
+};
+
+//! Exception for PPL locks
+class improper_lock : public std::exception {
+public:
+    /*override*/ const char* what() const throw();
+};
+
+//! Exception for missing wait on structured_task_group
+class missing_wait : public std::exception {
+public:
+    /*override*/ const char* what() const throw();
+};
+
+//! Exception for repeated scheduling of the same task_handle
+class invalid_multiple_scheduling : public std::exception {
+public:
+    /*override*/ const char* what() const throw();
+};
+
+namespace internal {
+//! Obsolete
+void __TBB_EXPORTED_FUNC throw_bad_last_alloc_exception_v4();
+
+enum exception_id {
+    eid_bad_alloc = 1,
+    eid_bad_last_alloc,
+    eid_nonpositive_step,
+    eid_out_of_range,
+    eid_segment_range_error,
+    eid_index_range_error,
+    eid_missing_wait,
+    eid_invalid_multiple_scheduling,
+    eid_improper_lock,
+    eid_possible_deadlock,
+    eid_operation_not_permitted,
+    eid_condvar_wait_failed,
+    eid_invalid_load_factor,
+    eid_reserved, // free slot for backward compatibility, can be reused.
+    eid_invalid_swap,
+    eid_reservation_length_error,
+    eid_invalid_key,
+    //! The last enumerator tracks the number of defined IDs. It must remain the last one.
+    /** When adding new IDs, place them immediately _before_ this comment (that is,
+        _after_ all the existing IDs). NEVER insert new IDs between the existing ones. **/
+    eid_max
+};
+
+//! Gathers all throw operators in one place.
+/** Its purpose is to minimize code bloat that can be caused by throw operators
+    scattered in multiple places, especially in templates. **/
+void __TBB_EXPORTED_FUNC throw_exception_v4 ( exception_id );
+
+//! Versionless convenience wrapper for throw_exception_v4()
+inline void throw_exception ( exception_id eid ) { throw_exception_v4(eid); }
+
+} // namespace internal
+} // namespace tbb
+
+#if __TBB_TASK_GROUP_CONTEXT
+#include "tbb_allocator.h"
+#include <exception>
+#include <typeinfo>
+#include <new>
+
+namespace tbb {
+
+//! Interface to be implemented by all exceptions TBB recognizes and propagates across the threads.
+/** If an unhandled exception of a type derived from tbb::tbb_exception is intercepted
+    by the TBB scheduler in one of the worker threads, it is delivered to and re-thrown in
+    the root thread. The root thread is the thread that has started the outermost algorithm
+    or root task sharing the same task_group_context with the guilty algorithm/task (the one
+    that threw the exception first).
+
+    Note: when documentation mentions workers with respect to exception handling,
+    masters are implied as well, because they are completely equivalent in this context.
+    Consequently a root thread can be either a master or a worker thread.
+
+    NOTE: In case of nested algorithms or complex task hierarchies when the nested
+    levels share (explicitly or by means of implicit inheritance) the task group
+    context of the outermost level, the exception may be (re-)thrown multiple times
+    (ultimately - in each worker on each nesting level) before reaching the root
+    thread at the outermost level. IMPORTANT: if you intercept an exception derived
+    from this class on a nested level, you must re-throw it in the catch block by means
+    of the "throw;" operator.
+
+    TBB provides two implementations of this interface: tbb::captured_exception and
+    template class tbb::movable_exception. See their declarations for more info. **/
+class tbb_exception : public std::exception
+{
+    /** No operator new is provided because the TBB usage model assumes dynamic
+        creation of the TBB exception objects only by means of applying move()
+        operation on an exception thrown out of TBB scheduler. **/
+    void* operator new ( size_t );
+
+public:
+    //! Creates and returns pointer to the deep copy of this exception object.
+    /** Move semantics is allowed. **/
+    virtual tbb_exception* move () throw() = 0;
+
+    //! Destroys objects created by the move() method.
+    /** Frees memory and calls destructor for this exception object.
+        Can and must be used only on objects created by the move method. **/
+    virtual void destroy () throw() = 0;
+
+    //! Throws this exception object.
+    /** Make sure that if you have several levels of derivation from this interface
+        you implement or override this method on the most derived level. The implementation
+        is as simple as "throw *this;". Failure to do so will result in an exception
+        of a base class type being thrown. **/
+    virtual void throw_self () = 0;
+
+    //! Returns RTTI name of the originally intercepted exception
+    virtual const char* name() const throw() = 0;
+
+    //! Returns the result of originally intercepted exception's what() method.
+    virtual const char* what() const throw() = 0;
+
+    /** Operator delete is provided only to allow using existing smart pointers
+        with TBB exception objects obtained as the result of applying move()
+        operation on an exception thrown out of TBB scheduler.
+
+        When overriding method move(), make sure to override operator delete as well
+        if memory is not allocated by TBB's scalable allocator. **/
+    void operator delete ( void* p ) {
+        internal::deallocate_via_handler_v3(p);
+    }
+};
+
+//! This class is used by TBB to propagate information about unhandled exceptions into the root thread.
+/** An exception of this type is thrown by TBB in the root thread (the thread that started a parallel
+    algorithm) if an unhandled exception was intercepted during the algorithm's execution in one
+    of the workers.
+    \sa tbb::tbb_exception **/
+class captured_exception : public tbb_exception
+{
+public:
+    captured_exception ( const captured_exception& src )
+        : tbb_exception(src), my_dynamic(false)
+    {
+        set(src.my_exception_name, src.my_exception_info);
+    }
+
+    captured_exception ( const char* name_, const char* info )
+        : my_dynamic(false)
+    {
+        set(name_, info);
+    }
+
+    __TBB_EXPORTED_METHOD ~captured_exception () throw();
+
+    captured_exception& operator= ( const captured_exception& src ) {
+        if ( this != &src ) {
+            clear();
+            set(src.my_exception_name, src.my_exception_info);
+        }
+        return *this;
+    }
+
+    /*override*/
+    captured_exception* __TBB_EXPORTED_METHOD move () throw();
+
+    /*override*/
+    void __TBB_EXPORTED_METHOD destroy () throw();
+
+    /*override*/
+    void throw_self () { __TBB_THROW(*this); }
+
+    /*override*/
+    const char* __TBB_EXPORTED_METHOD name() const throw();
+
+    /*override*/
+    const char* __TBB_EXPORTED_METHOD what() const throw();
+
+    void __TBB_EXPORTED_METHOD set ( const char* name, const char* info ) throw();
+    void __TBB_EXPORTED_METHOD clear () throw();
+
+private:
+    //! Used only by method clone().
+    captured_exception() {}
+
+    //! Functionally equivalent to {captured_exception e(name,info); return e.clone();}
+    static captured_exception* allocate ( const char* name, const char* info );
+
+    bool my_dynamic;
+    const char* my_exception_name;
+    const char* my_exception_info;
+};
+
+//! Template that can be used to implement exception that transfers arbitrary ExceptionData to the root thread
+/** Code using TBB can instantiate this template with an arbitrary ExceptionData type
+    and throw this exception object. Such exceptions are intercepted by the TBB scheduler
+    and delivered to the root thread.
+    \sa tbb::tbb_exception **/
+template<typename ExceptionData>
+class movable_exception : public tbb_exception
+{
+    typedef movable_exception<ExceptionData> self_type;
+
+public:
+    movable_exception ( const ExceptionData& data_ )
+        : my_exception_data(data_)
+        , my_dynamic(false)
+        , my_exception_name(
+#if TBB_USE_EXCEPTIONS
+        typeid(self_type).name()
+#else /* !TBB_USE_EXCEPTIONS */
+        "movable_exception"
+#endif /* !TBB_USE_EXCEPTIONS */
+        )
+    {}
+
+    movable_exception ( const movable_exception& src ) throw ()
+        : tbb_exception(src)
+        , my_exception_data(src.my_exception_data)
+        , my_dynamic(false)
+        , my_exception_name(src.my_exception_name)
+    {}
+
+    ~movable_exception () throw() {}
+
+    const movable_exception& operator= ( const movable_exception& src ) {
+        if ( this != &src ) {
+            my_exception_data = src.my_exception_data;
+            my_exception_name = src.my_exception_name;
+        }
+        return *this;
+    }
+
+    ExceptionData& data () throw() { return my_exception_data; }
+
+    const ExceptionData& data () const throw() { return my_exception_data; }
+
+    /*override*/ const char* name () const throw() { return my_exception_name; }
+
+    /*override*/ const char* what () const throw() { return "tbb::movable_exception"; }
+
+    /*override*/
+    movable_exception* move () throw() {
+        void* e = internal::allocate_via_handler_v3(sizeof(movable_exception));
+        if ( e ) {
+            ::new (e) movable_exception(*this);
+            ((movable_exception*)e)->my_dynamic = true;
+        }
+        return (movable_exception*)e;
+    }
+    /*override*/
+    void destroy () throw() {
+        __TBB_ASSERT ( my_dynamic, "Method destroy can be called only on dynamically allocated movable_exceptions" );
+        if ( my_dynamic ) {
+            this->~movable_exception();
+            internal::deallocate_via_handler_v3(this);
+        }
+    }
+    /*override*/
+    void throw_self () { __TBB_THROW( *this ); }
+
+protected:
+    //! User data
+    ExceptionData  my_exception_data;
+
+private:
+    //! Flag specifying whether this object has been dynamically allocated (by the move method)
+    bool my_dynamic;
+
+    //! RTTI name of this class
+    /** We rely on the fact that RTTI names are static string constants. **/
+    const char* my_exception_name;
+};
+
+#if !TBB_USE_CAPTURED_EXCEPTION
+namespace internal {
+
+//! Exception container that preserves the exact copy of the original exception
+/** This class can be used only when the appropriate runtime support (mandated
+    by C++0x) is present **/
+class tbb_exception_ptr {
+    std::exception_ptr  my_ptr;
+
+public:
+    static tbb_exception_ptr* allocate ();
+    static tbb_exception_ptr* allocate ( const tbb_exception& tag );
+    //! This overload uses move semantics (i.e. it empties src)
+    static tbb_exception_ptr* allocate ( captured_exception& src );
+
+    //! Destroys this object
+    /** Note that objects of this type can be created only by the allocate() method. **/
+    void destroy () throw();
+
+    //! Throws the contained exception.
+    void throw_self () { std::rethrow_exception(my_ptr); }
+
+private:
+    tbb_exception_ptr ( const std::exception_ptr& src ) : my_ptr(src) {}
+    tbb_exception_ptr ( const captured_exception& src ) : my_ptr(std::copy_exception(src)) {}
+}; // class tbb::internal::tbb_exception_ptr
+
+} // namespace internal
+#endif /* !TBB_USE_CAPTURED_EXCEPTION */
+
+} // namespace tbb
+
+#endif /* __TBB_TASK_GROUP_CONTEXT */
+
+#endif /* __TBB_exception_H */
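A hypothetical sketch of the exception-propagation machinery above (not part of this commit): a worker throws tbb::movable_exception carrying user data, and the thread that started the algorithm catches it. Exact type recovery as shown assumes TBB_USE_CAPTURED_EXCEPTION is 0 (std::exception_ptr available, e.g. with the VC10 runtime); otherwise the catch site would see tbb::captured_exception instead.

// Hypothetical sketch, not part of this commit.
#include "tbb/tbb_exception.h"
#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"
#include <cstddef>
#include <cstdio>

struct bad_item { long index; };   // arbitrary user data carried across threads

void process( const long* items, std::size_t n ) {
    try {
        tbb::parallel_for( tbb::blocked_range<std::size_t>( 0, n ),
            [=]( const tbb::blocked_range<std::size_t>& r ) {
                for ( std::size_t i = r.begin(); i != r.end(); ++i )
                    if ( items[i] < 0 ) {
                        bad_item data = { (long)i };
                        throw tbb::movable_exception<bad_item>( data );  // moved to the root thread
                    }
            } );
    } catch ( tbb::movable_exception<bad_item>& e ) {
        std::printf( "negative value at index %ld\n", e.data().index );
    }
}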
diff --git a/tbb/include/tbb/tbb_machine.h b/tbb/include/tbb/tbb_machine.h
new file mode 100644 (file)
index 0000000..8b43a12
--- /dev/null
@@ -0,0 +1,715 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_machine_H
+#define __TBB_machine_H
+
+#include "tbb_stddef.h"
+
+#if _WIN32||_WIN64
+
+#ifdef _MANAGED
+#pragma managed(push, off)
+#endif
+
+    #if __MINGW64__ || __MINGW32__
+        extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
+        #define __TBB_Yield()  SwitchToThread()
+        #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
+            #include "machine/gcc_generic.h"
+        #elif __MINGW64__
+            #include "machine/linux_intel64.h"
+        #elif __MINGW32__
+            #include "machine/linux_ia32.h"
+        #endif
+    #elif defined(_M_IX86)
+        #include "machine/windows_ia32.h"
+    #elif defined(_M_AMD64) 
+        #include "machine/windows_intel64.h"
+    #elif _XBOX 
+        #include "machine/xbox360_ppc.h"
+    #endif
+
+#ifdef _MANAGED
+#pragma managed(pop)
+#endif
+
+#elif __linux__ || __FreeBSD__ || __NetBSD__
+
+    #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
+        #include "machine/gcc_generic.h"
+    #elif __i386__
+        #include "machine/linux_ia32.h"
+    #elif __x86_64__
+        #include "machine/linux_intel64.h"
+    #elif __ia64__
+        #include "machine/linux_ia64.h"
+    #elif __powerpc__
+        #include "machine/mac_ppc.h"
+    #elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT
+        #include "machine/gcc_generic.h"
+    #endif
+    #include "machine/linux_common.h"
+
+#elif __APPLE__
+
+    #if __i386__
+        #include "machine/linux_ia32.h"
+    #elif __x86_64__
+        #include "machine/linux_intel64.h"
+    #elif __POWERPC__
+        #include "machine/mac_ppc.h"
+    #endif
+    #include "machine/macos_common.h"
+
+#elif _AIX
+
+    #include "machine/ibm_aix51.h"
+
+#elif __sun || __SUNPRO_CC
+
+    #define __asm__ asm 
+    #define __volatile__ volatile
+    
+    #if __i386  || __i386__
+        #include "machine/linux_ia32.h"
+    #elif __x86_64__
+        #include "machine/linux_intel64.h"
+    #elif __sparc
+        #include "machine/sunos_sparc.h"
+    #endif
+    #include <sched.h>
+
+    #define __TBB_Yield() sched_yield()
+
+#endif /* OS selection */
+
+#ifndef __TBB_64BIT_ATOMICS
+#define __TBB_64BIT_ATOMICS 1
+#endif
+
+//! Prerequisites for each architecture port
+/** There is no generic implementation for these macros, so they have to be implemented
+    in each machine-architecture-specific header.
+
+    __TBB_full_memory_fence must prevent all memory operations from being reordered
+    across the fence, and all such fences must be totally ordered (or sequentially
+    consistent). These fences must affect both compiler and hardware.
+    
+    __TBB_release_consistency_helper is used to enforce guarantees of acquire or 
+    release semantics in generic implementations of __TBB_load_with_acquire and 
+    __TBB_store_with_release below. Depending on the particular combination of
+    architecture+compiler it can be a hardware fence, a compiler fence, both or
+    nothing. **/
+#if    !defined(__TBB_CompareAndSwap4) \
+    || !defined(__TBB_CompareAndSwap8) && __TBB_64BIT_ATOMICS \
+    || !defined(__TBB_Yield)           \
+    || !defined(__TBB_full_memory_fence)    \
+    || !defined(__TBB_release_consistency_helper)
+#error Minimal requirements for tbb_machine.h not satisfied; platform is not supported.
+#endif
+
+#ifndef __TBB_Pause
+    inline void __TBB_Pause(int32_t) {
+        __TBB_Yield();
+    }
+#endif
+
+namespace tbb {
+
+//! Sequentially consistent full memory fence.
+inline void atomic_fence () { __TBB_full_memory_fence(); }
+
+namespace internal {
+
+//! Class that implements exponential backoff.
+/** See implementation of spin_wait_while_eq for an example. */
+class atomic_backoff : no_copy {
+    //! Time delay, in units of "pause" instructions. 
+    /** Should be equal to approximately the number of "pause" instructions
+        that take the same time as a context switch. */
+    static const int32_t LOOPS_BEFORE_YIELD = 16;
+    int32_t count;
+public:
+    atomic_backoff() : count(1) {}
+
+    //! Pause for a while.
+    void pause() {
+        if( count<=LOOPS_BEFORE_YIELD ) {
+            __TBB_Pause(count);
+            // Pause twice as long the next time.
+            count*=2;
+        } else {
+            // Pause is so long that we might as well yield CPU to scheduler.
+            __TBB_Yield();
+        }
+    }
+
+    //! Pause while the backoff limit is not reached; return false immediately once it is.
+    bool bounded_pause() {
+        if( count<=LOOPS_BEFORE_YIELD ) {
+            __TBB_Pause(count);
+            // Pause twice as long the next time.
+            count*=2;
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    void reset() {
+        count = 1;
+    }
+};
+
+//! Spin WHILE the value of the variable is equal to a given value
+/** T and U should be comparable types. */
+template<typename T, typename U>
+void spin_wait_while_eq( const volatile T& location, U value ) {
+    atomic_backoff backoff;
+    while( location==value ) backoff.pause();
+}
+
+//! Spin UNTIL the value of the variable is equal to a given value
+/** T and U should be comparable types. */
+template<typename T, typename U>
+void spin_wait_until_eq( const volatile T& location, const U value ) {
+    atomic_backoff backoff;
+    while( location!=value ) backoff.pause();
+}
+
+// T should be unsigned, otherwise sign propagation will break correctness of bit manipulations.
+// S should be either 1 or 2, for the mask calculation to work correctly.
+// Together, these rules limit applicability of Masked CAS to unsigned char and unsigned short.
+template<size_t S, typename T>
+inline T __TBB_MaskedCompareAndSwap (volatile T *ptr, T value, T comparand ) {
+    volatile uint32_t * base = (uint32_t*)( (uintptr_t)ptr & ~(uintptr_t)0x3 );
+#if __TBB_BIG_ENDIAN
+    const uint8_t bitoffset = uint8_t( 8*( 4-S - (uintptr_t(ptr) & 0x3) ) );
+#else
+    const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) );
+#endif
+    const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset;
+    atomic_backoff b;
+    uint32_t result;
+    for(;;) {
+        result = *base; // reload the base value which might change during the pause
+        uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset );
+        uint32_t new_value = ( result & ~mask ) | ( value << bitoffset );
+        // __TBB_CompareAndSwap4 presumed to have full fence. 
+        result = __TBB_CompareAndSwap4( base, new_value, old_value );
+        if(  result==old_value               // CAS succeeded
+          || ((result^old_value)&mask)!=0 )  // CAS failed and the bits of interest have changed
+            break;
+        else                                 // CAS failed but the bits of interest left unchanged
+            b.pause();
+    }
+    return T((result & mask) >> bitoffset);
+}
+
+template<size_t S, typename T>
+inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand ) { 
+    return __TBB_CompareAndSwapW((T *)ptr,value,comparand);
+}
+
+template<>
+inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
+#ifdef __TBB_CompareAndSwap1
+    return __TBB_CompareAndSwap1(ptr,value,comparand);
+#else
+    return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,value,comparand);
+#endif
+}
+
+template<>
+inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) {
+#ifdef __TBB_CompareAndSwap2
+    return __TBB_CompareAndSwap2(ptr,value,comparand);
+#else
+    return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,value,comparand);
+#endif
+}
+
+template<>
+inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) { 
+    return __TBB_CompareAndSwap4(ptr,value,comparand);
+}
+
+#if __TBB_64BIT_ATOMICS
+template<>
+inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) { 
+    return __TBB_CompareAndSwap8(ptr,value,comparand);
+}
+#endif
+
+template<size_t S, typename T>
+inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
+    atomic_backoff b;
+    T result;
+    for(;;) {
+        result = *reinterpret_cast<volatile T *>(ptr);
+        // __TBB_CompareAndSwapGeneric presumed to have full fence. 
+        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result ) 
+            break;
+        b.pause();
+    }
+    return result;
+}
+
+template<size_t S, typename T>
+inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
+    atomic_backoff b;
+    T result;
+    for(;;) {
+        result = *reinterpret_cast<volatile T *>(ptr);
+        // __TBB_CompareAndSwapGeneric presumed to have full fence.
+        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result ) 
+            break;
+        b.pause();
+    }
+    return result;
+}
+
+// Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should expand to a type with alignment at least as
+// strict as that of type T.  The type should have a trivial default constructor and destructor, so that
+// arrays of that type can be declared without initializers.  
+// It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands
+// to a type bigger than T.
+// The default definition here works on machines where integers are naturally aligned and the
+// strictest alignment is 16.
+#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict
+
+#if __GNUC__ || __SUNPRO_CC || __IBMCPP__
+struct __TBB_machine_type_with_strictest_alignment {
+    int member[4];
+} __attribute__((aligned(16)));
+#elif _MSC_VER
+__declspec(align(16)) struct __TBB_machine_type_with_strictest_alignment {
+    int member[4];
+};
+#else
+#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) or __TBB_machine_type_with_strictest_alignment
+#endif
+
+template<size_t N> struct type_with_alignment {__TBB_machine_type_with_strictest_alignment member;};
+template<> struct type_with_alignment<1> { char member; };
+template<> struct type_with_alignment<2> { uint16_t member; };
+template<> struct type_with_alignment<4> { uint32_t member; };
+template<> struct type_with_alignment<8> { uint64_t member; };
+
+#if _MSC_VER||defined(__GNUC__)&&__GNUC__==3 && __GNUC_MINOR__<=2  
+//! Workaround for a bug in the GCC 3.2 and MSVC compilers.
+/** The bug is that the compiler sometimes returns 0 for __alignof(T) when T has not yet been instantiated.
+    The work-around forces instantiation by forcing computation of sizeof(T) before __alignof(T). */
+template<size_t Size, typename T> 
+struct work_around_alignment_bug {
+#if _MSC_VER
+    static const size_t alignment = __alignof(T);
+#else
+    static const size_t alignment = __alignof__(T);
+#endif
+};
+#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
+#elif __GNUC__ || __SUNPRO_CC || __IBMCPP__
+#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__alignof__(T)>
+#else
+#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) __TBB_machine_type_with_strictest_alignment
+#endif
+#endif  /* __TBB_TypeWithAlignmentAtLeastAsStrict */
+
+// The template class here avoids instantiating the static data in modules that don't use it
+template<typename T>
+struct reverse {
+    static const T byte_table[256];
+};
+// An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed
+// values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost.
+template<typename T>
+const T reverse<T>::byte_table[256] = {
+    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
+    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
+    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
+    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
+    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
+    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
+    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
+    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
+    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
+    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
+    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
+    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
+    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
+    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
+    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
+    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
+};
+
+} // namespace internal
+} // namespace tbb
+
+#ifndef __TBB_CompareAndSwap1
+#define __TBB_CompareAndSwap1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t>
+#endif
+
+#ifndef __TBB_CompareAndSwap2 
+#define __TBB_CompareAndSwap2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t>
+#endif
+
+#ifndef __TBB_CompareAndSwapW
+#define __TBB_CompareAndSwapW tbb::internal::__TBB_CompareAndSwapGeneric<sizeof(ptrdiff_t),ptrdiff_t>
+#endif
+
+#ifndef __TBB_FetchAndAdd1
+#define __TBB_FetchAndAdd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
+#endif
+
+#ifndef __TBB_FetchAndAdd2
+#define __TBB_FetchAndAdd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
+#endif
+
+#ifndef __TBB_FetchAndAdd4
+#define __TBB_FetchAndAdd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
+#endif
+
+#ifndef __TBB_FetchAndAdd8
+#define __TBB_FetchAndAdd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
+#endif
+
+#ifndef __TBB_FetchAndAddW
+#define __TBB_FetchAndAddW tbb::internal::__TBB_FetchAndAddGeneric<sizeof(ptrdiff_t),ptrdiff_t>
+#endif
+
+#ifndef __TBB_FetchAndStore1
+#define __TBB_FetchAndStore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
+#endif
+
+#ifndef __TBB_FetchAndStore2
+#define __TBB_FetchAndStore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
+#endif
+
+#ifndef __TBB_FetchAndStore4
+#define __TBB_FetchAndStore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
+#endif
+
+#ifndef __TBB_FetchAndStore8
+#define __TBB_FetchAndStore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
+#endif
+
+#ifndef __TBB_FetchAndStoreW
+#define __TBB_FetchAndStoreW tbb::internal::__TBB_FetchAndStoreGeneric<sizeof(ptrdiff_t),ptrdiff_t>
+#endif
+
+#if __TBB_DECL_FENCED_ATOMICS
+
+#ifndef __TBB_CompareAndSwap1__TBB_full_fence
+#define __TBB_CompareAndSwap1__TBB_full_fence __TBB_CompareAndSwap1
+#endif 
+#ifndef __TBB_CompareAndSwap1acquire
+#define __TBB_CompareAndSwap1acquire __TBB_CompareAndSwap1__TBB_full_fence
+#endif 
+#ifndef __TBB_CompareAndSwap1release
+#define __TBB_CompareAndSwap1release __TBB_CompareAndSwap1__TBB_full_fence
+#endif 
+
+#ifndef __TBB_CompareAndSwap2__TBB_full_fence
+#define __TBB_CompareAndSwap2__TBB_full_fence __TBB_CompareAndSwap2
+#endif
+#ifndef __TBB_CompareAndSwap2acquire
+#define __TBB_CompareAndSwap2acquire __TBB_CompareAndSwap2__TBB_full_fence
+#endif
+#ifndef __TBB_CompareAndSwap2release
+#define __TBB_CompareAndSwap2release __TBB_CompareAndSwap2__TBB_full_fence
+#endif
+
+#ifndef __TBB_CompareAndSwap4__TBB_full_fence
+#define __TBB_CompareAndSwap4__TBB_full_fence __TBB_CompareAndSwap4
+#endif 
+#ifndef __TBB_CompareAndSwap4acquire
+#define __TBB_CompareAndSwap4acquire __TBB_CompareAndSwap4__TBB_full_fence
+#endif 
+#ifndef __TBB_CompareAndSwap4release
+#define __TBB_CompareAndSwap4release __TBB_CompareAndSwap4__TBB_full_fence
+#endif 
+
+#ifndef __TBB_CompareAndSwap8__TBB_full_fence
+#define __TBB_CompareAndSwap8__TBB_full_fence __TBB_CompareAndSwap8
+#endif
+#ifndef __TBB_CompareAndSwap8acquire
+#define __TBB_CompareAndSwap8acquire __TBB_CompareAndSwap8__TBB_full_fence
+#endif
+#ifndef __TBB_CompareAndSwap8release
+#define __TBB_CompareAndSwap8release __TBB_CompareAndSwap8__TBB_full_fence
+#endif
+
+#ifndef __TBB_FetchAndAdd1__TBB_full_fence
+#define __TBB_FetchAndAdd1__TBB_full_fence __TBB_FetchAndAdd1
+#endif
+#ifndef __TBB_FetchAndAdd1acquire
+#define __TBB_FetchAndAdd1acquire __TBB_FetchAndAdd1__TBB_full_fence
+#endif
+#ifndef __TBB_FetchAndAdd1release
+#define __TBB_FetchAndAdd1release __TBB_FetchAndAdd1__TBB_full_fence
+#endif
+
+#ifndef __TBB_FetchAndAdd2__TBB_full_fence
+#define __TBB_FetchAndAdd2__TBB_full_fence __TBB_FetchAndAdd2
+#endif
+#ifndef __TBB_FetchAndAdd2acquire
+#define __TBB_FetchAndAdd2acquire __TBB_FetchAndAdd2__TBB_full_fence
+#endif
+#ifndef __TBB_FetchAndAdd2release
+#define __TBB_FetchAndAdd2release __TBB_FetchAndAdd2__TBB_full_fence
+#endif
+
+#ifndef __TBB_FetchAndAdd4__TBB_full_fence
+#define __TBB_FetchAndAdd4__TBB_full_fence __TBB_FetchAndAdd4
+#endif
+#ifndef __TBB_FetchAndAdd4acquire
+#define __TBB_FetchAndAdd4acquire __TBB_FetchAndAdd4__TBB_full_fence
+#endif
+#ifndef __TBB_FetchAndAdd4release
+#define __TBB_FetchAndAdd4release __TBB_FetchAndAdd4__TBB_full_fence
+#endif
+
+#ifndef __TBB_FetchAndAdd8__TBB_full_fence
+#define __TBB_FetchAndAdd8__TBB_full_fence __TBB_FetchAndAdd8
+#endif
+#ifndef __TBB_FetchAndAdd8acquire
+#define __TBB_FetchAndAdd8acquire __TBB_FetchAndAdd8__TBB_full_fence
+#endif
+#ifndef __TBB_FetchAndAdd8release
+#define __TBB_FetchAndAdd8release __TBB_FetchAndAdd8__TBB_full_fence
+#endif
+
+#ifndef __TBB_FetchAndStore1__TBB_full_fence
+#define __TBB_FetchAndStore1__TBB_full_fence __TBB_FetchAndStore1
+#endif
+#ifndef __TBB_FetchAndStore1acquire
+#define __TBB_FetchAndStore1acquire __TBB_FetchAndStore1__TBB_full_fence
+#endif
+#ifndef __TBB_FetchAndStore1release
+#define __TBB_FetchAndStore1release __TBB_FetchAndStore1__TBB_full_fence
+#endif
+
+#ifndef __TBB_FetchAndStore2__TBB_full_fence
+#define __TBB_FetchAndStore2__TBB_full_fence __TBB_FetchAndStore2
+#endif
+#ifndef __TBB_FetchAndStore2acquire
+#define __TBB_FetchAndStore2acquire __TBB_FetchAndStore2__TBB_full_fence
+#endif
+#ifndef __TBB_FetchAndStore2release
+#define __TBB_FetchAndStore2release __TBB_FetchAndStore2__TBB_full_fence
+#endif
+
+#ifndef __TBB_FetchAndStore4__TBB_full_fence
+#define __TBB_FetchAndStore4__TBB_full_fence __TBB_FetchAndStore4
+#endif
+#ifndef __TBB_FetchAndStore4acquire
+#define __TBB_FetchAndStore4acquire __TBB_FetchAndStore4__TBB_full_fence
+#endif
+#ifndef __TBB_FetchAndStore4release
+#define __TBB_FetchAndStore4release __TBB_FetchAndStore4__TBB_full_fence
+#endif
+
+#ifndef __TBB_FetchAndStore8__TBB_full_fence
+#define __TBB_FetchAndStore8__TBB_full_fence __TBB_FetchAndStore8
+#endif
+#ifndef __TBB_FetchAndStore8acquire
+#define __TBB_FetchAndStore8acquire __TBB_FetchAndStore8__TBB_full_fence
+#endif
+#ifndef __TBB_FetchAndStore8release
+#define __TBB_FetchAndStore8release __TBB_FetchAndStore8__TBB_full_fence
+#endif
+
+#endif // __TBB_DECL_FENCED_ATOMICS
+
+// Special atomic functions
+#ifndef __TBB_FetchAndAddWrelease
+#define __TBB_FetchAndAddWrelease __TBB_FetchAndAddW
+#endif
+
+#ifndef __TBB_FetchAndIncrementWacquire
+#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
+#endif
+
+#ifndef __TBB_FetchAndDecrementWrelease
+#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1))
+#endif
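
The two "special" macros above encode the usual reference-counting orderings: increment with acquire semantics when taking a reference, decrement with release semantics when dropping one. A minimal sketch of that pattern using std::atomic rather than the internal macros (which are an implementation detail, not public API):

#include <atomic>

struct ref_counted {
    std::atomic<long> refs;
    ref_counted() : refs(1) {}

    // Mirrors __TBB_FetchAndIncrementWacquire: take a reference with acquire ordering.
    void add_ref() { refs.fetch_add(1, std::memory_order_acquire); }

    // Mirrors __TBB_FetchAndDecrementWrelease: drop a reference with release ordering.
    // Returns true when the last reference was dropped; a full implementation would
    // issue an acquire fence before destroying the object.
    bool release() { return refs.fetch_sub(1, std::memory_order_release) == 1; }
};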
+
+template <typename T, size_t S>
+struct __TBB_machine_load_store {
+    static inline T load_with_acquire(const volatile T& location) {
+        T to_return = location;
+        __TBB_release_consistency_helper();
+        return to_return;
+    }
+
+    static inline void store_with_release(volatile T &location, T value) {
+        __TBB_release_consistency_helper();
+        location = value;
+    }
+};
+
+#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
+#if _MSC_VER
+using tbb::internal::int64_t;
+#endif
+// On 32-bit platforms, definitions of __TBB_Store8 and __TBB_Load8 must be provided; the fallbacks below emulate them with __TBB_CompareAndSwap8.
+#ifndef __TBB_Store8
+inline void __TBB_Store8 (volatile void *ptr, int64_t value) {
+    for(;;) {
+        int64_t result = *(int64_t *)ptr;
+        if( __TBB_CompareAndSwap8(ptr,value,result)==result ) break;
+    }
+}
+#endif
+
+#ifndef __TBB_Load8
+inline int64_t __TBB_Load8 (const volatile void *ptr) {
+    const int64_t anyvalue = 3264; // Any value works: with comparand and new value equal, the CAS never changes *ptr and simply returns its current contents
+    return __TBB_CompareAndSwap8(const_cast<volatile void *>(ptr),anyvalue,anyvalue);
+}
+#endif
+
+template <typename T>
+struct __TBB_machine_load_store<T,8> {
+    static inline T load_with_acquire(const volatile T& location) {
+        T to_return = (T)__TBB_Load8((const volatile void*)&location);
+        __TBB_release_consistency_helper();
+        return to_return;
+    }
+
+    static inline void store_with_release(volatile T& location, T value) {
+        __TBB_release_consistency_helper();
+        __TBB_Store8((volatile void *)&location,(int64_t)value);
+    }
+};
+#endif /* __TBB_WORDSIZE==4 */
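
The emulation above is worth spelling out: an atomic 64-bit load is a compare-and-swap whose comparand and new value are equal (memory is never modified, but the current contents come back atomically), and an atomic 64-bit store is a CAS retry loop. A hedged sketch of the same trick using GCC's __sync_val_compare_and_swap in place of __TBB_CompareAndSwap8 (assuming the target provides a 64-bit CAS, e.g. cmpxchg8b on 32-bit x86):

#include <stdint.h>

// Atomic 64-bit load: CAS with equal comparand and new value never changes *p,
// yet returns its current contents in a single atomic operation.
inline int64_t load8(const volatile int64_t* p) {
    return __sync_val_compare_and_swap(const_cast<volatile int64_t*>(p), 0, 0);
}

// Atomic 64-bit store: retry the CAS until the value it was based on was still current.
inline void store8(volatile int64_t* p, int64_t value) {
    int64_t expected = *p;                 // starting guess; may be stale
    for (;;) {
        int64_t observed = __sync_val_compare_and_swap(p, expected, value);
        if (observed == expected) break;   // our store won
        expected = observed;               // somebody else wrote first; retry
    }
}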
+
+#ifndef __TBB_load_with_acquire
+template<typename T>
+inline T __TBB_load_with_acquire(const volatile T &location) {
+    return __TBB_machine_load_store<T,sizeof(T)>::load_with_acquire(location);
+}
+#endif
+
+#ifndef __TBB_store_with_release
+template<typename T, typename V>
+inline void __TBB_store_with_release(volatile T& location, V value) {
+    __TBB_machine_load_store<T,sizeof(T)>::store_with_release(location,T(value));
+}
+//! Overload that exists solely to avoid /Wp64 warnings.
+inline void __TBB_store_with_release(volatile size_t& location, size_t value) {
+    __TBB_machine_load_store<size_t,sizeof(size_t)>::store_with_release(location,value);
+}
+#endif
+
+#ifndef __TBB_Log2
+inline intptr_t __TBB_Log2( uintptr_t x ) {
+    if( x==0 ) return -1;
+    intptr_t result = 0;
+    uintptr_t tmp;
+#if __TBB_WORDSIZE>=8
+    if( (tmp = x>>32) ) { x=tmp; result += 32; }
+#endif
+    if( (tmp = x>>16) ) { x=tmp; result += 16; }
+    if( (tmp = x>>8) )  { x=tmp; result += 8; }
+    if( (tmp = x>>4) )  { x=tmp; result += 4; }
+    if( (tmp = x>>2) )  { x=tmp; result += 2; }
+    return (x&2)? result+1: result;
+}
+#endif
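
__TBB_Log2 computes floor(log2(x)) by binary-searching for the highest set bit: each shift test halves the remaining bit width and accumulates the shift amount into the result. A standalone copy of the same cascade (widened to uint64_t so the top test is always well defined), with a few spot checks:

#include <cassert>
#include <stdint.h>

static int floor_log2(uint64_t x) {        // same cascade as __TBB_Log2 above
    if (x == 0) return -1;
    int result = 0;
    uint64_t tmp;
    if ((tmp = x >> 32)) { x = tmp; result += 32; }
    if ((tmp = x >> 16)) { x = tmp; result += 16; }
    if ((tmp = x >> 8))  { x = tmp; result += 8; }
    if ((tmp = x >> 4))  { x = tmp; result += 4; }
    if ((tmp = x >> 2))  { x = tmp; result += 2; }
    return (x & 2) ? result + 1 : result;  // x is now 1..3; bit 1 decides the last step
}

int main() {
    assert(floor_log2(1) == 0 && floor_log2(2) == 1 && floor_log2(3) == 1);
    assert(floor_log2(64) == 6 && floor_log2(1000) == 9 && floor_log2(0) == -1);
    return 0;
}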
+
+#ifndef __TBB_AtomicOR
+inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
+    tbb::internal::atomic_backoff b;
+    for(;;) {
+        uintptr_t tmp = *(volatile uintptr_t *)operand;
+        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
+        if( result==tmp ) break;
+        b.pause();
+    }
+}
+#endif
+
+#ifndef __TBB_AtomicAND
+inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
+    tbb::internal::atomic_backoff b;
+    for(;;) {
+        uintptr_t tmp = *(volatile uintptr_t *)operand;
+        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
+        if( result==tmp ) break;
+        b.pause();
+    }
+}
+#endif
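
__TBB_AtomicOR and __TBB_AtomicAND illustrate the general recipe for any read-modify-write that this layer (or the hardware) does not supply directly: read the current value, compute the desired value, attempt a CAS, and retry with backoff if another thread got there first. The same recipe gives, for example, an atomic maximum; a sketch with std::atomic, with the backoff reduced to a bare retry for brevity:

#include <atomic>

// Atomically raises m to at least `candidate`, using the same CAS-retry shape as __TBB_AtomicOR.
inline void atomic_max(std::atomic<unsigned long>& m, unsigned long candidate) {
    unsigned long observed = m.load(std::memory_order_relaxed);
    // compare_exchange_weak reloads `observed` on failure, so each iteration works with the
    // freshest value; stop once m is already >= candidate or our CAS wins.
    while (observed < candidate && !m.compare_exchange_weak(observed, candidate)) {
    }
}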
+
+#ifndef __TBB_Byte
+typedef unsigned char __TBB_Byte;
+#endif
+
+#ifndef __TBB_TryLockByte
+inline bool __TBB_TryLockByte( __TBB_Byte &flag ) {
+    return __TBB_CompareAndSwap1(&flag,1,0)==0;
+}
+#endif
+
+#ifndef __TBB_LockByte
+inline uintptr_t __TBB_LockByte( __TBB_Byte& flag ) {
+    if ( !__TBB_TryLockByte(flag) ) {
+        tbb::internal::atomic_backoff b;
+        do {
+            b.pause();
+        } while ( !__TBB_TryLockByte(flag) );
+    }
+    return 0;
+}
+#endif
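
__TBB_TryLockByte and __TBB_LockByte form a classic test-and-set spin lock: the byte is 0 when free and 1 when held, try-lock is a CAS from 0 to 1, lock spins (with exponential backoff via atomic_backoff) until the try-lock succeeds, and unlock, via __TBB_UnlockByte defined just below, is a release store of 0. A hedged sketch of the same lock with std::atomic, omitting the backoff:

#include <atomic>

struct byte_lock {
    std::atomic<unsigned char> flag;   // 0 = free, 1 = held (the role of __TBB_Byte)
    byte_lock() : flag(0) {}

    bool try_lock() {                  // __TBB_TryLockByte: succeed only on a 0 -> 1 transition
        unsigned char expected = 0;
        return flag.compare_exchange_strong(expected, 1, std::memory_order_acquire);
    }
    void lock() {                      // __TBB_LockByte: spin; a real lock would back off here
        while (!try_lock()) {}
    }
    void unlock() {                    // __TBB_UnlockByte: release store of 0
        flag.store(0, std::memory_order_release);
    }
};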
+
+#define __TBB_UnlockByte __TBB_store_with_release
+
+#ifndef __TBB_ReverseByte
+inline unsigned char __TBB_ReverseByte(unsigned char src) {
+    return tbb::internal::reverse<unsigned char>::byte_table[src];
+}
+#endif
+
+template<typename T>
+T __TBB_ReverseBits(T src)
+{
+    T dst;
+    unsigned char *original = (unsigned char *) &src;
+    unsigned char *reversed = (unsigned char *) &dst;
+
+    for( int i = sizeof(T)-1; i >= 0; i-- )
+        reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );
+
+    return dst;
+}
+
+#endif /* __TBB_machine_H */
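
__TBB_load_with_acquire and __TBB_store_with_release are the primitives behind the usual publish/consume handoff: write the payload, then release-store a flag; acquire-load the flag, then read the payload. A sketch of that protocol with std::atomic standing in for the internal helpers:

#include <atomic>

static int payload = 0;                 // ordinary data being published
static std::atomic<int> ready(0);       // the word the helpers above would guard

void producer() {
    payload = 42;                                   // 1. write the data
    ready.store(1, std::memory_order_release);      // 2. __TBB_store_with_release(ready, 1)
}

int consumer() {
    while (ready.load(std::memory_order_acquire) == 0) {}  // __TBB_load_with_acquire in a spin
    return payload;                                 // guaranteed to observe 42
}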
diff --git a/tbb/include/tbb/tbb_profiling.h b/tbb/include/tbb/tbb_profiling.h
new file mode 100644 (file)
index 0000000..a56039f
--- /dev/null
@@ -0,0 +1,205 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_profiling_H
+#define __TBB_profiling_H
+
+// Check if the tools support is enabled
+#if (_WIN32||_WIN64||__linux__) && !__MINGW32__ && TBB_USE_THREADING_TOOLS
+
+#if _WIN32||_WIN64
+#include <stdlib.h>  /* mbstowcs_s */
+#endif
+#include "tbb_stddef.h"
+
+namespace tbb {
+    namespace internal {
+#if _WIN32||_WIN64
+        void __TBB_EXPORTED_FUNC itt_set_sync_name_v3( void *obj, const wchar_t* name ); 
+        inline size_t multibyte_to_widechar( wchar_t* wcs, const char* mbs, size_t bufsize) {
+#if _MSC_VER>=1400
+            size_t len;
+            mbstowcs_s( &len, wcs, bufsize, mbs, _TRUNCATE );
+            return len;   // mbstowcs_s counts null terminator
+#else
+            size_t len = mbstowcs( wcs, mbs, bufsize );
+            if(wcs && len!=size_t(-1) )
+                wcs[len<bufsize-1? len: bufsize-1] = wchar_t('\0');
+            return len+1; // mbstowcs does not count null terminator
+#endif
+        }
+#else
+        void __TBB_EXPORTED_FUNC itt_set_sync_name_v3( void *obj, const char* name ); 
+#endif
+    } // namespace internal
+} // namespace tbb
+
+//! Macro __TBB_DEFINE_PROFILING_SET_NAME(T) defines "set_name" methods for sync objects of type T
+/** Should be used in the "tbb" namespace only. 
+    Don't place semicolon after it to avoid compiler warnings. **/
+#if _WIN32||_WIN64
+    #define __TBB_DEFINE_PROFILING_SET_NAME(sync_object_type)    \
+        namespace profiling {                                                       \
+            inline void set_name( sync_object_type& obj, const wchar_t* name ) {    \
+                tbb::internal::itt_set_sync_name_v3( &obj, name );                  \
+            }                                                                       \
+            inline void set_name( sync_object_type& obj, const char* name ) {       \
+                size_t len = tbb::internal::multibyte_to_widechar(NULL, name, 0);   \
+                wchar_t *wname = new wchar_t[len];                                  \
+                tbb::internal::multibyte_to_widechar(wname, name, len);             \
+                set_name( obj, wname );                                             \
+                delete[] wname;                                                     \
+            }                                                                       \
+        }
+#else /* !WIN */
+    #define __TBB_DEFINE_PROFILING_SET_NAME(sync_object_type)    \
+        namespace profiling {                                                       \
+            inline void set_name( sync_object_type& obj, const char* name ) {       \
+                tbb::internal::itt_set_sync_name_v3( &obj, name );                  \
+            }                                                                       \
+        }
+#endif /* !WIN */
+
+#else /* no tools support */
+
+#if _WIN32||_WIN64
+    #define __TBB_DEFINE_PROFILING_SET_NAME(sync_object_type)    \
+        namespace profiling {                                               \
+            inline void set_name( sync_object_type&, const wchar_t* ) {}    \
+            inline void set_name( sync_object_type&, const char* ) {}       \
+        }
+#else /* !WIN */
+    #define __TBB_DEFINE_PROFILING_SET_NAME(sync_object_type)    \
+        namespace profiling {                                               \
+            inline void set_name( sync_object_type&, const char* ) {}       \
+        }
+#endif /* !WIN */
+
+#endif /* no tools support */
+
+#include "atomic.h"
+// Need these to work regardless of tools support
+namespace tbb {
+    namespace internal {
+
+        enum notify_type {prepare=0, cancel, acquired, releasing};
+        const uintptr_t NUM_NOTIFY_TYPES = 4; // set to # elements in enum above
+        
+        void __TBB_EXPORTED_FUNC call_itt_notify_v5(int t, void *ptr);
+        void __TBB_EXPORTED_FUNC itt_store_pointer_with_release_v3(void *dst, void *src);
+        void* __TBB_EXPORTED_FUNC itt_load_pointer_with_acquire_v3(const void *src);
+        void* __TBB_EXPORTED_FUNC itt_load_pointer_v3( const void* src );
+
+        // two template arguments are needed to work around a /Wp64 warning with tbb::atomic specialized for an unsigned type
+        template <typename T, typename U>
+        inline void itt_store_word_with_release(tbb::atomic<T>& dst, U src) {
+#if TBB_USE_THREADING_TOOLS
+            // This assertion should be replaced with static_assert
+            __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized.");
+            itt_store_pointer_with_release_v3(&dst, (void *)uintptr_t(src));
+#else
+            dst = src;
+#endif // TBB_USE_THREADING_TOOLS
+        }
+
+        template <typename T>
+        inline T itt_load_word_with_acquire(const tbb::atomic<T>& src) {
+#if TBB_USE_THREADING_TOOLS
+            // This assertion should be replaced with static_assert
+            __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized.");
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+            // Workaround for overzealous compiler warnings 
+            #pragma warning (push)
+            #pragma warning (disable: 4311)
+#endif
+            T result = (T)itt_load_pointer_with_acquire_v3(&src);
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+            #pragma warning (pop)
+#endif
+            return result;
+#else
+            return src;
+#endif // TBB_USE_THREADING_TOOLS
+        }
+
+        template <typename T>
+        inline void itt_store_word_with_release(T& dst, T src) {
+#if TBB_USE_THREADING_TOOLS
+            // This assertion should be replaced with static_assert
+            __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized.");
+            itt_store_pointer_with_release_v3(&dst, (void *)src);
+#else
+            __TBB_store_with_release(dst, src); 
+#endif // TBB_USE_THREADING_TOOLS
+        }
+
+        template <typename T>
+        inline T itt_load_word_with_acquire(const T& src) {
+#if TBB_USE_THREADING_TOOLS
+            // This assertion should be replaced with static_assert
+            __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized");
+            return (T)itt_load_pointer_with_acquire_v3(&src);
+#else
+            return __TBB_load_with_acquire(src);
+#endif // TBB_USE_THREADING_TOOLS
+        }
+        
+        template <typename T>
+        inline void itt_hide_store_word(T& dst, T src) {
+#if TBB_USE_THREADING_TOOLS
+            // This assertion should be replaced with static_assert
+            __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized");
+            itt_store_pointer_with_release_v3(&dst, (void *)src);
+#else
+            dst = src;
+#endif
+        }
+
+        template <typename T>
+        inline T itt_hide_load_word(const T& src) {
+#if TBB_USE_THREADING_TOOLS
+            // This assertion should be replaced with static_assert
+            __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized.");
+            return (T)itt_load_pointer_v3(&src);
+#else
+            return src;
+#endif
+        }
+
+#if TBB_USE_THREADING_TOOLS
+        inline void call_itt_notify(notify_type t, void *ptr) {
+            call_itt_notify_v5((int)t, ptr);
+        }
+#else
+        inline void call_itt_notify(notify_type /*t*/, void * /*ptr*/) {}
+#endif // TBB_USE_THREADING_TOOLS
+
+    } // namespace internal
+} // namespace tbb
+
+#endif /* __TBB_profiling_H */
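
How the pieces of this header fit together: a TBB sync class invokes __TBB_DEFINE_PROFILING_SET_NAME inside namespace tbb, which generates tbb::profiling::set_name overloads that either forward to the ITT notification layer (when threading tools support is compiled in) or compile away to no-ops. A hedged sketch with a hypothetical user-defined sync type, so as not to collide with the overloads TBB's own headers already generate:

#include "tbb/tbb_profiling.h"

struct my_sync { int state; };          // hypothetical synchronization object

namespace tbb {
    // No trailing semicolon, as the macro's documentation requests.
    __TBB_DEFINE_PROFILING_SET_NAME(my_sync)
}

int main() {
    my_sync s;
    // A no-op unless the library is built and used with TBB_USE_THREADING_TOOLS;
    // with tools support it labels the object for Intel analysis tools.
    tbb::profiling::set_name(s, "my_sync_object");
    return 0;
}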
diff --git a/tbb/include/tbb/tbb_stddef.h b/tbb/include/tbb/tbb_stddef.h
new file mode 100644 (file)
index 0000000..2e834ac
--- /dev/null
@@ -0,0 +1,335 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_tbb_stddef_H
+#define __TBB_tbb_stddef_H
+
+// Marketing-driven product version
+#define TBB_VERSION_MAJOR 3
+#define TBB_VERSION_MINOR 0
+
+// Engineering-focused interface version
+#define TBB_INTERFACE_VERSION 5006
+#define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000
+
+// The oldest major interface version still supported
+// To be used in SONAME, manifests, etc.
+#define TBB_COMPATIBLE_INTERFACE_VERSION 2
+
+#define __TBB_STRING_AUX(x) #x
+#define __TBB_STRING(x) __TBB_STRING_AUX(x)
+
+// The defines below are not needed for resource processing on Windows
+#if !defined RC_INVOKED
+
+// Define groups for Doxygen documentation
+/**
+ * @defgroup algorithms         Algorithms
+ * @defgroup containers         Containers
+ * @defgroup memory_allocation  Memory Allocation
+ * @defgroup synchronization    Synchronization
+ * @defgroup timing             Timing
+ * @defgroup task_scheduling    Task Scheduling
+ */
+
+// Simple text that is displayed on the main page of Doxygen documentation.
+/**
+ * \mainpage Main Page
+ *
+ * Click the tabs above for information about the
+ * - <a href="./modules.html">Modules</a> (groups of functionality) implemented by the library 
+ * - <a href="./annotated.html">Classes</a> provided by the library
+ * - <a href="./files.html">Files</a> constituting the library.
+ * .
+ * Please note that a significant part of TBB functionality is implemented in the form of
+ * template functions, descriptions of which are not accessible on the <a href="./annotated.html">Classes</a>
+ * tab. Use the <a href="./modules.html">Modules</a> or <a href="./namespacemembers.html">Namespace/Namespace Members</a>
+ * tabs to find them.
+ *
+ * Additional pieces of information can be found here
+ * - \subpage concepts
+ * .
+ */
+
+/** \page concepts TBB concepts
+    
+    A concept is a set of requirements on a type, which are necessary and sufficient
+    for the type to model a particular behavior or a set of behaviors. Some concepts
+    are specific to a particular algorithm (e.g. algorithm body), while others
+    are common to several algorithms (e.g. range concept).
+
+    All TBB algorithms make use of different classes implementing various concepts.
+    Implementation classes are supplied by the user as type arguments of template 
+    parameters and/or as objects passed as function call arguments. The library 
+    provides predefined  implementations of some concepts (e.g. several kinds of 
+    \ref range_req "ranges"), while other ones must always be implemented by the user. 
+    
+    TBB defines a set of minimal requirements each concept must conform to. Here is 
+    the list of different concepts hyperlinked to the corresponding requirements specifications:
+    - \subpage range_req
+    - \subpage parallel_do_body_req
+    - \subpage parallel_for_body_req
+    - \subpage parallel_reduce_body_req
+    - \subpage parallel_scan_body_req
+    - \subpage parallel_sort_iter_req
+**/
+
+// Define preprocessor symbols used to determine architecture
+#if _WIN32||_WIN64
+#   if defined(_M_X64)||defined(__x86_64__)  // the latter for MinGW support
+#       define __TBB_x86_64 1
+#   elif defined(_M_IA64)
+#       define __TBB_ipf 1
+#   elif defined(_M_IX86)||defined(__i386__) // the latter for MinGW support
+#       define __TBB_x86_32 1
+#   endif
+#else /* Assume generic Unix */
+#   if !__linux__ && !__APPLE__
+#       define __TBB_generic_os 1
+#   endif
+#   if __x86_64__
+#       define __TBB_x86_64 1
+#   elif __ia64__
+#       define __TBB_ipf 1
+#   elif __i386__||__i386  // __i386 is for Sun OS
+#       define __TBB_x86_32 1
+#   else
+#       define __TBB_generic_arch 1
+#   endif
+#endif
+
+// tbb_config.h should be included first since it contains macro definitions used in other headers
+#include "tbb_config.h"
+
+#if _MSC_VER
+// define the parts of stdint.h that are needed, but put them inside tbb::internal
+namespace tbb {
+namespace internal {
+    typedef __int8 int8_t;
+    typedef __int16 int16_t;
+    typedef __int32 int32_t;
+    typedef __int64 int64_t;
+    typedef unsigned __int8 uint8_t;
+    typedef unsigned __int16 uint16_t;
+    typedef unsigned __int32 uint32_t;
+    typedef unsigned __int64 uint64_t;
+} // namespace internal
+} // namespace tbb
+#else
+#include <stdint.h>
+#endif /* _MSC_VER */
+
+#if _MSC_VER >=1400
+#define __TBB_EXPORTED_FUNC   __cdecl
+#define __TBB_EXPORTED_METHOD __thiscall
+#else
+#define __TBB_EXPORTED_FUNC
+#define __TBB_EXPORTED_METHOD
+#endif
+
+#include <cstddef>      /* Need size_t and ptrdiff_t */
+
+#if _MSC_VER
+#define __TBB_tbb_windef_H
+#include "_tbb_windef.h"
+#undef __TBB_tbb_windef_H
+#endif
+
+//! The namespace tbb contains all components of the library.
+namespace tbb {
+
+using std::size_t; using std::ptrdiff_t;
+
+    //! Type for an assertion handler
+    typedef void(*assertion_handler_type)( const char* filename, int line, const char* expression, const char * comment );
+
+#if TBB_USE_ASSERT
+
+//! Assert that x is true.
+/** If x is false, print assertion failure message.  
+    If the comment argument is not NULL, it is printed as part of the failure message.  
+    The comment argument has no other effect. */
+#define __TBB_ASSERT(predicate,message) ((predicate)?((void)0):tbb::assertion_failure(__FILE__,__LINE__,#predicate,message))
+#define __TBB_ASSERT_EX __TBB_ASSERT
+
+    //! Set assertion handler and return previous value of it.
+    assertion_handler_type __TBB_EXPORTED_FUNC set_assertion_handler( assertion_handler_type new_handler );
+
+    //! Process an assertion failure.
+    /** Normally called from __TBB_ASSERT macro.
+        If assertion handler is null, print message for assertion failure and abort.
+        Otherwise call the assertion handler. */
+    void __TBB_EXPORTED_FUNC assertion_failure( const char* filename, int line, const char* expression, const char* comment );
+
+#else
+
+//! No-op version of __TBB_ASSERT.
+#define __TBB_ASSERT(predicate,comment) ((void)0)
+//! "Extended" version is useful to suppress warnings if a variable is only used with an assert
+#define __TBB_ASSERT_EX(predicate,comment) ((void)(1 && (predicate)))
+
+#endif /* TBB_USE_ASSERT */
+
+//! The function returns the interface version of the TBB shared library being used.
+/**
+ * The version it returns is determined at runtime, not at compile/link time.
+ * So it can be different from the value of TBB_INTERFACE_VERSION obtained at compile time.
+ */
+extern "C" int __TBB_EXPORTED_FUNC TBB_runtime_interface_version();
+
+//! Dummy type that distinguishes splitting constructor from copy constructor.
+/**
+ * See description of parallel_for and parallel_reduce for example usages.
+ * @ingroup algorithms
+ */
+class split {
+};
+
+/**
+ * @cond INTERNAL
+ * @brief Identifiers declared inside namespace internal should never be used directly by client code.
+ */
+namespace internal {
+
+//! Compile-time constant that is upper bound on cache line/sector size.
+/** It should be used only in situations where having a compile-time upper 
+    bound is more useful than a run-time exact answer.
+    @ingroup memory_allocation */
+const size_t NFS_MaxLineSize = 128;
+
+template<class T, int S>
+struct padded_base : T {
+    char pad[NFS_MaxLineSize - sizeof(T) % NFS_MaxLineSize];
+};
+template<class T> struct padded_base<T, 0> : T {};
+
+//! Pads type T to fill out to a multiple of cache line size.
+template<class T>
+struct padded : padded_base<T, sizeof(T)> {};
+
+//! Extended variant of the standard offsetof macro
+/** The standard offsetof macro is not sufficient for TBB as it can be used for
+    POD-types only. The constant 0x1000 (not NULL) is necessary to appease GCC. **/
+#define __TBB_offsetof(class_name, member_name) \
+    ((ptrdiff_t)&(reinterpret_cast<class_name*>(0x1000)->member_name) - 0x1000)
+
+//! Returns address of the object containing a member with the given name and address
+#define __TBB_get_object_ref(class_name, member_name, member_addr) \
+    (*reinterpret_cast<class_name*>((char*)member_addr - __TBB_offsetof(class_name, member_name)))
+
+//! Throws std::runtime_error with what() returning error_code description prefixed with aux_info
+void __TBB_EXPORTED_FUNC handle_perror( int error_code, const char* aux_info );
+
+#if TBB_USE_EXCEPTIONS
+    #define __TBB_TRY try
+    #define __TBB_CATCH(e) catch(e)
+    #define __TBB_THROW(e) throw e
+    #define __TBB_RETHROW() throw
+#else /* !TBB_USE_EXCEPTIONS */
+    inline bool __TBB_false() { return false; }
+    #define __TBB_TRY
+    #define __TBB_CATCH(e) if ( tbb::internal::__TBB_false() )
+    #define __TBB_THROW(e) ((void)0)
+    #define __TBB_RETHROW() ((void)0)
+#endif /* !TBB_USE_EXCEPTIONS */
+
+//! Report a runtime warning.
+void __TBB_EXPORTED_FUNC runtime_warning( const char* format, ... );
+
+#if TBB_USE_ASSERT
+static void* const poisoned_ptr = reinterpret_cast<void*>(-1);
+
+//! Set p to invalid pointer value.
+template<typename T>
+inline void poison_pointer( T*& p ) { p = reinterpret_cast<T*>(poisoned_ptr); }
+
+/** Expected to be used in assertions only, thus no empty form is defined. **/
+template<typename T>
+inline bool is_poisoned( T* p ) { return p == reinterpret_cast<T*>(poisoned_ptr); }
+#else
+template<typename T>
+inline void poison_pointer( T* ) {/*do nothing*/}
+#endif /* !TBB_USE_ASSERT */
+
+//! Cast pointer from U* to T.
+/** This method should be used sparingly as a last resort for dealing with 
+    situations that inherently break strict ISO C++ aliasing rules. */
+template<typename T, typename U> 
+inline T punned_cast( U* ptr ) {
+    uintptr_t x = reinterpret_cast<uintptr_t>(ptr);
+    return reinterpret_cast<T>(x);
+}
+
+//! Base class for types that should not be assigned.
+class no_assign {
+    // Deny assignment
+    void operator=( const no_assign& );
+public:
+#if __GNUC__
+    //! Explicitly define default construction, because otherwise gcc issues gratuitous warning.
+    no_assign() {}
+#endif /* __GNUC__ */
+};
+
+//! Base class for types that should not be copied or assigned.
+class no_copy: no_assign {
+    //! Deny copy construction
+    no_copy( const no_copy& );
+public:
+    //! Allow default construction
+    no_copy() {}
+};
+
+//! Class for determining type of std::allocator<T>::value_type.
+template<typename T>
+struct allocator_type {
+    typedef T value_type;
+};
+
+#if _MSC_VER
+//! Microsoft std::allocator has non-standard extension that strips const from a type. 
+template<typename T>
+struct allocator_type<const T> {
+    typedef T value_type;
+};
+#endif
+
+// Struct to be used as a version tag for inline functions.
+/** Version tag can be necessary to prevent loader on Linux from using the wrong 
+    symbol in debug builds (when inline functions are compiled as out-of-line). **/
+struct version_tag_v3 {};
+
+typedef version_tag_v3 version_tag;
+
+} // internal
+//! @endcond
+
+} // tbb
+
+#endif /* RC_INVOKED */
+#endif /* __TBB_tbb_stddef_H */
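
Among the utilities above, padded<T> is the one most often seen in the rest of the library: it rounds a type up to a multiple of NFS_MaxLineSize so that adjacent objects in an array cannot share a cache line. A hedged sketch (padded lives in tbb::internal and is not public API; the counter type here is hypothetical):

#include "tbb/tbb_stddef.h"

struct counter { long value; };

// One slot per thread; the padding keeps neighbouring slots on different cache lines,
// so concurrent updates do not cause false sharing.
static tbb::internal::padded<counter> per_thread[8];

int main() {
    per_thread[3].value = 42;
    __TBB_ASSERT(sizeof(per_thread[0]) % tbb::internal::NFS_MaxLineSize == 0,
                 "padded type should occupy whole cache-line-sized blocks");
    return 0;
}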
diff --git a/tbb/include/tbb/tbb_thread.h b/tbb/include/tbb/tbb_thread.h
new file mode 100644 (file)
index 0000000..41890e7
--- /dev/null
@@ -0,0 +1,302 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_tbb_thread_H
+#define __TBB_tbb_thread_H
+
+#if _WIN32||_WIN64
+#include "machine/windows_api.h"
+#define __TBB_NATIVE_THREAD_ROUTINE unsigned WINAPI
+#define __TBB_NATIVE_THREAD_ROUTINE_PTR(r) unsigned (WINAPI* r)( void* )
+#else
+#define __TBB_NATIVE_THREAD_ROUTINE void*
+#define __TBB_NATIVE_THREAD_ROUTINE_PTR(r) void* (*r)( void* )
+#include <pthread.h>
+#endif // _WIN32||_WIN64
+
+#include "tbb_stddef.h"
+#include "tick_count.h"
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
+    #pragma warning (push)
+    #pragma warning (disable: 4530)
+#endif
+
+#include <iosfwd>
+
+#if !TBB_USE_EXCEPTIONS && _MSC_VER
+    #pragma warning (pop)
+#endif
+
+namespace tbb {
+
+//! @cond INTERNAL
+namespace internal {
+    
+    class tbb_thread_v3;
+
+} // namespace internal
+
+void swap( internal::tbb_thread_v3& t1, internal::tbb_thread_v3& t2 ); 
+
+namespace internal {
+
+    //! Allocate a closure
+    void* __TBB_EXPORTED_FUNC allocate_closure_v3( size_t size );
+    //! Free a closure allocated by allocate_closure_v3
+    void __TBB_EXPORTED_FUNC free_closure_v3( void* );
+   
+    struct thread_closure_base {
+        void* operator new( size_t size ) {return allocate_closure_v3(size);}
+        void operator delete( void* ptr ) {free_closure_v3(ptr);}
+    };
+
+    template<class F> struct thread_closure_0: thread_closure_base {
+        F function;
+
+        static __TBB_NATIVE_THREAD_ROUTINE start_routine( void* c ) {
+            thread_closure_0 *self = static_cast<thread_closure_0*>(c);
+            self->function();
+            delete self;
+            return 0;
+        }
+        thread_closure_0( const F& f ) : function(f) {}
+    };
+    //! Structure used to pass user function with 1 argument to thread.  
+    template<class F, class X> struct thread_closure_1: thread_closure_base {
+        F function;
+        X arg1;
+        //! Routine passed to Windows's _beginthreadex by thread::internal_start() inside tbb.dll
+        static __TBB_NATIVE_THREAD_ROUTINE start_routine( void* c ) {
+            thread_closure_1 *self = static_cast<thread_closure_1*>(c);
+            self->function(self->arg1);
+            delete self;
+            return 0;
+        }
+        thread_closure_1( const F& f, const X& x ) : function(f), arg1(x) {}
+    };
+    template<class F, class X, class Y> struct thread_closure_2: thread_closure_base {
+        F function;
+        X arg1;
+        Y arg2;
+        //! Routine passed to Windows's _beginthreadex by thread::internal_start() inside tbb.dll
+        static __TBB_NATIVE_THREAD_ROUTINE start_routine( void* c ) {
+            thread_closure_2 *self = static_cast<thread_closure_2*>(c);
+            self->function(self->arg1, self->arg2);
+            delete self;
+            return 0;
+        }
+        thread_closure_2( const F& f, const X& x, const Y& y ) : function(f), arg1(x), arg2(y) {}
+    };
+
+    //! Versioned thread class.
+    class tbb_thread_v3 {
+        tbb_thread_v3(const tbb_thread_v3&); // = delete;   // Deny access
+    public:
+#if _WIN32||_WIN64
+        typedef HANDLE native_handle_type; 
+#else
+        typedef pthread_t native_handle_type; 
+#endif // _WIN32||_WIN64
+
+        class id;
+        //! Constructs a thread object that does not represent a thread of execution. 
+        tbb_thread_v3() : my_handle(0)
+#if _WIN32||_WIN64
+            , my_thread_id(0)
+#endif // _WIN32||_WIN64
+        {}
+        
+        //! Constructs an object and executes f() in a new thread
+        template <class F> explicit tbb_thread_v3(F f) {
+            typedef internal::thread_closure_0<F> closure_type;
+            internal_start(closure_type::start_routine, new closure_type(f));
+        }
+        //! Constructs an object and executes f(x) in a new thread
+        template <class F, class X> tbb_thread_v3(F f, X x) {
+            typedef internal::thread_closure_1<F,X> closure_type;
+            internal_start(closure_type::start_routine, new closure_type(f,x));
+        }
+        //! Constructs an object and executes f(x,y) in a new thread
+        template <class F, class X, class Y> tbb_thread_v3(F f, X x, Y y) {
+            typedef internal::thread_closure_2<F,X,Y> closure_type;
+            internal_start(closure_type::start_routine, new closure_type(f,x,y));
+        }
+
+        tbb_thread_v3& operator=(tbb_thread_v3& x) {
+            if (joinable()) detach();
+            my_handle = x.my_handle;
+            x.my_handle = 0;
+#if _WIN32||_WIN64
+            my_thread_id = x.my_thread_id;
+            x.my_thread_id = 0;
+#endif // _WIN32||_WIN64
+            return *this;
+        }
+        void swap( tbb_thread_v3& t ) {tbb::swap( *this, t );}
+        bool joinable() const {return my_handle!=0; }
+        //! The completion of the thread represented by *this happens before join() returns.
+        void __TBB_EXPORTED_METHOD join();
+        //! When detach() returns, *this no longer represents the possibly continuing thread of execution.
+        void __TBB_EXPORTED_METHOD detach();
+        ~tbb_thread_v3() {if( joinable() ) detach();}
+        inline id get_id() const;
+        native_handle_type native_handle() { return my_handle; }
+    
+        //! The number of hardware thread contexts.
+        /** Before TBB 3.0 U4 this method returned the number of logical CPUs in
+            the system. Currently on Windows, Linux and FreeBSD it returns the
+            number of logical CPUs available to the current process in accordance
+            with its affinity mask.
+            
+            NOTE: The return value of this method never changes after its first
+            invocation. This means that changes in the process affinity mask that
+            took place after this method was first invoked will not affect the
+            number of worker threads in the TBB worker threads pool. **/
+        static unsigned __TBB_EXPORTED_FUNC hardware_concurrency();
+    private:
+        native_handle_type my_handle; 
+#if _WIN32||_WIN64
+        DWORD my_thread_id;
+#endif // _WIN32||_WIN64
+
+        /** Runs start_routine(closure) on another thread and sets my_handle to the handle of the created thread. */
+        void __TBB_EXPORTED_METHOD internal_start( __TBB_NATIVE_THREAD_ROUTINE_PTR(start_routine), 
+                             void* closure );
+        friend void __TBB_EXPORTED_FUNC move_v3( tbb_thread_v3& t1, tbb_thread_v3& t2 );
+        friend void tbb::swap( tbb_thread_v3& t1, tbb_thread_v3& t2 ); 
+    };
+        
+    class tbb_thread_v3::id { 
+#if _WIN32||_WIN64
+        DWORD my_id;
+        id( DWORD id_ ) : my_id(id_) {}
+#else
+        pthread_t my_id;
+        id( pthread_t id_ ) : my_id(id_) {}
+#endif // _WIN32||_WIN64
+        friend class tbb_thread_v3;
+    public:
+        id() : my_id(0) {}
+
+        friend bool operator==( tbb_thread_v3::id x, tbb_thread_v3::id y );
+        friend bool operator!=( tbb_thread_v3::id x, tbb_thread_v3::id y );
+        friend bool operator<( tbb_thread_v3::id x, tbb_thread_v3::id y );
+        friend bool operator<=( tbb_thread_v3::id x, tbb_thread_v3::id y );
+        friend bool operator>( tbb_thread_v3::id x, tbb_thread_v3::id y );
+        friend bool operator>=( tbb_thread_v3::id x, tbb_thread_v3::id y );
+        
+        template<class charT, class traits>
+        friend std::basic_ostream<charT, traits>&
+        operator<< (std::basic_ostream<charT, traits> &out, 
+                    tbb_thread_v3::id id)
+        {
+            out << id.my_id;
+            return out;
+        }
+        friend tbb_thread_v3::id __TBB_EXPORTED_FUNC thread_get_id_v3();
+    }; // tbb_thread_v3::id
+
+    tbb_thread_v3::id tbb_thread_v3::get_id() const {
+#if _WIN32||_WIN64
+        return id(my_thread_id);
+#else
+        return id(my_handle);
+#endif // _WIN32||_WIN64
+    }
+    void __TBB_EXPORTED_FUNC move_v3( tbb_thread_v3& t1, tbb_thread_v3& t2 );
+    tbb_thread_v3::id __TBB_EXPORTED_FUNC thread_get_id_v3();
+    void __TBB_EXPORTED_FUNC thread_yield_v3();
+    void __TBB_EXPORTED_FUNC thread_sleep_v3(const tick_count::interval_t &i);
+
+    inline bool operator==(tbb_thread_v3::id x, tbb_thread_v3::id y)
+    {
+        return x.my_id == y.my_id;
+    }
+    inline bool operator!=(tbb_thread_v3::id x, tbb_thread_v3::id y)
+    {
+        return x.my_id != y.my_id;
+    }
+    inline bool operator<(tbb_thread_v3::id x, tbb_thread_v3::id y)
+    {
+        return x.my_id < y.my_id;
+    }
+    inline bool operator<=(tbb_thread_v3::id x, tbb_thread_v3::id y)
+    {
+        return x.my_id <= y.my_id;
+    }
+    inline bool operator>(tbb_thread_v3::id x, tbb_thread_v3::id y)
+    {
+        return x.my_id > y.my_id;
+    }
+    inline bool operator>=(tbb_thread_v3::id x, tbb_thread_v3::id y)
+    {
+        return x.my_id >= y.my_id;
+    }
+
+} // namespace internal
+
+//! Users reference thread class by name tbb_thread
+typedef internal::tbb_thread_v3 tbb_thread;
+
+using internal::operator==;
+using internal::operator!=;
+using internal::operator<;
+using internal::operator>;
+using internal::operator<=;
+using internal::operator>=;
+
+inline void move( tbb_thread& t1, tbb_thread& t2 ) {
+    internal::move_v3(t1, t2);
+}
+
+inline void swap( internal::tbb_thread_v3& t1, internal::tbb_thread_v3& t2 ) {
+    tbb::tbb_thread::native_handle_type h = t1.my_handle;
+    t1.my_handle = t2.my_handle;
+    t2.my_handle = h;
+#if _WIN32||_WIN64
+    DWORD i = t1.my_thread_id;
+    t1.my_thread_id = t2.my_thread_id;
+    t2.my_thread_id = i;
+#endif /* _WIN32||_WIN64 */
+}
+
+namespace this_tbb_thread {
+    inline tbb_thread::id get_id() { return internal::thread_get_id_v3(); }
+    //! Offers the operating system the opportunity to schedule another thread.
+    inline void yield() { internal::thread_yield_v3(); }
+    //! The current thread blocks at least until the time specified.
+    inline void sleep(const tick_count::interval_t &i) { 
+        internal::thread_sleep_v3(i);  
+    }
+}  // namespace this_tbb_thread
+
+} // namespace tbb
+
+#endif /* __TBB_tbb_thread_H */
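
A minimal usage sketch of the class above (linking against the TBB runtime is assumed, since thread creation and join/detach live in tbb.dll/libtbb): the two-argument constructor wraps the callable and its argument in a thread_closure_1 and hands its start_routine to the native threading API.

#include "tbb/tbb_thread.h"
#include <cstdio>

static void worker(int id) { std::printf("hello from worker %d\n", id); }

int main() {
    tbb::tbb_thread t(worker, 1);     // runs worker(1) on a new thread via thread_closure_1
    tbb::this_tbb_thread::yield();    // give the new thread a chance to run
    t.join();                         // the thread's completion happens-before join() returns
    return 0;
}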
diff --git a/tbb/include/tbb/tbbmalloc_proxy.h b/tbb/include/tbb/tbbmalloc_proxy.h
new file mode 100644 (file)
index 0000000..f0f0ed7
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+/*
+Replacing the standard memory allocation routines in Microsoft* C/C++ RTL 
+(malloc/free, global new/delete, etc.) with the TBB memory allocator. 
+
+Include the following header in a source file of any binary that is loaded during
+application startup:
+
+#include "tbb/tbbmalloc_proxy.h"
+
+or add the following parameters to the linker options for a binary that is
+loaded during application startup (either an exe or a dll).
+
+For win32:
+tbbmalloc_proxy.lib /INCLUDE:"___TBB_malloc_proxy"
+For win64:
+tbbmalloc_proxy.lib /INCLUDE:"__TBB_malloc_proxy"
+*/
+
+#ifndef __TBB_tbbmalloc_proxy_H
+#define __TBB_tbbmalloc_proxy_H
+
+#if _MSC_VER
+
+#ifdef _DEBUG
+    #pragma comment(lib, "tbbmalloc_proxy_debug.lib")
+#else
+    #pragma comment(lib, "tbbmalloc_proxy.lib")
+#endif
+
+#if defined(_WIN64)
+    #pragma comment(linker, "/include:__TBB_malloc_proxy")
+#else
+    #pragma comment(linker, "/include:___TBB_malloc_proxy")
+#endif
+
+#else
+/* Primarily to support MinGW */
+
+extern "C" void __TBB_malloc_proxy();
+struct __TBB_malloc_proxy_caller {
+    __TBB_malloc_proxy_caller() { __TBB_malloc_proxy(); }
+} volatile __TBB_malloc_proxy_helper_object;
+
+#endif // _MSC_VER
+
+#endif //__TBB_tbbmalloc_proxy_H
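
In practice the header is self-activating on MSVC: including it in any translation unit of a binary loaded at startup injects the #pragma comment directives above, which pull in the proxy library and force the __TBB_malloc_proxy symbol, so malloc/free and global new/delete in the process are redirected. A hedged sketch (the tbbmalloc and proxy DLLs must be reachable at run time):

#include "tbb/tbbmalloc_proxy.h"   // enough, on MSVC, to route the CRT allocator through TBB
#include <cstdlib>

int main() {
    void* p = std::malloc(1024);   // now serviced by the TBB scalable allocator
    std::free(p);
    return 0;
}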
diff --git a/tbb/include/tbb/tick_count.h b/tbb/include/tbb/tick_count.h
new file mode 100644 (file)
index 0000000..394afb0
--- /dev/null
@@ -0,0 +1,155 @@
+/*
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
+
+    This file is part of Threading Building Blocks.
+
+    Threading Building Blocks is free software; you can redistribute it
+    and/or modify it under the terms of the GNU General Public License
+    version 2 as published by the Free Software Foundation.
+
+    Threading Building Blocks is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Threading Building Blocks; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    As a special exception, you may use this file as part of a free software
+    library without restriction.  Specifically, if other files instantiate
+    templates or use macros or inline functions from this file, or you compile
+    this file and link it with other files to produce an executable, this
+    file does not by itself cause the resulting executable to be covered by
+    the GNU General Public License.  This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+#ifndef __TBB_tick_count_H
+#define __TBB_tick_count_H
+
+#include "tbb_stddef.h"
+
+#if _WIN32||_WIN64
+#include "machine/windows_api.h"
+#elif __linux__
+#include <ctime>
+#else /* generic Unix */
+#include <sys/time.h>
+#endif /* (choice of OS) */
+
+namespace tbb {
+
+//! Absolute timestamp
+/** @ingroup timing */
+class tick_count {
+public:
+    //! Relative time interval.
+    class interval_t {
+        long long value;
+        explicit interval_t( long long value_ ) : value(value_) {}
+    public:
+        //! Construct a time interval representing zero time duration
+        interval_t() : value(0) {};
+
+        //! Construct a time interval representing a duration of sec seconds
+        explicit interval_t( double sec );
+
+        //! Return the length of a time interval in seconds
+        double seconds() const;
+
+        friend class tbb::tick_count;
+
+        //! Extract the intervals from the tick_counts and subtract them.
+        friend interval_t operator-( const tick_count& t1, const tick_count& t0 );
+
+        //! Add two intervals.
+        friend interval_t operator+( const interval_t& i, const interval_t& j ) {
+            return interval_t(i.value+j.value);
+        }
+
+        //! Subtract two intervals.
+        friend interval_t operator-( const interval_t& i, const interval_t& j ) {
+            return interval_t(i.value-j.value);
+        }
+
+        //! Accumulation operator
+        interval_t& operator+=( const interval_t& i ) {value += i.value; return *this;}
+
+        //! Subtraction operator
+        interval_t& operator-=( const interval_t& i ) {value -= i.value; return *this;}
+    };
+    
+    //! Construct an absolute timestamp initialized to zero.
+    tick_count() : my_count(0) {};
+
+    //! Return current time.
+    static tick_count now();
+    
+    //! Subtract two timestamps to get the time interval between them
+    friend interval_t operator-( const tick_count& t1, const tick_count& t0 );
+
+private:
+    long long my_count;
+};
+
+inline tick_count tick_count::now() {
+    tick_count result;
+#if _WIN32||_WIN64
+    LARGE_INTEGER qpcnt;
+    QueryPerformanceCounter(&qpcnt);
+    result.my_count = qpcnt.QuadPart;
+#elif __linux__
+    struct timespec ts;
+#if TBB_USE_ASSERT
+    int status = 
+#endif /* TBB_USE_ASSERT */
+        clock_gettime( CLOCK_REALTIME, &ts );
+    __TBB_ASSERT( status==0, "CLOCK_REALTIME not supported" );
+    result.my_count = static_cast<long long>(1000000000UL)*static_cast<long long>(ts.tv_sec) + static_cast<long long>(ts.tv_nsec);
+#else /* generic Unix */
+    struct timeval tv;
+#if TBB_USE_ASSERT
+    int status = 
+#endif /* TBB_USE_ASSERT */
+        gettimeofday(&tv, NULL);
+    __TBB_ASSERT( status==0, "gettimeofday failed" );
+    result.my_count = static_cast<long long>(1000000)*static_cast<long long>(tv.tv_sec) + static_cast<long long>(tv.tv_usec);
+#endif /*(choice of OS) */
+    return result;
+}
+
+inline tick_count::interval_t::interval_t( double sec )
+{
+#if _WIN32||_WIN64
+    LARGE_INTEGER qpfreq;
+    QueryPerformanceFrequency(&qpfreq);
+    value = static_cast<long long>(sec*qpfreq.QuadPart);
+#elif __linux__
+    value = static_cast<long long>(sec*1E9);
+#else /* generic Unix */
+    value = static_cast<long long>(sec*1E6);
+#endif /* (choice of OS) */
+}
+
+inline tick_count::interval_t operator-( const tick_count& t1, const tick_count& t0 ) {
+    return tick_count::interval_t( t1.my_count-t0.my_count );
+}
+
+inline double tick_count::interval_t::seconds() const {
+#if _WIN32||_WIN64
+    LARGE_INTEGER qpfreq;
+    QueryPerformanceFrequency(&qpfreq);
+    return value/(double)qpfreq.QuadPart;
+#elif __linux__
+    return value*1E-9;
+#else /* generic Unix */
+    return value*1E-6;
+#endif /* (choice of OS) */
+}
+
+} // namespace tbb
+
+#endif /* __TBB_tick_count_H */
+
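
tick_count is used by subtracting two timestamps and asking the resulting interval_t for seconds; the platform-specific clock reads are hidden behind now(). A minimal sketch:

#include "tbb/tick_count.h"
#include <cstdio>

int main() {
    tbb::tick_count t0 = tbb::tick_count::now();
    volatile double sink = 0;
    for (int i = 0; i < 1000000; ++i) sink += i * 0.5;     // the work being timed
    tbb::tick_count t1 = tbb::tick_count::now();
    std::printf("elapsed: %g s\n", (t1 - t0).seconds());   // interval_t::seconds()
    return 0;
}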
diff --git a/tbb/lib/ia32/vc10/irml/irml.lib b/tbb/lib/ia32/vc10/irml/irml.lib
new file mode 100644 (file)
index 0000000..9d6c9e3
Binary files /dev/null and b/tbb/lib/ia32/vc10/irml/irml.lib differ
diff --git a/tbb/lib/ia32/vc10/irml/irml_debug.lib b/tbb/lib/ia32/vc10/irml/irml_debug.lib
new file mode 100644 (file)
index 0000000..b7c5fbe
Binary files /dev/null and b/tbb/lib/ia32/vc10/irml/irml_debug.lib differ
diff --git a/tbb/lib/ia32/vc10/irml_c/irml.lib b/tbb/lib/ia32/vc10/irml_c/irml.lib
new file mode 100644 (file)
index 0000000..781e150
Binary files /dev/null and b/tbb/lib/ia32/vc10/irml_c/irml.lib differ
diff --git a/tbb/lib/ia32/vc10/irml_c/irml_debug.lib b/tbb/lib/ia32/vc10/irml_c/irml_debug.lib
new file mode 100644 (file)
index 0000000..50b0445
Binary files /dev/null and b/tbb/lib/ia32/vc10/irml_c/irml_debug.lib differ
diff --git a/tbb/lib/ia32/vc10/tbb.def b/tbb/lib/ia32/vc10/tbb.def
new file mode 100644 (file)
index 0000000..dff02dd
--- /dev/null
@@ -0,0 +1,569 @@
+\r
+; Copyright 2005-2011 Intel Corporation.  All Rights Reserved.\r
+;\r
+; The source code contained or described herein and all documents related\r
+; to the source code ("Material") are owned by Intel Corporation or its\r
+; suppliers or licensors.  Title to the Material remains with Intel\r
+; Corporation or its suppliers and licensors.  The Material is protected\r
+; by worldwide copyright laws and treaty provisions.  No part of the\r
+; Material may be used, copied, reproduced, modified, published, uploaded,\r
+; posted, transmitted, distributed, or disclosed in any way without\r
+; Intel's prior express written permission.\r
+;\r
+; No license under any patent, copyright, trade secret or other\r
+; intellectual property right is granted to or conferred upon you by\r
+; disclosure or delivery of the Materials, either expressly, by\r
+; implication, inducement, estoppel or otherwise.  Any license under such\r
+; intellectual property rights must be express and approved by Intel in\r
+; writing.\r
+\r
+EXPORTS\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+    \r
+\r
+\r
+\r
+    \r
+    \r
+\r
+\r
+        \r
+    \r
+\r
+\r
+\r
+    \r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+    \r
+\r
+\r
+\r
+    \r
+\r
+\r
+\r
+    \r
+\r
+\r
+\r
+    \r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+    \r
+        \r
+    \r
+\r
+\r
+\r
+    \r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+    \r
+        \r
+    \r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+__TBB_machine_cmpswp8\r
+\r
+\r
+\r
+__TBB_machine_fetchadd8\r
+\r
+\r
+\r
+__TBB_machine_fetchstore8\r
+__TBB_machine_store8\r
+__TBB_machine_load8\r
+__TBB_machine_trylockbyte\r
+\r
+\r
+?NFS_Allocate@internal@tbb@@YAPAXIIPAX@Z\r
+?NFS_GetLineSize@internal@tbb@@YAIXZ\r
+?NFS_Free@internal@tbb@@YAXPAX@Z\r
+?allocate_via_handler_v3@internal@tbb@@YAPAXI@Z\r
+?deallocate_via_handler_v3@internal@tbb@@YAXPAX@Z\r
+?is_malloc_used_v3@internal@tbb@@YA_NXZ\r
+\r
+\r
+?allocate@allocate_additional_child_of_proxy@internal@tbb@@QBEAAVtask@3@I@Z\r
+?allocate@allocate_child_proxy@internal@tbb@@QBEAAVtask@3@I@Z\r
+?allocate@allocate_continuation_proxy@internal@tbb@@QBEAAVtask@3@I@Z\r
+?allocate@allocate_root_proxy@internal@tbb@@SAAAVtask@3@I@Z\r
+?destroy@task_base@internal@interface5@tbb@@SAXAAVtask@4@@Z\r
+?free@allocate_additional_child_of_proxy@internal@tbb@@QBEXAAVtask@3@@Z\r
+?free@allocate_child_proxy@internal@tbb@@QBEXAAVtask@3@@Z\r
+?free@allocate_continuation_proxy@internal@tbb@@QBEXAAVtask@3@@Z\r
+?free@allocate_root_proxy@internal@tbb@@SAXAAVtask@3@@Z\r
+?internal_set_ref_count@task@tbb@@AAEXH@Z\r
+?internal_decrement_ref_count@task@tbb@@AAEHXZ\r
+?is_owned_by_current_thread@task@tbb@@QBE_NXZ\r
+?note_affinity@task@tbb@@UAEXG@Z\r
+?resize@affinity_partitioner_base_v3@internal@tbb@@AAEXI@Z\r
+?self@task@tbb@@SAAAV12@XZ\r
+?spawn_and_wait_for_all@task@tbb@@QAEXAAVtask_list@2@@Z\r
+?default_num_threads@task_scheduler_init@tbb@@SAHXZ\r
+?initialize@task_scheduler_init@tbb@@QAEXHI@Z\r
+?initialize@task_scheduler_init@tbb@@QAEXH@Z\r
+?terminate@task_scheduler_init@tbb@@QAEXXZ\r
+?observe@task_scheduler_observer_v3@internal@tbb@@QAEX_N@Z\r
+\r
+\r
+\r
+?destroy@task@tbb@@QAEXAAV12@@Z\r
+\r
+\r
+\r
+\r
+?allocate@allocate_root_with_context_proxy@internal@tbb@@QBEAAVtask@3@I@Z\r
+?free@allocate_root_with_context_proxy@internal@tbb@@QBEXAAVtask@3@@Z\r
+?change_group@task@tbb@@QAEXAAVtask_group_context@2@@Z\r
+?is_group_execution_cancelled@task_group_context@tbb@@QBE_NXZ\r
+?cancel_group_execution@task_group_context@tbb@@QAE_NXZ\r
+?reset@task_group_context@tbb@@QAEXXZ\r
+?init@task_group_context@tbb@@IAEXXZ\r
+?register_pending_exception@task_group_context@tbb@@QAEXXZ\r
+??1task_group_context@tbb@@QAE@XZ\r
+\r
+\r
+\r
+\r
+?name@captured_exception@tbb@@UBEPBDXZ\r
+?what@captured_exception@tbb@@UBEPBDXZ\r
+??1captured_exception@tbb@@UAE@XZ\r
+?move@captured_exception@tbb@@UAEPAV12@XZ\r
+?destroy@captured_exception@tbb@@UAEXXZ\r
+?set@captured_exception@tbb@@QAEXPBD0@Z\r
+?clear@captured_exception@tbb@@QAEXXZ\r
+\r
+\r
+\r
+?throw_bad_last_alloc_exception_v4@internal@tbb@@YAXXZ\r
+?throw_exception_v4@internal@tbb@@YAXW4exception_id@12@@Z\r
+?what@bad_last_alloc@tbb@@UBEPBDXZ\r
+?what@missing_wait@tbb@@UBEPBDXZ\r
+?what@invalid_multiple_scheduling@tbb@@UBEPBDXZ\r
+?what@improper_lock@tbb@@UBEPBDXZ\r
+\r
+\r
+?assertion_failure@tbb@@YAXPBDH00@Z\r
+?get_initial_auto_partitioner_divisor@internal@tbb@@YAIXZ\r
+?handle_perror@internal@tbb@@YAXHPBD@Z\r
+?set_assertion_handler@tbb@@YAP6AXPBDH00@ZP6AX0H00@Z@Z\r
+?runtime_warning@internal@tbb@@YAXPBDZZ\r
+TBB_runtime_interface_version\r
+\r
+\r
+?itt_load_pointer_with_acquire_v3@internal@tbb@@YAPAXPBX@Z\r
+?itt_store_pointer_with_release_v3@internal@tbb@@YAXPAX0@Z\r
+?call_itt_notify_v5@internal@tbb@@YAXHPAX@Z\r
+?itt_set_sync_name_v3@internal@tbb@@YAXPAXPB_W@Z\r
+?itt_load_pointer_v3@internal@tbb@@YAPAXPBX@Z\r
+\r
+\r
+??0pipeline@tbb@@QAE@XZ\r
+??1filter@tbb@@UAE@XZ\r
+??1pipeline@tbb@@UAE@XZ\r
+??_7pipeline@tbb@@6B@\r
+?add_filter@pipeline@tbb@@QAEXAAVfilter@2@@Z\r
+?clear@pipeline@tbb@@QAEXXZ\r
+?inject_token@pipeline@tbb@@AAEXAAVtask@2@@Z\r
+?run@pipeline@tbb@@QAEXI@Z\r
+\r
+?run@pipeline@tbb@@QAEXIAAVtask_group_context@2@@Z\r
+\r
+?process_item@thread_bound_filter@tbb@@QAE?AW4result_type@12@XZ\r
+?try_process_item@thread_bound_filter@tbb@@QAE?AW4result_type@12@XZ\r
+?set_end_of_input@filter@tbb@@IAEXXZ\r
+\r
+\r
+?internal_construct@queuing_rw_mutex@tbb@@QAEXXZ\r
+?acquire@scoped_lock@queuing_rw_mutex@tbb@@QAEXAAV23@_N@Z\r
+?downgrade_to_reader@scoped_lock@queuing_rw_mutex@tbb@@QAE_NXZ\r
+?release@scoped_lock@queuing_rw_mutex@tbb@@QAEXXZ\r
+?upgrade_to_writer@scoped_lock@queuing_rw_mutex@tbb@@QAE_NXZ\r
+?try_acquire@scoped_lock@queuing_rw_mutex@tbb@@QAE_NAAV23@_N@Z\r
+\r
+\r
+?try_lock_read@reader_writer_lock@interface5@tbb@@QAE_NXZ\r
+?try_lock@reader_writer_lock@interface5@tbb@@QAE_NXZ\r
+?unlock@reader_writer_lock@interface5@tbb@@QAEXXZ\r
+?lock_read@reader_writer_lock@interface5@tbb@@QAEXXZ\r
+?lock@reader_writer_lock@interface5@tbb@@QAEXXZ\r
+?internal_construct@reader_writer_lock@interface5@tbb@@AAEXXZ\r
+?internal_destroy@reader_writer_lock@interface5@tbb@@AAEXXZ\r
+?internal_construct@scoped_lock@reader_writer_lock@interface5@tbb@@AAEXAAV234@@Z\r
+?internal_destroy@scoped_lock@reader_writer_lock@interface5@tbb@@AAEXXZ\r
+?internal_construct@scoped_lock_read@reader_writer_lock@interface5@tbb@@AAEXAAV234@@Z\r
+?internal_destroy@scoped_lock_read@reader_writer_lock@interface5@tbb@@AAEXXZ\r
+\r
+\r
+\r
+?internal_acquire_reader@spin_rw_mutex@tbb@@CAXPAV12@@Z\r
+?internal_acquire_writer@spin_rw_mutex@tbb@@CA_NPAV12@@Z\r
+?internal_downgrade@spin_rw_mutex@tbb@@CAXPAV12@@Z\r
+?internal_itt_releasing@spin_rw_mutex@tbb@@CAXPAV12@@Z\r
+?internal_release_reader@spin_rw_mutex@tbb@@CAXPAV12@@Z\r
+?internal_release_writer@spin_rw_mutex@tbb@@CAXPAV12@@Z\r
+?internal_upgrade@spin_rw_mutex@tbb@@CA_NPAV12@@Z\r
+?internal_try_acquire_writer@spin_rw_mutex@tbb@@CA_NPAV12@@Z\r
+?internal_try_acquire_reader@spin_rw_mutex@tbb@@CA_NPAV12@@Z\r
+\r
+\r
+\r
+?internal_construct@spin_rw_mutex_v3@tbb@@AAEXXZ\r
+?internal_upgrade@spin_rw_mutex_v3@tbb@@AAE_NXZ\r
+?internal_downgrade@spin_rw_mutex_v3@tbb@@AAEXXZ\r
+?internal_acquire_reader@spin_rw_mutex_v3@tbb@@AAEXXZ\r
+?internal_acquire_writer@spin_rw_mutex_v3@tbb@@AAE_NXZ\r
+?internal_release_reader@spin_rw_mutex_v3@tbb@@AAEXXZ\r
+?internal_release_writer@spin_rw_mutex_v3@tbb@@AAEXXZ\r
+?internal_try_acquire_reader@spin_rw_mutex_v3@tbb@@AAE_NXZ\r
+?internal_try_acquire_writer@spin_rw_mutex_v3@tbb@@AAE_NXZ\r
+\r
+\r
+?internal_construct@spin_mutex@tbb@@QAEXXZ\r
+?internal_acquire@scoped_lock@spin_mutex@tbb@@AAEXAAV23@@Z\r
+?internal_release@scoped_lock@spin_mutex@tbb@@AAEXXZ\r
+?internal_try_acquire@scoped_lock@spin_mutex@tbb@@AAE_NAAV23@@Z\r
+\r
+\r
+?internal_acquire@scoped_lock@mutex@tbb@@AAEXAAV23@@Z\r
+?internal_release@scoped_lock@mutex@tbb@@AAEXXZ\r
+?internal_try_acquire@scoped_lock@mutex@tbb@@AAE_NAAV23@@Z\r
+?internal_construct@mutex@tbb@@AAEXXZ\r
+?internal_destroy@mutex@tbb@@AAEXXZ\r
+\r
+\r
+?internal_acquire@scoped_lock@recursive_mutex@tbb@@AAEXAAV23@@Z\r
+?internal_release@scoped_lock@recursive_mutex@tbb@@AAEXXZ\r
+?internal_try_acquire@scoped_lock@recursive_mutex@tbb@@AAE_NAAV23@@Z\r
+?internal_construct@recursive_mutex@tbb@@AAEXXZ\r
+?internal_destroy@recursive_mutex@tbb@@AAEXXZ\r
+\r
+\r
+?internal_construct@queuing_mutex@tbb@@QAEXXZ\r
+?acquire@scoped_lock@queuing_mutex@tbb@@QAEXAAV23@@Z\r
+?release@scoped_lock@queuing_mutex@tbb@@QAEXXZ\r
+?try_acquire@scoped_lock@queuing_mutex@tbb@@QAE_NAAV23@@Z\r
+\r
+\r
+?internal_construct@critical_section_v4@internal@tbb@@QAEXXZ\r
+\r
+\r
+\r
+?internal_grow_predicate@hash_map_segment_base@internal@tbb@@QBE_NXZ\r
+\r
+\r
+?advance@concurrent_queue_iterator_base@internal@tbb@@IAEXXZ\r
+?assign@concurrent_queue_iterator_base@internal@tbb@@IAEXABV123@@Z\r
+?internal_size@concurrent_queue_base@internal@tbb@@IBEHXZ\r
+??0concurrent_queue_base@internal@tbb@@IAE@I@Z\r
+??0concurrent_queue_iterator_base@internal@tbb@@IAE@ABVconcurrent_queue_base@12@@Z\r
+??1concurrent_queue_base@internal@tbb@@MAE@XZ\r
+??1concurrent_queue_iterator_base@internal@tbb@@IAE@XZ\r
+?internal_pop@concurrent_queue_base@internal@tbb@@IAEXPAX@Z\r
+?internal_pop_if_present@concurrent_queue_base@internal@tbb@@IAE_NPAX@Z\r
+?internal_push@concurrent_queue_base@internal@tbb@@IAEXPBX@Z\r
+?internal_push_if_not_full@concurrent_queue_base@internal@tbb@@IAE_NPBX@Z\r
+?internal_set_capacity@concurrent_queue_base@internal@tbb@@IAEXHI@Z\r
+\r
+\r
+\r
+??1concurrent_queue_iterator_base_v3@internal@tbb@@IAE@XZ\r
+??0concurrent_queue_iterator_base_v3@internal@tbb@@IAE@ABVconcurrent_queue_base_v3@12@@Z\r
+??0concurrent_queue_iterator_base_v3@internal@tbb@@IAE@ABVconcurrent_queue_base_v3@12@I@Z\r
+?advance@concurrent_queue_iterator_base_v3@internal@tbb@@IAEXXZ\r
+?assign@concurrent_queue_iterator_base_v3@internal@tbb@@IAEXABV123@@Z\r
+??0concurrent_queue_base_v3@internal@tbb@@IAE@I@Z\r
+??1concurrent_queue_base_v3@internal@tbb@@MAE@XZ\r
+?internal_pop@concurrent_queue_base_v3@internal@tbb@@IAEXPAX@Z\r
+?internal_pop_if_present@concurrent_queue_base_v3@internal@tbb@@IAE_NPAX@Z\r
+?internal_push@concurrent_queue_base_v3@internal@tbb@@IAEXPBX@Z\r
+?internal_push_if_not_full@concurrent_queue_base_v3@internal@tbb@@IAE_NPBX@Z\r
+?internal_size@concurrent_queue_base_v3@internal@tbb@@IBEHXZ\r
+?internal_empty@concurrent_queue_base_v3@internal@tbb@@IBE_NXZ\r
+?internal_set_capacity@concurrent_queue_base_v3@internal@tbb@@IAEXHI@Z\r
+?internal_finish_clear@concurrent_queue_base_v3@internal@tbb@@IAEXXZ\r
+?internal_throw_exception@concurrent_queue_base_v3@internal@tbb@@IBEXXZ\r
+?assign@concurrent_queue_base_v3@internal@tbb@@IAEXABV123@@Z\r
+\r
+\r
+\r
+?internal_assign@concurrent_vector_base@internal@tbb@@IAEXABV123@IP6AXPAXI@ZP6AX1PBXI@Z4@Z\r
+?internal_capacity@concurrent_vector_base@internal@tbb@@IBEIXZ\r
+?internal_clear@concurrent_vector_base@internal@tbb@@IAEXP6AXPAXI@Z_N@Z\r
+?internal_copy@concurrent_vector_base@internal@tbb@@IAEXABV123@IP6AXPAXPBXI@Z@Z\r
+?internal_grow_by@concurrent_vector_base@internal@tbb@@IAEIIIP6AXPAXI@Z@Z\r
+?internal_grow_to_at_least@concurrent_vector_base@internal@tbb@@IAEXIIP6AXPAXI@Z@Z\r
+?internal_push_back@concurrent_vector_base@internal@tbb@@IAEPAXIAAI@Z\r
+?internal_reserve@concurrent_vector_base@internal@tbb@@IAEXIII@Z\r
+\r
+\r
+\r
+??1concurrent_vector_base_v3@internal@tbb@@IAE@XZ\r
+?internal_assign@concurrent_vector_base_v3@internal@tbb@@IAEXABV123@IP6AXPAXI@ZP6AX1PBXI@Z4@Z\r
+?internal_capacity@concurrent_vector_base_v3@internal@tbb@@IBEIXZ\r
+?internal_clear@concurrent_vector_base_v3@internal@tbb@@IAEIP6AXPAXI@Z@Z\r
+?internal_copy@concurrent_vector_base_v3@internal@tbb@@IAEXABV123@IP6AXPAXPBXI@Z@Z\r
+?internal_grow_by@concurrent_vector_base_v3@internal@tbb@@IAEIIIP6AXPAXPBXI@Z1@Z\r
+?internal_grow_to_at_least@concurrent_vector_base_v3@internal@tbb@@IAEXIIP6AXPAXPBXI@Z1@Z\r
+?internal_push_back@concurrent_vector_base_v3@internal@tbb@@IAEPAXIAAI@Z\r
+?internal_reserve@concurrent_vector_base_v3@internal@tbb@@IAEXIII@Z\r
+?internal_compact@concurrent_vector_base_v3@internal@tbb@@IAEPAXIPAXP6AX0I@ZP6AX0PBXI@Z@Z\r
+?internal_swap@concurrent_vector_base_v3@internal@tbb@@IAEXAAV123@@Z\r
+?internal_throw_exception@concurrent_vector_base_v3@internal@tbb@@IBEXI@Z\r
+?internal_resize@concurrent_vector_base_v3@internal@tbb@@IAEXIIIPBXP6AXPAXI@ZP6AX10I@Z@Z\r
+?internal_grow_to_at_least_with_result@concurrent_vector_base_v3@internal@tbb@@IAEIIIP6AXPAXPBXI@Z1@Z\r
+\r
+\r
+?join@tbb_thread_v3@internal@tbb@@QAEXXZ\r
+?detach@tbb_thread_v3@internal@tbb@@QAEXXZ\r
+?internal_start@tbb_thread_v3@internal@tbb@@AAEXP6GIPAX@Z0@Z\r
+?allocate_closure_v3@internal@tbb@@YAPAXI@Z\r
+?free_closure_v3@internal@tbb@@YAXPAX@Z\r
+?hardware_concurrency@tbb_thread_v3@internal@tbb@@SAIXZ\r
+?thread_yield_v3@internal@tbb@@YAXXZ\r
+?thread_sleep_v3@internal@tbb@@YAXABVinterval_t@tick_count@2@@Z\r
+?move_v3@internal@tbb@@YAXAAVtbb_thread_v3@12@0@Z\r
+?thread_get_id_v3@internal@tbb@@YA?AVid@tbb_thread_v3@12@XZ\r
+\r
+\r
+?internal_initialize_condition_variable@internal@interface5@tbb@@YAXAATcondvar_impl_t@123@@Z\r
+?internal_condition_variable_wait@internal@interface5@tbb@@YA_NAATcondvar_impl_t@123@PAVmutex@3@PBVinterval_t@tick_count@3@@Z\r
+?internal_condition_variable_notify_one@internal@interface5@tbb@@YAXAATcondvar_impl_t@123@@Z\r
+?internal_condition_variable_notify_all@internal@interface5@tbb@@YAXAATcondvar_impl_t@123@@Z\r
+?internal_destroy_condition_variable@internal@interface5@tbb@@YAXAATcondvar_impl_t@123@@Z\r
+\r
+\r
+\r
+\r
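The mangled names in tbb.def above are the C++ exports of the ia32/vc10 tbb.dll; client code never spells them out, it reaches them through the public TBB headers and links against tbb.lib. A minimal sketch, assuming the TBB 3.0-era API bundled in this drop (the file name link_check.cpp is illustrative, not part of the commit):

// link_check.cpp -- hypothetical smoke test built against tbb/include and tbb/lib/ia32/vc10/tbb.lib.
// The calls below resolve to exports listed in tbb.def, e.g.
// ?initialize@task_scheduler_init@tbb@@QAEXHI@Z and ?terminate@task_scheduler_init@tbb@@QAEXXZ.
#include <tbb/task_scheduler_init.h>
#include <tbb/spin_mutex.h>

int main() {
    tbb::task_scheduler_init init;            // constructor calls the exported initialize()
    tbb::spin_mutex m;
    {
        // The scoped_lock fast path may be inlined or may call the exported
        // ?internal_acquire@scoped_lock@spin_mutex@tbb@@ helper, depending on build flags.
        tbb::spin_mutex::scoped_lock lock(m);
    }
    return 0;                                 // init's destructor calls the exported terminate()
}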
diff --git a/tbb/lib/ia32/vc10/tbb.lib b/tbb/lib/ia32/vc10/tbb.lib
new file mode 100644 (file)
index 0000000..fa25f58
Binary files /dev/null and b/tbb/lib/ia32/vc10/tbb.lib differ
diff --git a/tbb/lib/ia32/vc10/tbb_debug.lib b/tbb/lib/ia32/vc10/tbb_debug.lib
new file mode 100644 (file)
index 0000000..9419494
Binary files /dev/null and b/tbb/lib/ia32/vc10/tbb_debug.lib differ
diff --git a/tbb/lib/ia32/vc10/tbb_preview.lib b/tbb/lib/ia32/vc10/tbb_preview.lib
new file mode 100644 (file)
index 0000000..5de0dd4
Binary files /dev/null and b/tbb/lib/ia32/vc10/tbb_preview.lib differ
diff --git a/tbb/lib/ia32/vc10/tbb_preview_debug.lib b/tbb/lib/ia32/vc10/tbb_preview_debug.lib
new file mode 100644 (file)
index 0000000..20766cd
Binary files /dev/null and b/tbb/lib/ia32/vc10/tbb_preview_debug.lib differ
diff --git a/tbb/lib/ia32/vc10/tbbmalloc.def b/tbb/lib/ia32/vc10/tbbmalloc.def
new file mode 100644 (file)
index 0000000..61d8b7e
--- /dev/null
+++ b/tbb/lib/ia32/vc10/tbbmalloc.def
@@ -0,0 +1,35 @@
+\r
+; Copyright 2005-2011 Intel Corporation.  All Rights Reserved.\r
+;\r
+; The source code contained or described herein and all documents related\r
+; to the source code ("Material") are owned by Intel Corporation or its\r
+; suppliers or licensors.  Title to the Material remains with Intel\r
+; Corporation or its suppliers and licensors.  The Material is protected\r
+; by worldwide copyright laws and treaty provisions.  No part of the\r
+; Material may be used, copied, reproduced, modified, published, uploaded,\r
+; posted, transmitted, distributed, or disclosed in any way without\r
+; Intel's prior express written permission.\r
+;\r
+; No license under any patent, copyright, trade secret or other\r
+; intellectual property right is granted to or conferred upon you by\r
+; disclosure or delivery of the Materials, either expressly, by\r
+; implication, inducement, estoppel or otherwise.  Any license under such\r
+; intellectual property rights must be express and approved by Intel in\r
+; writing.\r
+\r
+EXPORTS\r
+\r
+; MemoryAllocator.cpp\r
+scalable_calloc\r
+scalable_free\r
+scalable_malloc\r
+scalable_realloc\r
+scalable_posix_memalign\r
+scalable_aligned_malloc\r
+scalable_aligned_realloc\r
+scalable_aligned_free\r
+safer_scalable_free\r
+safer_scalable_realloc\r
+scalable_msize\r
+safer_scalable_msize\r
+safer_scalable_aligned_realloc\r
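Unlike tbb.def, tbbmalloc.def exports plain C entry points, so the names are unmangled; they are declared in the scalable_allocator.h header bundled with this TBB drop and resolved through tbbmalloc.lib. A minimal sketch, under that assumption (alloc_check.cpp is an illustrative name):

// alloc_check.cpp -- hypothetical smoke test for the tbbmalloc exports listed above.
// Builds against tbb/include and tbb/lib/ia32/vc10/tbbmalloc.lib.
#include <tbb/scalable_allocator.h>

int main() {
    void* p = scalable_malloc(1024);          // exported as scalable_malloc
    if (!p)
        return 1;
    void* q = scalable_realloc(p, 4096);      // exported as scalable_realloc
    scalable_free(q ? q : p);                 // exported as scalable_free
    return 0;
}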
diff --git a/tbb/lib/ia32/vc10/tbbmalloc.lib b/tbb/lib/ia32/vc10/tbbmalloc.lib
new file mode 100644 (file)
index 0000000..8fd46ce
Binary files /dev/null and b/tbb/lib/ia32/vc10/tbbmalloc.lib differ
diff --git a/tbb/lib/ia32/vc10/tbbmalloc_debug.lib b/tbb/lib/ia32/vc10/tbbmalloc_debug.lib
new file mode 100644 (file)
index 0000000..237f942
Binary files /dev/null and b/tbb/lib/ia32/vc10/tbbmalloc_debug.lib differ
diff --git a/tbb/lib/ia32/vc10/tbbmalloc_proxy.lib b/tbb/lib/ia32/vc10/tbbmalloc_proxy.lib
new file mode 100644 (file)
index 0000000..d282777
Binary files /dev/null and b/tbb/lib/ia32/vc10/tbbmalloc_proxy.lib differ
diff --git a/tbb/lib/ia32/vc10/tbbmalloc_proxy_debug.lib b/tbb/lib/ia32/vc10/tbbmalloc_proxy_debug.lib
new file mode 100644 (file)
index 0000000..93f484f
Binary files /dev/null and b/tbb/lib/ia32/vc10/tbbmalloc_proxy_debug.lib differ