/*
    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.

    This file is part of Threading Building Blocks.

    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.

    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Threading Building Blocks; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

    As a special exception, you may use this file as part of a free software
    library without restriction.  Specifically, if other files instantiate
    templates or use macros or inline functions from this file, or you compile
    this file and link it with other files to produce an executable, this
    file does not by itself cause the resulting executable to be covered by
    the GNU General Public License.  This exception does not however
    invalidate any other reasons why the executable file might be covered by
    the GNU General Public License.
*/
#ifndef __TBB_machine_H
#define __TBB_machine_H

#include "tbb_stddef.h"

#if _WIN32||_WIN64
#ifdef _MANAGED
#pragma managed(push, off)
#endif

#if __MINGW64__ || __MINGW32__
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield()  SwitchToThread()
#if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
#include "machine/gcc_generic.h"
#elif __MINGW64__
#include "machine/linux_intel64.h"
#elif __MINGW32__
#include "machine/linux_ia32.h"
#endif
#elif defined(_M_IX86)
#include "machine/windows_ia32.h"
#elif defined(_M_AMD64)
#include "machine/windows_intel64.h"
#elif _XBOX
#include "machine/xbox360_ppc.h"
#endif

#ifdef _MANAGED
#pragma managed(pop)
#endif
#elif __linux__ || __FreeBSD__ || __NetBSD__

#if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
#include "machine/gcc_generic.h"
#elif __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __ia64__
#include "machine/linux_ia64.h"
#elif __powerpc__
#include "machine/mac_ppc.h"
#elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT
#include "machine/gcc_generic.h"
#endif
#include "machine/linux_common.h"
#elif __APPLE__
#if __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __POWERPC__
#include "machine/mac_ppc.h"
#endif
#include "machine/macos_common.h"
#elif _AIX
#include "machine/ibm_aix51.h"
#elif __sun || __SUNPRO_CC
#define __asm__ asm
#define __volatile__ volatile
#if __i386 || __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __sparc
#include "machine/sunos_sparc.h"
#endif
#include <sched.h>
#define __TBB_Yield() sched_yield()

#endif /* OS selection */

#ifndef __TBB_64BIT_ATOMICS
#define __TBB_64BIT_ATOMICS 1
#endif

//! Prerequisites for each architecture port
/** There is no generic implementation for these macros, so they have to be implemented
    in each machine architecture specific header.

    __TBB_full_memory_fence must prevent all memory operations from being reordered
    across the fence. All such fences must be totally ordered (or sequentially
    consistent). These fences must affect both the compiler and the hardware.

    __TBB_release_consistency_helper is used to enforce guarantees of acquire or
    release semantics in generic implementations of __TBB_load_with_acquire and
    __TBB_store_with_release below. Depending on the particular combination of
    architecture and compiler it can be a hardware fence, a compiler fence, both,
    or nothing. */
#if    !defined(__TBB_CompareAndSwap4)                        \
    || !defined(__TBB_CompareAndSwap8) && __TBB_64BIT_ATOMICS \
    || !defined(__TBB_Yield)                                  \
    || !defined(__TBB_full_memory_fence)                      \
    || !defined(__TBB_release_consistency_helper)
#error Minimal requirements for tbb_machine.h not satisfied; platform is not supported.
#endif
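
// As an illustrative sketch only (not part of TBB itself): a hypothetical port built
// on the GCC atomic builtins could satisfy the prerequisites above roughly as follows.
// Note the operand order: TBB's CAS takes (ptr, new value, comparand), while
// __sync_val_compare_and_swap takes (ptr, expected, desired).
//
//     #define __TBB_CompareAndSwap4(P,V,C) __sync_val_compare_and_swap((volatile int32_t*)(P),(int32_t)(C),(int32_t)(V))
//     #define __TBB_CompareAndSwap8(P,V,C) __sync_val_compare_and_swap((volatile int64_t*)(P),(int64_t)(C),(int64_t)(V))
//     #define __TBB_full_memory_fence()          __sync_synchronize()
//     #define __TBB_release_consistency_helper() __asm__ __volatile__("": : :"memory")
//     #define __TBB_Yield()                      sched_yield()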

#ifndef __TBB_Pause
inline void __TBB_Pause(int32_t) {
    __TBB_Yield();
}
#endif

namespace tbb {

//! Sequentially consistent full memory fence.
inline void atomic_fence () { __TBB_full_memory_fence(); }

namespace internal {

//! Class that implements exponential backoff.
/** See implementation of spin_wait_while_eq for an example. */
class atomic_backoff : no_copy {
    //! Time delay, in units of "pause" instructions.
    /** Should be equal to approximately the number of "pause" instructions
        that take the same time as a context switch. */
    static const int32_t LOOPS_BEFORE_YIELD = 16;
    int32_t count;
public:
    atomic_backoff() : count(1) {}

    //! Pause for a while.
    void pause() {
        if( count<=LOOPS_BEFORE_YIELD ) {
            __TBB_Pause(count);
            // Pause twice as long the next time.
            count*=2;
        } else {
            // Pause is so long that we might as well yield CPU to scheduler.
            __TBB_Yield();
        }
    }

    //! Pause a bounded number of times; return false once the bound is exhausted.
    bool bounded_pause() {
        if( count<=LOOPS_BEFORE_YIELD ) {
            __TBB_Pause(count);
            // Pause twice as long the next time.
            count*=2;
            return true;
        } else {
            return false;
        }
    }

    void reset() { count = 1; }
};

//! Spin WHILE the value of the variable is equal to a given value
/** T and U should be comparable types. */
template<typename T, typename U>
void spin_wait_while_eq( const volatile T& location, U value ) {
    atomic_backoff backoff;
    while( location==value ) backoff.pause();
}

//! Spin UNTIL the value of the variable is equal to a given value
/** T and U should be comparable types. */
template<typename T, typename U>
void spin_wait_until_eq( const volatile T& location, const U value ) {
    atomic_backoff backoff;
    while( location!=value ) backoff.pause();
}
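
// Illustrative usage sketch (hypothetical names, not part of the header): a consumer
// waits with exponential backoff for a flag that a producer publishes.
//
//     volatile int ready = 0;                        // hypothetical shared flag
//     // consumer thread:
//     tbb::internal::spin_wait_until_eq( ready, 1 );
//     // producer thread sets ready to 1, ideally with release semantics,
//     // e.g. via __TBB_store_with_release (defined later in this header).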

// T should be unsigned, otherwise sign propagation will break correctness of bit manipulations.
// S should be either 1 or 2, for the mask calculation to work correctly.
// Together, these rules limit applicability of Masked CAS to unsigned char and unsigned short.
template<size_t S, typename T>
inline T __TBB_MaskedCompareAndSwap (volatile T *ptr, T value, T comparand ) {
    volatile uint32_t * base = (uint32_t*)( (uintptr_t)ptr & ~(uintptr_t)0x3 );
#if __TBB_BIG_ENDIAN
    const uint8_t bitoffset = uint8_t( 8*( 4-S - (uintptr_t(ptr) & 0x3) ) );
#else
    const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) );
#endif
    const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset;
    atomic_backoff b;
    uint32_t result;
    for(;;) {
        result = *base; // reload the base value which might change during the pause
        uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset );
        uint32_t new_value = ( result & ~mask ) | ( value << bitoffset );
        // __TBB_CompareAndSwap4 presumed to have full fence.
        result = __TBB_CompareAndSwap4( base, new_value, old_value );
        if(  result==old_value               // CAS succeeded
          || ((result^old_value)&mask)!=0 )  // CAS failed and the bits of interest have changed
            break;
        else                                 // CAS failed but the bits of interest left unchanged
            b.pause();
    }
    return T((result & mask) >> bitoffset);
}
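
// Worked example (illustrative): on a little-endian machine, a 2-byte CAS on an address
// with (uintptr_t)ptr & 0x3 == 2 uses bitoffset = 16 and mask = 0xFFFF0000, i.e. the
// operation is carried out on the upper half of the enclosing aligned 32-bit word.
// On a big-endian machine the same address yields bitoffset = 0 and mask = 0x0000FFFF.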

template<size_t S, typename T>
inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand ) {
    return __TBB_CompareAndSwapW((T *)ptr,value,comparand);
}

template<>
inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
#ifdef __TBB_CompareAndSwap1
    return __TBB_CompareAndSwap1(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,value,comparand);
#endif
}

template<>
inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) {
#ifdef __TBB_CompareAndSwap2
    return __TBB_CompareAndSwap2(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,value,comparand);
#endif
}

template<>
inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) {
    return __TBB_CompareAndSwap4(ptr,value,comparand);
}

#if __TBB_64BIT_ATOMICS
template<>
inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) {
    return __TBB_CompareAndSwap8(ptr,value,comparand);
}
#endif

template<size_t S, typename T>
inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
    atomic_backoff b;
    T result;
    for(;;) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // __TBB_CompareAndSwapGeneric presumed to have full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
            break;
        b.pause();
    }
    return result;
}

template<size_t S, typename T>
inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
    atomic_backoff b;
    T result;
    for(;;) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // __TBB_CompareAndSwapGeneric presumed to have full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
            break;
        b.pause();
    }
    return result;
}

// Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should be a type with alignment at least as
// strict as that of type T. The type should have a trivial default constructor and destructor, so
// that arrays of that type can be declared without initializers.
// It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands
// to a type bigger than T.
// The default definition here works on machines where integers are naturally aligned and the
// strictest alignment is 16.
#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict

#if __GNUC__ || __SUNPRO_CC || __IBMCPP__
struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
} __attribute__((aligned(16)));
#elif _MSC_VER
__declspec(align(16)) struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
};
#else
#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) or __TBB_machine_type_with_strictest_alignment
#endif

template<size_t N> struct type_with_alignment {__TBB_machine_type_with_strictest_alignment member;};
template<> struct type_with_alignment<1> { char member; };
template<> struct type_with_alignment<2> { uint16_t member; };
template<> struct type_with_alignment<4> { uint32_t member; };
template<> struct type_with_alignment<8> { uint64_t member; };

#if _MSC_VER||defined(__GNUC__)&&__GNUC__==3 && __GNUC_MINOR__<=2
//! Work around for bug in GNU 3.2 and MSVC compilers.
/** The bug is that the compiler sometimes returns 0 for __alignof(T) when T has not yet been instantiated.
    The work-around forces instantiation by forcing computation of sizeof(T) before __alignof(T). */
template<size_t Size, typename T>
struct work_around_alignment_bug {
#if _MSC_VER
    static const size_t alignment = __alignof(T);
#else
    static const size_t alignment = __alignof__(T);
#endif
};
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
#elif __GNUC__ || __SUNPRO_CC || __IBMCPP__
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__alignof__(T)>
#else
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) __TBB_machine_type_with_strictest_alignment
#endif

#endif  /* __TBB_TypeWithAlignmentAtLeastAsStrict */
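
// Illustrative usage (hypothetical helper, not part of the header): raw storage that
// is guaranteed to be at least as strictly aligned as T, without running T's constructor.
//
//     union aligned_space_for_T {
//         __TBB_TypeWithAlignmentAtLeastAsStrict(T) aligner;  // enforces alignment
//         unsigned char bytes[sizeof(T)];                     // actual storage
//     };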

// Template class here is to avoid instantiation of the static data for modules that don't use it
template<typename T>
struct reverse {
    static const T byte_table[256];
};
// An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed
// values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost.
template<typename T>
const T reverse<T>::byte_table[256] = {
    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};
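
// For reference, a sketch of the "computed on the fly" alternative mentioned above
// (standard bit-twiddling byte reversal; illustrative only, not used by TBB):
//
//     inline unsigned char reverse_byte_on_the_fly( unsigned char b ) {
//         b = (unsigned char)((b & 0xF0) >> 4 | (b & 0x0F) << 4); // swap nibbles
//         b = (unsigned char)((b & 0xCC) >> 2 | (b & 0x33) << 2); // swap bit pairs
//         b = (unsigned char)((b & 0xAA) >> 1 | (b & 0x55) << 1); // swap adjacent bits
//         return b;
//     }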

} // namespace internal
} // namespace tbb

#ifndef __TBB_CompareAndSwap1
#define __TBB_CompareAndSwap1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t>
#endif

#ifndef __TBB_CompareAndSwap2
#define __TBB_CompareAndSwap2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t>
#endif

#ifndef __TBB_CompareAndSwapW
#define __TBB_CompareAndSwapW tbb::internal::__TBB_CompareAndSwapGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#ifndef __TBB_FetchAndAdd1
#define __TBB_FetchAndAdd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
#endif

#ifndef __TBB_FetchAndAdd2
#define __TBB_FetchAndAdd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
#endif

#ifndef __TBB_FetchAndAdd4
#define __TBB_FetchAndAdd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
#endif

#ifndef __TBB_FetchAndAdd8
#define __TBB_FetchAndAdd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
#endif

#ifndef __TBB_FetchAndAddW
#define __TBB_FetchAndAddW tbb::internal::__TBB_FetchAndAddGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#ifndef __TBB_FetchAndStore1
#define __TBB_FetchAndStore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
#endif

#ifndef __TBB_FetchAndStore2
#define __TBB_FetchAndStore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
#endif

#ifndef __TBB_FetchAndStore4
#define __TBB_FetchAndStore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
#endif

#ifndef __TBB_FetchAndStore8
#define __TBB_FetchAndStore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
#endif

#ifndef __TBB_FetchAndStoreW
#define __TBB_FetchAndStoreW tbb::internal::__TBB_FetchAndStoreGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif
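
// Illustrative usage (hypothetical counter, not part of the header): each of the
// macros above behaves like its hardware counterpart and returns the value that
// was in memory before the operation.
//
//     volatile uint32_t hits = 0;                        // hypothetical shared counter
//     uint32_t before = __TBB_FetchAndAdd4( &hits, 1 );  // atomically increments hits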

#if __TBB_DECL_FENCED_ATOMICS

#ifndef __TBB_CompareAndSwap1__TBB_full_fence
#define __TBB_CompareAndSwap1__TBB_full_fence __TBB_CompareAndSwap1
#endif
#ifndef __TBB_CompareAndSwap1acquire
#define __TBB_CompareAndSwap1acquire __TBB_CompareAndSwap1__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap1release
#define __TBB_CompareAndSwap1release __TBB_CompareAndSwap1__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap2__TBB_full_fence
#define __TBB_CompareAndSwap2__TBB_full_fence __TBB_CompareAndSwap2
#endif
#ifndef __TBB_CompareAndSwap2acquire
#define __TBB_CompareAndSwap2acquire __TBB_CompareAndSwap2__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap2release
#define __TBB_CompareAndSwap2release __TBB_CompareAndSwap2__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap4__TBB_full_fence
#define __TBB_CompareAndSwap4__TBB_full_fence __TBB_CompareAndSwap4
#endif
#ifndef __TBB_CompareAndSwap4acquire
#define __TBB_CompareAndSwap4acquire __TBB_CompareAndSwap4__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap4release
#define __TBB_CompareAndSwap4release __TBB_CompareAndSwap4__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap8__TBB_full_fence
#define __TBB_CompareAndSwap8__TBB_full_fence __TBB_CompareAndSwap8
#endif
#ifndef __TBB_CompareAndSwap8acquire
#define __TBB_CompareAndSwap8acquire __TBB_CompareAndSwap8__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap8release
#define __TBB_CompareAndSwap8release __TBB_CompareAndSwap8__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd1__TBB_full_fence
#define __TBB_FetchAndAdd1__TBB_full_fence __TBB_FetchAndAdd1
#endif
#ifndef __TBB_FetchAndAdd1acquire
#define __TBB_FetchAndAdd1acquire __TBB_FetchAndAdd1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd1release
#define __TBB_FetchAndAdd1release __TBB_FetchAndAdd1__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd2__TBB_full_fence
#define __TBB_FetchAndAdd2__TBB_full_fence __TBB_FetchAndAdd2
#endif
#ifndef __TBB_FetchAndAdd2acquire
#define __TBB_FetchAndAdd2acquire __TBB_FetchAndAdd2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd2release
#define __TBB_FetchAndAdd2release __TBB_FetchAndAdd2__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd4__TBB_full_fence
#define __TBB_FetchAndAdd4__TBB_full_fence __TBB_FetchAndAdd4
#endif
#ifndef __TBB_FetchAndAdd4acquire
#define __TBB_FetchAndAdd4acquire __TBB_FetchAndAdd4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd4release
#define __TBB_FetchAndAdd4release __TBB_FetchAndAdd4__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd8__TBB_full_fence
#define __TBB_FetchAndAdd8__TBB_full_fence __TBB_FetchAndAdd8
#endif
#ifndef __TBB_FetchAndAdd8acquire
#define __TBB_FetchAndAdd8acquire __TBB_FetchAndAdd8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd8release
#define __TBB_FetchAndAdd8release __TBB_FetchAndAdd8__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore1__TBB_full_fence
#define __TBB_FetchAndStore1__TBB_full_fence __TBB_FetchAndStore1
#endif
#ifndef __TBB_FetchAndStore1acquire
#define __TBB_FetchAndStore1acquire __TBB_FetchAndStore1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore1release
#define __TBB_FetchAndStore1release __TBB_FetchAndStore1__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore2__TBB_full_fence
#define __TBB_FetchAndStore2__TBB_full_fence __TBB_FetchAndStore2
#endif
#ifndef __TBB_FetchAndStore2acquire
#define __TBB_FetchAndStore2acquire __TBB_FetchAndStore2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore2release
#define __TBB_FetchAndStore2release __TBB_FetchAndStore2__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore4__TBB_full_fence
#define __TBB_FetchAndStore4__TBB_full_fence __TBB_FetchAndStore4
#endif
#ifndef __TBB_FetchAndStore4acquire
#define __TBB_FetchAndStore4acquire __TBB_FetchAndStore4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore4release
#define __TBB_FetchAndStore4release __TBB_FetchAndStore4__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore8__TBB_full_fence
#define __TBB_FetchAndStore8__TBB_full_fence __TBB_FetchAndStore8
#endif
#ifndef __TBB_FetchAndStore8acquire
#define __TBB_FetchAndStore8acquire __TBB_FetchAndStore8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore8release
#define __TBB_FetchAndStore8release __TBB_FetchAndStore8__TBB_full_fence
#endif

#endif // __TBB_DECL_FENCED_ATOMICS

// Special atomic functions
#ifndef __TBB_FetchAndAddWrelease
#define __TBB_FetchAndAddWrelease __TBB_FetchAndAddW
#endif

#ifndef __TBB_FetchAndIncrementWacquire
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
#endif

#ifndef __TBB_FetchAndDecrementWrelease
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1))
#endif

template <typename T, size_t S>
struct __TBB_machine_load_store {
    static inline T load_with_acquire(const volatile T& location) {
        T to_return = location;
        __TBB_release_consistency_helper();
        return to_return;
    }

    static inline void store_with_release(volatile T &location, T value) {
        __TBB_release_consistency_helper();
        location = value;
    }
};

#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS

using tbb::internal::int64_t;

// On 32-bit platforms, there should be a definition of __TBB_Store8 and __TBB_Load8
#ifndef __TBB_Store8
inline void __TBB_Store8 (volatile void *ptr, int64_t value) {
    for(;;) {
        int64_t result = *(int64_t *)ptr;
        if( __TBB_CompareAndSwap8(ptr,value,result)==result ) break;
    }
}
#endif

#ifndef __TBB_Load8
inline int64_t __TBB_Load8 (const volatile void *ptr) {
    const int64_t anyvalue = 3264; // Could be anything, just the same for comparand and new value
    return __TBB_CompareAndSwap8(const_cast<volatile void *>(ptr),anyvalue,anyvalue);
}
#endif
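
// Why the CAS above acts as a plain load: if *ptr happens to equal anyvalue, the CAS
// stores anyvalue back (no visible change) and returns anyvalue; otherwise the CAS
// fails and returns the current contents. Either way the return value is the value
// held at ptr, read atomically.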

template <typename T>
struct __TBB_machine_load_store<T,8> {
    static inline T load_with_acquire(const volatile T& location) {
        T to_return = (T)__TBB_Load8((const volatile void*)&location);
        __TBB_release_consistency_helper();
        return to_return;
    }

    static inline void store_with_release(volatile T& location, T value) {
        __TBB_release_consistency_helper();
        __TBB_Store8((volatile void *)&location,(int64_t)value);
    }
};
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */

#ifndef __TBB_load_with_acquire
template<typename T>
inline T __TBB_load_with_acquire(const volatile T &location) {
    return __TBB_machine_load_store<T,sizeof(T)>::load_with_acquire(location);
}
#endif

#ifndef __TBB_store_with_release
template<typename T, typename V>
inline void __TBB_store_with_release(volatile T& location, V value) {
    __TBB_machine_load_store<T,sizeof(T)>::store_with_release(location,T(value));
}
//! Overload that exists solely to avoid /Wp64 warnings.
inline void __TBB_store_with_release(volatile size_t& location, size_t value) {
    __TBB_machine_load_store<size_t,sizeof(size_t)>::store_with_release(location,value);
}
#endif
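
// Illustrative producer/consumer sketch (hypothetical variables): release on the
// store pairs with acquire on the load, so data written before the releasing store
// is guaranteed visible after the flag is observed.
//
//     int payload;               // hypothetical ordinary data
//     volatile int flag;         // hypothetical flag, initially 0
//     // producer:  payload = 42;  __TBB_store_with_release( flag, 1 );
//     // consumer:  if( __TBB_load_with_acquire(flag) ) { /* payload==42 is visible */ }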

#ifndef __TBB_Log2
inline intptr_t __TBB_Log2( uintptr_t x ) {
    if( x==0 ) return -1;
    intptr_t result = 0;
    uintptr_t tmp;
#if __TBB_WORDSIZE>=8
    if( (tmp = x>>32) ) { x=tmp; result += 32; }
#endif
    if( (tmp = x>>16) ) { x=tmp; result += 16; }
    if( (tmp = x>>8) )  { x=tmp; result += 8; }
    if( (tmp = x>>4) )  { x=tmp; result += 4; }
    if( (tmp = x>>2) )  { x=tmp; result += 2; }
    return (x&2)? result+1: result;
}
#endif
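
// Worked example: __TBB_Log2(40), with 40 == 0b101000. The x>>16 and x>>8 tests fail;
// x>>4 == 2, so x becomes 2 and result becomes 4; x>>2 == 0; finally x&2 is set, so
// the function returns 4+1 == 5, the index of the highest set bit (floor(log2(40))).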

#ifndef __TBB_AtomicOR
inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
        if( result==tmp ) break;
        b.pause();
    }
}
#endif

#ifndef __TBB_AtomicAND
inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
        if( result==tmp ) break;
        b.pause();
    }
}
#endif

typedef unsigned char __TBB_Byte;

#ifndef __TBB_TryLockByte
inline bool __TBB_TryLockByte( __TBB_Byte &flag ) {
    return __TBB_CompareAndSwap1(&flag,1,0)==0;
}
#endif

#ifndef __TBB_LockByte
inline uintptr_t __TBB_LockByte( __TBB_Byte& flag ) {
    if ( !__TBB_TryLockByte(flag) ) {
        tbb::internal::atomic_backoff b;
        do {
            b.pause();
        } while ( !__TBB_TryLockByte(flag) );
    }
    return 0;
}
#endif

#define __TBB_UnlockByte __TBB_store_with_release
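
// Illustrative usage sketch (hypothetical flag, not part of the header): a minimal
// spin lock built from the byte-lock primitives above.
//
//     static __TBB_Byte my_lock = 0;     // hypothetical flag, 0 == unlocked
//     __TBB_LockByte( my_lock );         // spin with backoff until acquired
//     /* ...critical section... */
//     __TBB_UnlockByte( my_lock, 0 );    // release: store 0 with release semantics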

#ifndef __TBB_ReverseByte
inline unsigned char __TBB_ReverseByte(unsigned char src) {
    return tbb::internal::reverse<unsigned char>::byte_table[src];
}
#endif

template<typename T>
T __TBB_ReverseBits(T src)
{
    T dst;
    unsigned char *original = (unsigned char *) &src;
    unsigned char *reversed = (unsigned char *) &dst;

    for( int i = sizeof(T)-1; i >= 0; i-- )
        reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );

    return dst;
}
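
// Illustrative example (hypothetical values): each byte is bit-reversed via the
// lookup table and the byte order is mirrored, so regardless of endianness
// __TBB_ReverseBits<uint32_t>(0x00000001u) yields 0x80000000u.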

#endif /* __TBB_machine_H */