X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=tbb%2Finclude%2Ftbb%2Ftbb_machine.h;h=50636e5d274e8d4d8bd186fdba587d32a8ef8f4b;hb=4c96b4064a92f65beead3cb2453d727187bed40f;hp=8b43a1285500f5302fb34221f7a8d635917538bb;hpb=46ab0514ba58ee00183ff0584c7ea7c9e3d76494;p=casparcg

diff --git a/tbb/include/tbb/tbb_machine.h b/tbb/include/tbb/tbb_machine.h
index 8b43a1285..50636e5d2 100644
--- a/tbb/include/tbb/tbb_machine.h
+++ b/tbb/include/tbb/tbb_machine.h
@@ -29,8 +29,133 @@
 #ifndef __TBB_machine_H
 #define __TBB_machine_H
 
+/** This header provides a basic platform abstraction layer by hooking up appropriate
+    architecture/OS/compiler specific headers from the /include/tbb/machine directory.
+    If a plug-in header does not implement all the required APIs, it must specify
+    the missing ones by setting one or more of the following macros:
+
+    __TBB_USE_GENERIC_PART_WORD_CAS
+    __TBB_USE_GENERIC_PART_WORD_FETCH_ADD
+    __TBB_USE_GENERIC_PART_WORD_FETCH_STORE
+    __TBB_USE_GENERIC_FETCH_ADD
+    __TBB_USE_GENERIC_FETCH_STORE
+    __TBB_USE_GENERIC_DWORD_FETCH_ADD
+    __TBB_USE_GENERIC_DWORD_FETCH_STORE
+    __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
+    __TBB_USE_GENERIC_FULL_FENCED_LOAD_STORE
+    __TBB_USE_GENERIC_RELAXED_LOAD_STORE
+    __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
+
+    In this case tbb_machine.h will add the missing functionality based on a minimal set
+    of APIs that are required to be implemented by all plug-in headers as described
+    further below.
+    Note that these generic implementations may be sub-optimal for a particular
+    architecture, and thus should be relied upon only after careful evaluation
+    or as the last resort.
+
+    Additionally __TBB_64BIT_ATOMICS can be set to 0 on a 32-bit architecture to
+    indicate that the port is not going to support double word atomics. It may also
+    be set to 1 explicitly, though normally this is not necessary as tbb_machine.h
+    will set it automatically.
+
+    Prerequisites for each architecture port
+    ----------------------------------------
+    The following functions have no generic implementation. Therefore they must be
+    implemented in each machine architecture specific header either as a conventional
+    function or as a functional macro.
+
+    __TBB_Yield()
+        Signals the OS that the current thread is willing to relinquish the remainder
+        of its time quantum.
+
+    __TBB_full_memory_fence()
+        Must prevent all memory operations from being reordered across it (both
+        by hardware and compiler). All such fences must be totally ordered (or
+        sequentially consistent).
+
+    __TBB_machine_cmpswp4( volatile void *ptr, int32_t value, int32_t comparand )
+        Must be provided if __TBB_USE_FENCED_ATOMICS is not set.
+
+    __TBB_machine_cmpswp8( volatile void *ptr, int64_t value, int64_t comparand )
+        Must be provided for 64-bit architectures if __TBB_USE_FENCED_ATOMICS is not set,
+        and for 32-bit architectures if __TBB_64BIT_ATOMICS is set.
+
+    __TBB_machine_<op><S><fence>(...), where
+        <op> = {cmpswp, fetchadd, fetchstore}
+        <S> = {1, 2, 4, 8}
+        <fence> = {full_fence, acquire, release, relaxed}
+        Must be provided if __TBB_USE_FENCED_ATOMICS is set.
+
+    __TBB_control_consistency_helper()
+        Bridges the memory-semantics gap between architectures providing only
+        implicit C++0x "consume" semantics (like Power Architecture) and those
+        also implicitly obeying control dependencies (like Itanium).
+        It must be used only in conditional code where the condition is itself
+        data-dependent, and will then make subsequent code behave as if the
+        original data dependency were acquired.
+        It needs only an empty definition where implied by the architecture
+        either specifically (Itanium) or because generally stronger C++0x "acquire"
+        semantics are enforced (like x86).
+
+    __TBB_acquire_consistency_helper(), __TBB_release_consistency_helper()
+        Must be provided if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE is set.
+        Enforce acquire and release semantics in generic implementations of fenced
+        store and load operations. Depending on the particular architecture/compiler
+        combination they may be a hardware fence, a compiler fence, both or nothing.
+ **/
+
 #include "tbb_stddef.h"
 
+namespace tbb {
+namespace internal {
+
+////////////////////////////////////////////////////////////////////////////////
+// Overridable helpers declarations
+//
+// A machine/*.h file may choose to define these templates, otherwise it must
+// request default implementation by setting appropriate __TBB_USE_GENERIC_XXX macro(s).
+//
+template <typename T, size_t S>
+struct machine_load_store;
+
+template <typename T, size_t S>
+struct machine_load_store_relaxed;
+
+template <typename T, size_t S>
+struct machine_load_store_seq_cst;
+//
+// End of overridable helpers declarations
+////////////////////////////////////////////////////////////////////////////////
+
+template<size_t S> struct atomic_selector;
+
+template<> struct atomic_selector<1> {
+    typedef int8_t word;
+    inline static word fetch_store ( volatile void* location, word value );
+};
+
+template<> struct atomic_selector<2> {
+    typedef int16_t word;
+    inline static word fetch_store ( volatile void* location, word value );
+};
+
+template<> struct atomic_selector<4> {
+#if _MSC_VER && !_WIN64
+    // Work-around that avoids spurious /Wp64 warnings
+    typedef intptr_t word;
+#else
+    typedef int32_t word;
+#endif
+    inline static word fetch_store ( volatile void* location, word value );
+};
+
+template<> struct atomic_selector<8> {
+    typedef int64_t word;
+    inline static word fetch_store ( volatile void* location, word value );
+};
+
+}} // namespaces internal, tbb
+
 #if _WIN32||_WIN64
 
 #ifdef _MANAGED
@@ -49,9 +174,9 @@
         #endif
     #elif defined(_M_IX86)
         #include "machine/windows_ia32.h"
-    #elif defined(_M_AMD64)
+    #elif defined(_M_X64)
        #include "machine/windows_intel64.h"
-    #elif _XBOX 
+    #elif _XBOX
        #include "machine/xbox360_ppc.h"
     #endif
 
@@ -93,9 +218,9 @@
 
 #elif __sun || __SUNPRO_CC
 
-    #define __asm__ asm 
+    #define __asm__ asm
     #define __volatile__ volatile
-    
+
     #if __i386 || __i386__
         #include "machine/linux_ia32.h"
     #elif __x86_64__
@@ -110,29 +235,46 @@
 #endif /* OS selection */
 
 #ifndef __TBB_64BIT_ATOMICS
-#define __TBB_64BIT_ATOMICS 1
+    #define __TBB_64BIT_ATOMICS 1
 #endif
 
-//! Prerequisites for each architecture port
-/** There are no generic implementation for these macros so they have to be implemented
-    in each machine architecture specific header.
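For illustration, the plug-in contract described in the header comment above boils down to a port header of roughly the following shape. This is only a sketch: machine/my_arch.h and the __my_arch_* intrinsics are made-up placeholders, and a real port would choose the generic-fallback macros that match its hardware.

    // machine/my_arch.h -- hypothetical minimal 32-bit port (sketch only)
    #define __TBB_WORDSIZE      4
    #define __TBB_BIG_ENDIAN    0

    // Mandatory primitives with no generic fallback:
    #define __TBB_full_memory_fence()           __my_arch_mfence()        // placeholder intrinsic
    #define __TBB_Yield()                       __my_arch_thread_yield()  // placeholder intrinsic
    #define __TBB_control_consistency_helper()  __TBB_full_memory_fence() // conservative choice
    #define __TBB_acquire_consistency_helper()  __TBB_full_memory_fence() // conservative choice
    #define __TBB_release_consistency_helper()  __TBB_full_memory_fence() // conservative choice

    static inline int32_t __TBB_machine_cmpswp4( volatile void *ptr, int32_t value, int32_t comparand ) {
        return __my_arch_cas32( (volatile int32_t*)ptr, comparand, value );  // placeholder intrinsic
    }

    // Everything else is requested from tbb_machine.h's generic implementations:
    #define __TBB_USE_GENERIC_PART_WORD_CAS           1
    #define __TBB_USE_GENERIC_FETCH_ADD               1
    #define __TBB_USE_GENERIC_FETCH_STORE             1
    #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE  1
    #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE      1
    #define __TBB_64BIT_ATOMICS                       0   // this port has no double-word atomics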
+// Special atomic functions +#if __TBB_USE_FENCED_ATOMICS + #define __TBB_machine_cmpswp1 __TBB_machine_cmpswp1full_fence + #define __TBB_machine_cmpswp2 __TBB_machine_cmpswp2full_fence + #define __TBB_machine_cmpswp4 __TBB_machine_cmpswp4full_fence + #define __TBB_machine_cmpswp8 __TBB_machine_cmpswp8full_fence + + #if __TBB_WORDSIZE==8 + #define __TBB_machine_fetchadd8 __TBB_machine_fetchadd8full_fence + #define __TBB_machine_fetchstore8 __TBB_machine_fetchstore8full_fence + #define __TBB_FetchAndAddWrelease(P,V) __TBB_machine_fetchadd8release(P,V) + #define __TBB_FetchAndIncrementWacquire(P) __TBB_machine_fetchadd8acquire(P,1) + #define __TBB_FetchAndDecrementWrelease(P) __TBB_machine_fetchadd8release(P,(-1)) + #else + #error Define macros for 4-byte word, similarly to the above __TBB_WORDSIZE==8 branch. + #endif /* __TBB_WORDSIZE==4 */ +#else /* !__TBB_USE_FENCED_ATOMICS */ + #define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAddW(P,V) + #define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1) + #define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1)) +#endif /* !__TBB_USE_FENCED_ATOMICS */ + +#if __TBB_WORDSIZE==4 + #define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C) + #define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd4(P,V) + #define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore4(P,V) +#elif __TBB_WORDSIZE==8 + #if __TBB_USE_GENERIC_DWORD_LOAD_STORE || __TBB_USE_GENERIC_DWORD_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_STORE + #error These macros should only be used on 32-bit platforms. + #endif - __TBB_full_memory_fence must prevent all memory operations from being reordered - across the fence. And all such fences must be totally ordered (or sequentially - consistent). These fence must affect both compiler and hardware. - - __TBB_release_consistency_helper is used to enforce guarantees of acquire or - release semantics in generic implementations of __TBB_load_with_acquire and - __TBB_store_with_release below. Depending on the particular combination of - architecture+compiler it can be a hardware fence, a compiler fence, both or - nothing. **/ -#if !defined(__TBB_CompareAndSwap4) \ - || !defined(__TBB_CompareAndSwap8) && __TBB_64BIT_ATOMICS \ - || !defined(__TBB_Yield) \ - || !defined(__TBB_full_memory_fence) \ - || !defined(__TBB_release_consistency_helper) -#error Minimal requirements for tbb_machine.h not satisfied; platform is not supported. -#endif + #define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C) + #define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd8(P,V) + #define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore8(P,V) +#else /* __TBB_WORDSIZE != 8 */ + #error Unsupported machine word size. +#endif /* __TBB_WORDSIZE */ #ifndef __TBB_Pause inline void __TBB_Pause(int32_t) { @@ -150,7 +292,7 @@ namespace internal { //! Class that implements exponential backoff. /** See implementation of spin_wait_while_eq for an example. */ class atomic_backoff : no_copy { - //! Time delay, in units of "pause" instructions. + //! Time delay, in units of "pause" instructions. /** Should be equal to approximately the number of "pause" instructions that take the same time as an context switch. 
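        For illustration, the typical spin-wait built on this class (a sketch in the
        spirit of the spin_wait_while_eq helper mentioned above; 'location' and 'value'
        stand in for some shared variable and the value being waited out):

            atomic_backoff backoff;
            while( location==value )   // keep re-reading the shared variable
                backoff.pause();       // spin briefly, doubling the delay, then __TBB_Yield()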
*/ static const int32_t LOOPS_BEFORE_YIELD = 16; @@ -221,8 +363,9 @@ inline T __TBB_MaskedCompareAndSwap (volatile T *ptr, T value, T comparand ) { result = *base; // reload the base value which might change during the pause uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset ); uint32_t new_value = ( result & ~mask ) | ( value << bitoffset ); - // __TBB_CompareAndSwap4 presumed to have full fence. - result = __TBB_CompareAndSwap4( base, new_value, old_value ); + // __TBB_CompareAndSwap4 presumed to have full fence. + // Cast shuts up /Wp64 warning + result = (uint32_t)__TBB_machine_cmpswp4( base, new_value, old_value ); if( result==old_value // CAS succeeded || ((result^old_value)&mask)!=0 ) // CAS failed and the bits of interest have changed break; @@ -233,37 +376,36 @@ inline T __TBB_MaskedCompareAndSwap (volatile T *ptr, T value, T comparand ) { } template -inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand ) { - return __TBB_CompareAndSwapW((T *)ptr,value,comparand); -} +inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand ); template<> inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) { -#ifdef __TBB_CompareAndSwap1 - return __TBB_CompareAndSwap1(ptr,value,comparand); -#else +#if __TBB_USE_GENERIC_PART_WORD_CAS return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,value,comparand); +#else + return __TBB_machine_cmpswp1(ptr,value,comparand); #endif } template<> inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) { -#ifdef __TBB_CompareAndSwap2 - return __TBB_CompareAndSwap2(ptr,value,comparand); -#else +#if __TBB_USE_GENERIC_PART_WORD_CAS return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,value,comparand); +#else + return __TBB_machine_cmpswp2(ptr,value,comparand); #endif } template<> -inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) { - return __TBB_CompareAndSwap4(ptr,value,comparand); +inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) { + // Cast shuts up /Wp64 warning + return (uint32_t)__TBB_machine_cmpswp4(ptr,value,comparand); } #if __TBB_64BIT_ATOMICS template<> -inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) { - return __TBB_CompareAndSwap8(ptr,value,comparand); +inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) { + return __TBB_machine_cmpswp8(ptr,value,comparand); } #endif @@ -273,8 +415,8 @@ inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) { T result; for(;;) { result = *reinterpret_cast(ptr); - // __TBB_CompareAndSwapGeneric presumed to have full fence. - if( __TBB_CompareAndSwapGeneric ( ptr, result+addend, result )==result ) + // __TBB_CompareAndSwapGeneric presumed to have full fence. + if( __TBB_CompareAndSwapGeneric ( ptr, result+addend, result )==result ) break; b.pause(); } @@ -288,59 +430,275 @@ inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) { for(;;) { result = *reinterpret_cast(ptr); // __TBB_CompareAndSwapGeneric presumed to have full fence. 
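        // Standard CAS retry idiom: the value read above serves as the comparand, so the
        // exchange succeeds only if *ptr still holds that value; otherwise another thread
        // intervened and the loop backs off and retries with a freshly loaded value.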
- if( __TBB_CompareAndSwapGeneric ( ptr, value, result )==result ) + if( __TBB_CompareAndSwapGeneric ( ptr, value, result )==result ) break; b.pause(); } return result; } +#if __TBB_USE_GENERIC_PART_WORD_CAS +#define __TBB_machine_cmpswp1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t> +#define __TBB_machine_cmpswp2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t> +#endif + +#if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_PART_WORD_FETCH_ADD +#define __TBB_machine_fetchadd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t> +#define __TBB_machine_fetchadd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t> +#endif + +#if __TBB_USE_GENERIC_FETCH_ADD +#define __TBB_machine_fetchadd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t> +#endif + +#if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_ADD +#define __TBB_machine_fetchadd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t> +#endif + +#if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_PART_WORD_FETCH_STORE +#define __TBB_machine_fetchstore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t> +#define __TBB_machine_fetchstore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t> +#endif + +#if __TBB_USE_GENERIC_FETCH_STORE +#define __TBB_machine_fetchstore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t> +#endif + +#if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_DWORD_FETCH_STORE +#define __TBB_machine_fetchstore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t> +#endif + +#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE +#define __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(S) \ + atomic_selector::word atomic_selector::fetch_store ( volatile void* location, word value ) { \ + return __TBB_machine_fetchstore##S( location, value ); \ + } + +__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(1) +__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(2) +__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(4) +__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(8) + +#undef __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE +#endif /* __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */ + +#if __TBB_USE_GENERIC_DWORD_LOAD_STORE +inline void __TBB_machine_store8 (volatile void *ptr, int64_t value) { + for(;;) { + int64_t result = *(int64_t *)ptr; + if( __TBB_machine_cmpswp8(ptr,value,result)==result ) break; + } +} + +inline int64_t __TBB_machine_load8 (const volatile void *ptr) { + // Comparand and new value may be anything, they only must be equal, and + // the value should have a low probability to be actually found in 'location'. + const int64_t anyvalue = 2305843009213693951; + return __TBB_machine_cmpswp8(const_cast(ptr),anyvalue,anyvalue); +} +#endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */ + +#if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE +/** Fenced operations use volatile qualifier to prevent compiler from optimizing + them out, and on on architectures with weak memory ordering to induce compiler + to generate code with appropriate acquire/release semantics. + On architectures like IA32, Intel64 (and likely and Sparc TSO) volatile has + no effect on code gen, and consistency helpers serve as a compiler fence (the + latter being true for IA64/gcc as well to fix a bug in some gcc versions). 
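    For example, the IA-32/Intel64 Linux ports define these helpers as plain compiler
    fences, approximately:

        #define __TBB_compiler_fence()             __asm__ __volatile__("": : :"memory")
        #define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
        #define __TBB_release_consistency_helper() __TBB_compiler_fence()

    while weakly ordered architectures map them to real hardware fences; the exact
    definitions live in the corresponding machine/*.h port headers.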
**/ +template +struct machine_load_store { + static T load_with_acquire ( const volatile T& location ) { + T to_return = location; + __TBB_acquire_consistency_helper(); + return to_return; + } + static void store_with_release ( volatile T &location, T value ) { + __TBB_release_consistency_helper(); + location = value; + } +}; + +#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS +template +struct machine_load_store { + static T load_with_acquire ( const volatile T& location ) { + return (T)__TBB_machine_load8( (const volatile void*)&location ); + } + static void store_with_release ( volatile T& location, T value ) { + __TBB_machine_store8( (volatile void*)&location, (int64_t)value ); + } +}; +#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ +#endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */ + +template +struct machine_load_store_seq_cst { + static T load ( const volatile T& location ) { + __TBB_full_memory_fence(); + return machine_load_store::load_with_acquire( location ); + } +#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE + static void store ( volatile T &location, T value ) { + atomic_selector::fetch_store( (volatile void*)&location, (typename atomic_selector::word)value ); + } +#else /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */ + static void store ( volatile T &location, T value ) { + machine_load_store::store_with_release( location, value ); + __TBB_full_memory_fence(); + } +#endif /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */ +}; + +#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS +/** The implementation does not use functions __TBB_machine_load8/store8 as they + are not required to be sequentially consistent. **/ +template +struct machine_load_store_seq_cst { + static T load ( const volatile T& location ) { + // Comparand and new value may be anything, they only must be equal, and + // the value should have a low probability to be actually found in 'location'. + const int64_t anyvalue = 2305843009213693951ll; + return __TBB_machine_cmpswp8( (volatile void*)const_cast(&location), anyvalue, anyvalue ); + } + static void store ( volatile T &location, T value ) { + int64_t result = (volatile int64_t&)location; + while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)value, result) != result ) + result = (volatile int64_t&)location; + } +}; +#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ + +#if __TBB_USE_GENERIC_RELAXED_LOAD_STORE +// Relaxed operations add volatile qualifier to prevent compiler from optimizing them out. +/** Volatile should not incur any additional cost on IA32, Intel64, and Sparc TSO + architectures. However on architectures with weak memory ordering compiler may + generate code with acquire/release semantics for operations on volatile data. 
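    Note that, as with the acquire/release templates above, the 8-byte specialization
    below still routes through __TBB_machine_load8/store8 on 32-bit platforms, so a
    "relaxed" 64-bit access remains atomic; when the generic double-word load/store is
    in use, it is implemented on top of __TBB_machine_cmpswp8 and is therefore not free
    of cost.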
**/ +template +struct machine_load_store_relaxed { + static inline T load ( const volatile T& location ) { + return location; + } + static inline void store ( volatile T& location, T value ) { + location = value; + } +}; + +#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS +template +struct machine_load_store_relaxed { + static inline T load ( const volatile T& location ) { + return (T)__TBB_machine_load8( (const volatile void*)&location ); + } + static inline void store ( volatile T& location, T value ) { + __TBB_machine_store8( (volatile void*)&location, (int64_t)value ); + } +}; +#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ +#endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */ + +template +inline T __TBB_load_with_acquire(const volatile T &location) { + return machine_load_store::load_with_acquire( location ); +} +template +inline void __TBB_store_with_release(volatile T& location, V value) { + machine_load_store::store_with_release( location, T(value) ); +} +//! Overload that exists solely to avoid /Wp64 warnings. +inline void __TBB_store_with_release(volatile size_t& location, size_t value) { + machine_load_store::store_with_release( location, value ); +} + +template +inline T __TBB_load_full_fence(const volatile T &location) { + return machine_load_store_seq_cst::load( location ); +} +template +inline void __TBB_store_full_fence(volatile T& location, V value) { + machine_load_store_seq_cst::store( location, T(value) ); +} +//! Overload that exists solely to avoid /Wp64 warnings. +inline void __TBB_store_full_fence(volatile size_t& location, size_t value) { + machine_load_store_seq_cst::store( location, value ); +} + +template +inline T __TBB_load_relaxed (const volatile T& location) { + return machine_load_store_relaxed::load( const_cast(location) ); +} +template +inline void __TBB_store_relaxed ( volatile T& location, V value ) { + machine_load_store_relaxed::store( const_cast(location), T(value) ); +} +//! Overload that exists solely to avoid /Wp64 warnings. +inline void __TBB_store_relaxed ( volatile size_t& location, size_t value ) { + machine_load_store_relaxed::store( const_cast(location), value ); +} + // Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should be a type with alignment at least as -// strict as type T. Type type should have a trivial default constructor and destructor, so that -// arrays of that type can be declared without initializers. +// strict as type T. The type should have a trivial default constructor and destructor, so that +// arrays of that type can be declared without initializers. // It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands // to a type bigger than T. // The default definition here works on machines where integers are naturally aligned and the -// strictest alignment is 16. +// strictest alignment is 64. 
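For illustration, the macro defined below is what TBB uses to declare raw, correctly aligned storage for objects that are constructed later; a sketch in the spirit of tbb::aligned_space ('T' and 'N' are placeholders):

    template<typename T, size_t N>
    struct raw_storage {   // sketch only
        typedef __TBB_TypeWithAlignmentAtLeastAsStrict(T) element_type;
        element_type array[(sizeof(T)*N + sizeof(element_type) - 1) / sizeof(element_type)];
        T* begin() { return reinterpret_cast<T*>(this); }   // storage is at least as aligned as T
    };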
#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict -#if __GNUC__ || __SUNPRO_CC || __IBMCPP__ -struct __TBB_machine_type_with_strictest_alignment { - int member[4]; -} __attribute__((aligned(16))); -#elif _MSC_VER -__declspec(align(16)) struct __TBB_machine_type_with_strictest_alignment { - int member[4]; +#if __TBB_ATTRIBUTE_ALIGNED_PRESENT + +#define __TBB_DefineTypeWithAlignment(PowerOf2) \ +struct __TBB_machine_type_with_alignment_##PowerOf2 { \ + uint32_t member[PowerOf2/sizeof(uint32_t)]; \ +} __attribute__((aligned(PowerOf2))); +#define __TBB_alignof(T) __alignof__(T) + +#elif __TBB_DECLSPEC_ALIGN_PRESENT + +#define __TBB_DefineTypeWithAlignment(PowerOf2) \ +__declspec(align(PowerOf2)) \ +struct __TBB_machine_type_with_alignment_##PowerOf2 { \ + uint32_t member[PowerOf2/sizeof(uint32_t)]; \ }; -#else -#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) or __TBB_machine_type_with_strictest_alignment +#define __TBB_alignof(T) __alignof(T) + +#else /* A compiler with unknown syntax for data alignment */ +#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) #endif -template struct type_with_alignment {__TBB_machine_type_with_strictest_alignment member;}; +/* Now declare types aligned to useful powers of two */ +// TODO: Is __TBB_DefineTypeWithAlignment(8) needed on 32 bit platforms? +__TBB_DefineTypeWithAlignment(16) +__TBB_DefineTypeWithAlignment(32) +__TBB_DefineTypeWithAlignment(64) + +typedef __TBB_machine_type_with_alignment_64 __TBB_machine_type_with_strictest_alignment; + +// Primary template is a declaration of incomplete type so that it fails with unknown alignments +template struct type_with_alignment; + +// Specializations for allowed alignments template<> struct type_with_alignment<1> { char member; }; template<> struct type_with_alignment<2> { uint16_t member; }; template<> struct type_with_alignment<4> { uint32_t member; }; template<> struct type_with_alignment<8> { uint64_t member; }; +template<> struct type_with_alignment<16> {__TBB_machine_type_with_alignment_16 member; }; +template<> struct type_with_alignment<32> {__TBB_machine_type_with_alignment_32 member; }; +template<> struct type_with_alignment<64> {__TBB_machine_type_with_alignment_64 member; }; -#if _MSC_VER||defined(__GNUC__)&&__GNUC__==3 && __GNUC_MINOR__<=2 +#if __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN //! Work around for bug in GNU 3.2 and MSVC compilers. /** Bug is that compiler sometimes returns 0 for __alignof(T) when T has not yet been instantiated. The work-around forces instantiation by forcing computation of sizeof(T) before __alignof(T). 
*/ -template +template struct work_around_alignment_bug { -#if _MSC_VER - static const size_t alignment = __alignof(T); -#else - static const size_t alignment = __alignof__(T); -#endif + static const size_t alignment = __TBB_alignof(T); }; #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment::alignment> -#elif __GNUC__ || __SUNPRO_CC || __IBMCPP__ -#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__alignof__(T)> #else -#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) __TBB_machine_type_with_strictest_alignment -#endif -#endif /* ____TBB_TypeWithAlignmentAtLeastAsStrict */ +#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__TBB_alignof(T)> +#endif /* __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN */ + +#endif /* __TBB_TypeWithAlignmentAtLeastAsStrict */ // Template class here is to avoid instantiation of the static data for modules that don't use it template @@ -372,262 +730,13 @@ const T reverse::byte_table[256] = { } // namespace internal } // namespace tbb -#ifndef __TBB_CompareAndSwap1 -#define __TBB_CompareAndSwap1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t> -#endif - -#ifndef __TBB_CompareAndSwap2 -#define __TBB_CompareAndSwap2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t> -#endif - -#ifndef __TBB_CompareAndSwapW -#define __TBB_CompareAndSwapW tbb::internal::__TBB_CompareAndSwapGeneric -#endif - -#ifndef __TBB_FetchAndAdd1 -#define __TBB_FetchAndAdd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t> -#endif - -#ifndef __TBB_FetchAndAdd2 -#define __TBB_FetchAndAdd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t> -#endif - -#ifndef __TBB_FetchAndAdd4 -#define __TBB_FetchAndAdd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t> -#endif - -#ifndef __TBB_FetchAndAdd8 -#define __TBB_FetchAndAdd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t> -#endif - -#ifndef __TBB_FetchAndAddW -#define __TBB_FetchAndAddW tbb::internal::__TBB_FetchAndAddGeneric -#endif - -#ifndef __TBB_FetchAndStore1 -#define __TBB_FetchAndStore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t> -#endif - -#ifndef __TBB_FetchAndStore2 -#define __TBB_FetchAndStore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t> -#endif - -#ifndef __TBB_FetchAndStore4 -#define __TBB_FetchAndStore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t> -#endif - -#ifndef __TBB_FetchAndStore8 -#define __TBB_FetchAndStore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t> -#endif - -#ifndef __TBB_FetchAndStoreW -#define __TBB_FetchAndStoreW tbb::internal::__TBB_FetchAndStoreGeneric -#endif - -#if __TBB_DECL_FENCED_ATOMICS - -#ifndef __TBB_CompareAndSwap1__TBB_full_fence -#define __TBB_CompareAndSwap1__TBB_full_fence __TBB_CompareAndSwap1 -#endif -#ifndef __TBB_CompareAndSwap1acquire -#define __TBB_CompareAndSwap1acquire __TBB_CompareAndSwap1__TBB_full_fence -#endif -#ifndef __TBB_CompareAndSwap1release -#define __TBB_CompareAndSwap1release __TBB_CompareAndSwap1__TBB_full_fence -#endif - -#ifndef __TBB_CompareAndSwap2__TBB_full_fence -#define __TBB_CompareAndSwap2__TBB_full_fence __TBB_CompareAndSwap2 -#endif -#ifndef __TBB_CompareAndSwap2acquire -#define __TBB_CompareAndSwap2acquire __TBB_CompareAndSwap2__TBB_full_fence -#endif -#ifndef __TBB_CompareAndSwap2release -#define __TBB_CompareAndSwap2release __TBB_CompareAndSwap2__TBB_full_fence -#endif - -#ifndef __TBB_CompareAndSwap4__TBB_full_fence -#define __TBB_CompareAndSwap4__TBB_full_fence __TBB_CompareAndSwap4 -#endif -#ifndef 
__TBB_CompareAndSwap4acquire -#define __TBB_CompareAndSwap4acquire __TBB_CompareAndSwap4__TBB_full_fence -#endif -#ifndef __TBB_CompareAndSwap4release -#define __TBB_CompareAndSwap4release __TBB_CompareAndSwap4__TBB_full_fence -#endif - -#ifndef __TBB_CompareAndSwap8__TBB_full_fence -#define __TBB_CompareAndSwap8__TBB_full_fence __TBB_CompareAndSwap8 -#endif -#ifndef __TBB_CompareAndSwap8acquire -#define __TBB_CompareAndSwap8acquire __TBB_CompareAndSwap8__TBB_full_fence -#endif -#ifndef __TBB_CompareAndSwap8release -#define __TBB_CompareAndSwap8release __TBB_CompareAndSwap8__TBB_full_fence -#endif - -#ifndef __TBB_FetchAndAdd1__TBB_full_fence -#define __TBB_FetchAndAdd1__TBB_full_fence __TBB_FetchAndAdd1 -#endif -#ifndef __TBB_FetchAndAdd1acquire -#define __TBB_FetchAndAdd1acquire __TBB_FetchAndAdd1__TBB_full_fence -#endif -#ifndef __TBB_FetchAndAdd1release -#define __TBB_FetchAndAdd1release __TBB_FetchAndAdd1__TBB_full_fence -#endif - -#ifndef __TBB_FetchAndAdd2__TBB_full_fence -#define __TBB_FetchAndAdd2__TBB_full_fence __TBB_FetchAndAdd2 -#endif -#ifndef __TBB_FetchAndAdd2acquire -#define __TBB_FetchAndAdd2acquire __TBB_FetchAndAdd2__TBB_full_fence -#endif -#ifndef __TBB_FetchAndAdd2release -#define __TBB_FetchAndAdd2release __TBB_FetchAndAdd2__TBB_full_fence -#endif - -#ifndef __TBB_FetchAndAdd4__TBB_full_fence -#define __TBB_FetchAndAdd4__TBB_full_fence __TBB_FetchAndAdd4 -#endif -#ifndef __TBB_FetchAndAdd4acquire -#define __TBB_FetchAndAdd4acquire __TBB_FetchAndAdd4__TBB_full_fence -#endif -#ifndef __TBB_FetchAndAdd4release -#define __TBB_FetchAndAdd4release __TBB_FetchAndAdd4__TBB_full_fence -#endif - -#ifndef __TBB_FetchAndAdd8__TBB_full_fence -#define __TBB_FetchAndAdd8__TBB_full_fence __TBB_FetchAndAdd8 -#endif -#ifndef __TBB_FetchAndAdd8acquire -#define __TBB_FetchAndAdd8acquire __TBB_FetchAndAdd8__TBB_full_fence -#endif -#ifndef __TBB_FetchAndAdd8release -#define __TBB_FetchAndAdd8release __TBB_FetchAndAdd8__TBB_full_fence -#endif - -#ifndef __TBB_FetchAndStore1__TBB_full_fence -#define __TBB_FetchAndStore1__TBB_full_fence __TBB_FetchAndStore1 -#endif -#ifndef __TBB_FetchAndStore1acquire -#define __TBB_FetchAndStore1acquire __TBB_FetchAndStore1__TBB_full_fence -#endif -#ifndef __TBB_FetchAndStore1release -#define __TBB_FetchAndStore1release __TBB_FetchAndStore1__TBB_full_fence -#endif - -#ifndef __TBB_FetchAndStore2__TBB_full_fence -#define __TBB_FetchAndStore2__TBB_full_fence __TBB_FetchAndStore2 -#endif -#ifndef __TBB_FetchAndStore2acquire -#define __TBB_FetchAndStore2acquire __TBB_FetchAndStore2__TBB_full_fence -#endif -#ifndef __TBB_FetchAndStore2release -#define __TBB_FetchAndStore2release __TBB_FetchAndStore2__TBB_full_fence -#endif - -#ifndef __TBB_FetchAndStore4__TBB_full_fence -#define __TBB_FetchAndStore4__TBB_full_fence __TBB_FetchAndStore4 -#endif -#ifndef __TBB_FetchAndStore4acquire -#define __TBB_FetchAndStore4acquire __TBB_FetchAndStore4__TBB_full_fence -#endif -#ifndef __TBB_FetchAndStore4release -#define __TBB_FetchAndStore4release __TBB_FetchAndStore4__TBB_full_fence -#endif - -#ifndef __TBB_FetchAndStore8__TBB_full_fence -#define __TBB_FetchAndStore8__TBB_full_fence __TBB_FetchAndStore8 -#endif -#ifndef __TBB_FetchAndStore8acquire -#define __TBB_FetchAndStore8acquire __TBB_FetchAndStore8__TBB_full_fence -#endif -#ifndef __TBB_FetchAndStore8release -#define __TBB_FetchAndStore8release __TBB_FetchAndStore8__TBB_full_fence -#endif - -#endif // __TBB_DECL_FENCED_ATOMICS - -// Special atomic functions -#ifndef __TBB_FetchAndAddWrelease -#define 
__TBB_FetchAndAddWrelease __TBB_FetchAndAddW -#endif - -#ifndef __TBB_FetchAndIncrementWacquire -#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1) -#endif +// Preserving access to legacy APIs +using tbb::internal::__TBB_load_with_acquire; +using tbb::internal::__TBB_store_with_release; -#ifndef __TBB_FetchAndDecrementWrelease -#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1)) -#endif - -template -struct __TBB_machine_load_store { - static inline T load_with_acquire(const volatile T& location) { - T to_return = location; - __TBB_release_consistency_helper(); - return to_return; - } - - static inline void store_with_release(volatile T &location, T value) { - __TBB_release_consistency_helper(); - location = value; - } -}; - -#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS -#if _MSC_VER -using tbb::internal::int64_t; -#endif -// On 32-bit platforms, there should be definition of __TBB_Store8 and __TBB_Load8 -#ifndef __TBB_Store8 -inline void __TBB_Store8 (volatile void *ptr, int64_t value) { - for(;;) { - int64_t result = *(int64_t *)ptr; - if( __TBB_CompareAndSwap8(ptr,value,result)==result ) break; - } -} -#endif - -#ifndef __TBB_Load8 -inline int64_t __TBB_Load8 (const volatile void *ptr) { - const int64_t anyvalue = 3264; // Could be anything, just the same for comparand and new value - return __TBB_CompareAndSwap8(const_cast(ptr),anyvalue,anyvalue); -} -#endif - -template -struct __TBB_machine_load_store { - static inline T load_with_acquire(const volatile T& location) { - T to_return = (T)__TBB_Load8((const volatile void*)&location); - __TBB_release_consistency_helper(); - return to_return; - } - - static inline void store_with_release(volatile T& location, T value) { - __TBB_release_consistency_helper(); - __TBB_Store8((volatile void *)&location,(int64_t)value); - } -}; -#endif /* __TBB_WORDSIZE==4 */ - -#ifndef __TBB_load_with_acquire -template -inline T __TBB_load_with_acquire(const volatile T &location) { - return __TBB_machine_load_store::load_with_acquire(location); -} -#endif - -#ifndef __TBB_store_with_release -template -inline void __TBB_store_with_release(volatile T& location, V value) { - __TBB_machine_load_store::store_with_release(location,T(value)); -} -//! Overload that exists solely to avoid /Wp64 warnings. 
-inline void __TBB_store_with_release(volatile size_t& location, size_t value) { - __TBB_machine_load_store::store_with_release(location,value); -} -#endif +// Mapping historically used names to the ones expected by atomic_load_store_traits +#define __TBB_load_acquire __TBB_load_with_acquire +#define __TBB_store_release __TBB_store_with_release #ifndef __TBB_Log2 inline intptr_t __TBB_Log2( uintptr_t x ) { @@ -669,18 +778,19 @@ inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) { } #endif -#ifndef __TBB_Byte -typedef unsigned char __TBB_Byte; +#ifndef __TBB_Flag +typedef unsigned char __TBB_Flag; #endif +typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag; #ifndef __TBB_TryLockByte -inline bool __TBB_TryLockByte( __TBB_Byte &flag ) { - return __TBB_CompareAndSwap1(&flag,1,0)==0; +inline bool __TBB_TryLockByte( __TBB_atomic_flag &flag ) { + return __TBB_machine_cmpswp1(&flag,1,0)==0; } #endif #ifndef __TBB_LockByte -inline uintptr_t __TBB_LockByte( __TBB_Byte& flag ) { +inline __TBB_Flag __TBB_LockByte( __TBB_atomic_flag& flag ) { if ( !__TBB_TryLockByte(flag) ) { tbb::internal::atomic_backoff b; do { @@ -700,8 +810,7 @@ inline unsigned char __TBB_ReverseByte(unsigned char src) { #endif template -T __TBB_ReverseBits(T src) -{ +T __TBB_ReverseBits(T src) { T dst; unsigned char *original = (unsigned char *) &src; unsigned char *reversed = (unsigned char *) &dst;
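Finally, the byte-lock primitives above are the building blocks of TBB's simplest spin locks; a minimal wrapper might look like the following sketch (not part of the header, shown only to illustrate how __TBB_LockByte/__TBB_TryLockByte are meant to be used):

    class tiny_spin_lock {                 // illustrative sketch only
        __TBB_atomic_flag flag;
    public:
        tiny_spin_lock() : flag(0) {}
        void lock()     { __TBB_LockByte(flag); }            // spins with atomic_backoff until acquired
        bool try_lock() { return __TBB_TryLockByte(flag); }  // single attempt, returns success
        void unlock()   { __TBB_store_with_release(flag, 0); }
    };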