X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=tbb%2Finclude%2Ftbb%2Fmachine%2Flinux_ia64.h;h=c6313cbd468f68e7fa5e2529452d772449621049;hb=4c96b4064a92f65beead3cb2453d727187bed40f;hp=b815d3c086c2d480e03594f2056945aba77eb220;hpb=46ab0514ba58ee00183ff0584c7ea7c9e3d76494;p=casparcg diff --git a/tbb/include/tbb/machine/linux_ia64.h b/tbb/include/tbb/machine/linux_ia64.h index b815d3c08..c6313cbd4 100644 --- a/tbb/include/tbb/machine/linux_ia64.h +++ b/tbb/include/tbb/machine/linux_ia64.h @@ -26,64 +26,85 @@ the GNU General Public License. */ -#ifndef __TBB_machine_H +#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_ia64_H) #error Do not include this file directly; include tbb_machine.h instead #endif +#define __TBB_machine_linux_ia64_H + #include <stdint.h> #include <unistd.h> #include <ia64intrin.h> #define __TBB_WORDSIZE 8 #define __TBB_BIG_ENDIAN 0 -#define __TBB_DECL_FENCED_ATOMICS 1 + +#if __INTEL_COMPILER + #define __TBB_compiler_fence() + #define __TBB_control_consistency_helper() __TBB_compiler_fence() + #define __TBB_acquire_consistency_helper() + #define __TBB_release_consistency_helper() + #define __TBB_full_memory_fence() __mf() +#else + #define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory") + #define __TBB_control_consistency_helper() __TBB_compiler_fence() + // Even though GCC imbues volatile loads with acquire semantics, it sometimes moves + // loads over the acquire fence. The following helpers stop such incorrect code motion. 
+ #define __TBB_acquire_consistency_helper() __TBB_compiler_fence() + #define __TBB_release_consistency_helper() __TBB_compiler_fence() + #define __TBB_full_memory_fence() __asm__ __volatile__("mf": : :"memory") +#endif /* !__INTEL_COMPILER */ // Most of the functions will be in a .s file extern "C" { - int8_t __TBB_machine_cmpswp1__TBB_full_fence (volatile void *ptr, int8_t value, int8_t comparand); int8_t __TBB_machine_fetchadd1__TBB_full_fence (volatile void *ptr, int8_t addend); int8_t __TBB_machine_fetchadd1acquire(volatile void *ptr, int8_t addend); int8_t __TBB_machine_fetchadd1release(volatile void *ptr, int8_t addend); - int8_t __TBB_machine_fetchstore1acquire(volatile void *ptr, int8_t value); - int8_t __TBB_machine_fetchstore1release(volatile void *ptr, int8_t value); - int16_t __TBB_machine_cmpswp2__TBB_full_fence (volatile void *ptr, int16_t value, int16_t comparand); int16_t __TBB_machine_fetchadd2__TBB_full_fence (volatile void *ptr, int16_t addend); int16_t __TBB_machine_fetchadd2acquire(volatile void *ptr, int16_t addend); int16_t __TBB_machine_fetchadd2release(volatile void *ptr, int16_t addend); + + int32_t __TBB_machine_fetchadd4__TBB_full_fence (volatile void *ptr, int32_t value); + int32_t __TBB_machine_fetchadd4acquire(volatile void *ptr, int32_t addend); + int32_t __TBB_machine_fetchadd4release(volatile void *ptr, int32_t addend); + + int64_t __TBB_machine_fetchadd8__TBB_full_fence (volatile void *ptr, int64_t value); + int64_t __TBB_machine_fetchadd8acquire(volatile void *ptr, int64_t addend); + int64_t __TBB_machine_fetchadd8release(volatile void *ptr, int64_t addend); + + int8_t __TBB_machine_fetchstore1__TBB_full_fence (volatile void *ptr, int8_t value); + int8_t __TBB_machine_fetchstore1acquire(volatile void *ptr, int8_t value); + int8_t __TBB_machine_fetchstore1release(volatile void *ptr, int8_t value); + + int16_t __TBB_machine_fetchstore2__TBB_full_fence (volatile void *ptr, int16_t value); int16_t 
__TBB_machine_fetchstore2acquire(volatile void *ptr, int16_t value); int16_t __TBB_machine_fetchstore2release(volatile void *ptr, int16_t value); int32_t __TBB_machine_fetchstore4__TBB_full_fence (volatile void *ptr, int32_t value); int32_t __TBB_machine_fetchstore4acquire(volatile void *ptr, int32_t value); int32_t __TBB_machine_fetchstore4release(volatile void *ptr, int32_t value); - int32_t __TBB_machine_fetchadd4acquire(volatile void *ptr, int32_t addend); - int32_t __TBB_machine_fetchadd4release(volatile void *ptr, int32_t addend); - int64_t __TBB_machine_cmpswp8__TBB_full_fence (volatile void *ptr, int64_t value, int64_t comparand); int64_t __TBB_machine_fetchstore8__TBB_full_fence (volatile void *ptr, int64_t value); int64_t __TBB_machine_fetchstore8acquire(volatile void *ptr, int64_t value); int64_t __TBB_machine_fetchstore8release(volatile void *ptr, int64_t value); - int64_t __TBB_machine_fetchadd8acquire(volatile void *ptr, int64_t addend); - int64_t __TBB_machine_fetchadd8release(volatile void *ptr, int64_t addend); + int8_t __TBB_machine_cmpswp1__TBB_full_fence (volatile void *ptr, int8_t value, int8_t comparand); int8_t __TBB_machine_cmpswp1acquire(volatile void *ptr, int8_t value, int8_t comparand); int8_t __TBB_machine_cmpswp1release(volatile void *ptr, int8_t value, int8_t comparand); - int8_t __TBB_machine_fetchstore1__TBB_full_fence (volatile void *ptr, int8_t value); + int16_t __TBB_machine_cmpswp2__TBB_full_fence (volatile void *ptr, int16_t value, int16_t comparand); int16_t __TBB_machine_cmpswp2acquire(volatile void *ptr, int16_t value, int16_t comparand); int16_t __TBB_machine_cmpswp2release(volatile void *ptr, int16_t value, int16_t comparand); - int16_t __TBB_machine_fetchstore2__TBB_full_fence (volatile void *ptr, int16_t value); int32_t __TBB_machine_cmpswp4__TBB_full_fence (volatile void *ptr, int32_t value, int32_t comparand); int32_t __TBB_machine_cmpswp4acquire(volatile void *ptr, int32_t value, int32_t comparand); int32_t 
__TBB_machine_cmpswp4release(volatile void *ptr, int32_t value, int32_t comparand); - int32_t __TBB_machine_fetchadd4__TBB_full_fence (volatile void *ptr, int32_t value); + int64_t __TBB_machine_cmpswp8__TBB_full_fence (volatile void *ptr, int64_t value, int64_t comparand); int64_t __TBB_machine_cmpswp8acquire(volatile void *ptr, int64_t value, int64_t comparand); int64_t __TBB_machine_cmpswp8release(volatile void *ptr, int64_t value, int64_t comparand); - int64_t __TBB_machine_fetchadd8__TBB_full_fence (volatile void *ptr, int64_t value); int64_t __TBB_machine_lg(uint64_t value); void __TBB_machine_pause(int32_t delay); @@ -92,73 +113,71 @@ extern "C" { //! Retrieves the current RSE backing store pointer. IA64 specific. void* __TBB_get_bsp(); -} - -#define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1__TBB_full_fence(P,V,C) -#define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2__TBB_full_fence(P,V,C) - -#define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1__TBB_full_fence(P,V) -#define __TBB_FetchAndAdd1acquire(P,V) __TBB_machine_fetchadd1acquire(P,V) -#define __TBB_FetchAndAdd1release(P,V) __TBB_machine_fetchadd1release(P,V) -#define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2__TBB_full_fence(P,V) -#define __TBB_FetchAndAdd2acquire(P,V) __TBB_machine_fetchadd2acquire(P,V) -#define __TBB_FetchAndAdd2release(P,V) __TBB_machine_fetchadd2release(P,V) -#define __TBB_FetchAndAdd4acquire(P,V) __TBB_machine_fetchadd4acquire(P,V) -#define __TBB_FetchAndAdd4release(P,V) __TBB_machine_fetchadd4release(P,V) -#define __TBB_FetchAndAdd8acquire(P,V) __TBB_machine_fetchadd8acquire(P,V) -#define __TBB_FetchAndAdd8release(P,V) __TBB_machine_fetchadd8release(P,V) - -#define __TBB_FetchAndStore1acquire(P,V) __TBB_machine_fetchstore1acquire(P,V) -#define __TBB_FetchAndStore1release(P,V) __TBB_machine_fetchstore1release(P,V) -#define __TBB_FetchAndStore2acquire(P,V) __TBB_machine_fetchstore2acquire(P,V) -#define __TBB_FetchAndStore2release(P,V) 
__TBB_machine_fetchstore2release(P,V) -#define __TBB_FetchAndStore4acquire(P,V) __TBB_machine_fetchstore4acquire(P,V) -#define __TBB_FetchAndStore4release(P,V) __TBB_machine_fetchstore4release(P,V) -#define __TBB_FetchAndStore8acquire(P,V) __TBB_machine_fetchstore8acquire(P,V) -#define __TBB_FetchAndStore8release(P,V) __TBB_machine_fetchstore8release(P,V) - -#define __TBB_CompareAndSwap1acquire(P,V,C) __TBB_machine_cmpswp1acquire(P,V,C) -#define __TBB_CompareAndSwap1release(P,V,C) __TBB_machine_cmpswp1release(P,V,C) -#define __TBB_CompareAndSwap2acquire(P,V,C) __TBB_machine_cmpswp2acquire(P,V,C) -#define __TBB_CompareAndSwap2release(P,V,C) __TBB_machine_cmpswp2release(P,V,C) -#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4__TBB_full_fence(P,V,C) -#define __TBB_CompareAndSwap4acquire(P,V,C) __TBB_machine_cmpswp4acquire(P,V,C) -#define __TBB_CompareAndSwap4release(P,V,C) __TBB_machine_cmpswp4release(P,V,C) -#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8__TBB_full_fence(P,V,C) -#define __TBB_CompareAndSwap8acquire(P,V,C) __TBB_machine_cmpswp8acquire(P,V,C) -#define __TBB_CompareAndSwap8release(P,V,C) __TBB_machine_cmpswp8release(P,V,C) - -#define __TBB_FetchAndAdd4(P,V) __TBB_machine_fetchadd4__TBB_full_fence(P,V) -#define __TBB_FetchAndAdd8(P,V) __TBB_machine_fetchadd8__TBB_full_fence(P,V) - -#define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1__TBB_full_fence(P,V) -#define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2__TBB_full_fence(P,V) -#define __TBB_FetchAndStore4(P,V) __TBB_machine_fetchstore4__TBB_full_fence(P,V) -#define __TBB_FetchAndStore8(P,V) __TBB_machine_fetchstore8__TBB_full_fence(P,V) - -#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAdd8acquire(P,1) -#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAdd8release(P,-1) - -#ifndef __INTEL_COMPILER -/* Even though GCC imbues volatile loads with acquire semantics, - it sometimes moves loads over the acquire fence. 
The - fences defined here stop such incorrect code motion. */ -#define __TBB_release_consistency_helper() __asm__ __volatile__("": : :"memory") -#define __TBB_full_memory_fence() __asm__ __volatile__("mf": : :"memory") -#else -#define __TBB_release_consistency_helper() -#define __TBB_full_memory_fence() __mf() -#endif /* __INTEL_COMPILER */ -// Special atomic functions -#define __TBB_CompareAndSwapW(P,V,C) __TBB_CompareAndSwap8(P,V,C) -#define __TBB_FetchAndStoreW(P,V) __TBB_FetchAndStore8(P,V) -#define __TBB_FetchAndAddW(P,V) __TBB_FetchAndAdd8(P,V) -#define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAdd8release(P,V) - -// Not needed -#undef __TBB_Store8 -#undef __TBB_Load8 + int32_t __TBB_machine_load1_relaxed(const void *ptr); + int32_t __TBB_machine_load2_relaxed(const void *ptr); + int32_t __TBB_machine_load4_relaxed(const void *ptr); + int64_t __TBB_machine_load8_relaxed(const void *ptr); + + void __TBB_machine_store1_relaxed(void *ptr, int32_t value); + void __TBB_machine_store2_relaxed(void *ptr, int32_t value); + void __TBB_machine_store4_relaxed(void *ptr, int32_t value); + void __TBB_machine_store8_relaxed(void *ptr, int64_t value); +} // extern "C" + +// Mapping old entry points to the names corresponding to the new full_fence identifier. 
+#define __TBB_machine_fetchadd1full_fence __TBB_machine_fetchadd1__TBB_full_fence +#define __TBB_machine_fetchadd2full_fence __TBB_machine_fetchadd2__TBB_full_fence +#define __TBB_machine_fetchadd4full_fence __TBB_machine_fetchadd4__TBB_full_fence +#define __TBB_machine_fetchadd8full_fence __TBB_machine_fetchadd8__TBB_full_fence +#define __TBB_machine_fetchstore1full_fence __TBB_machine_fetchstore1__TBB_full_fence +#define __TBB_machine_fetchstore2full_fence __TBB_machine_fetchstore2__TBB_full_fence +#define __TBB_machine_fetchstore4full_fence __TBB_machine_fetchstore4__TBB_full_fence +#define __TBB_machine_fetchstore8full_fence __TBB_machine_fetchstore8__TBB_full_fence +#define __TBB_machine_cmpswp1full_fence __TBB_machine_cmpswp1__TBB_full_fence +#define __TBB_machine_cmpswp2full_fence __TBB_machine_cmpswp2__TBB_full_fence +#define __TBB_machine_cmpswp4full_fence __TBB_machine_cmpswp4__TBB_full_fence +#define __TBB_machine_cmpswp8full_fence __TBB_machine_cmpswp8__TBB_full_fence + +// Mapping relaxed operations to the entry points implementing them. +/** On IA64 RMW operations implicitly have acquire semantics. Thus one cannot + actually have completely relaxed RMW operation here. 
**/ +#define __TBB_machine_fetchadd1relaxed __TBB_machine_fetchadd1acquire +#define __TBB_machine_fetchadd2relaxed __TBB_machine_fetchadd2acquire +#define __TBB_machine_fetchadd4relaxed __TBB_machine_fetchadd4acquire +#define __TBB_machine_fetchadd8relaxed __TBB_machine_fetchadd8acquire +#define __TBB_machine_fetchstore1relaxed __TBB_machine_fetchstore1acquire +#define __TBB_machine_fetchstore2relaxed __TBB_machine_fetchstore2acquire +#define __TBB_machine_fetchstore4relaxed __TBB_machine_fetchstore4acquire +#define __TBB_machine_fetchstore8relaxed __TBB_machine_fetchstore8acquire +#define __TBB_machine_cmpswp1relaxed __TBB_machine_cmpswp1acquire +#define __TBB_machine_cmpswp2relaxed __TBB_machine_cmpswp2acquire +#define __TBB_machine_cmpswp4relaxed __TBB_machine_cmpswp4acquire +#define __TBB_machine_cmpswp8relaxed __TBB_machine_cmpswp8acquire + +#define __TBB_MACHINE_DEFINE_ATOMICS(S,V) \ + template <typename T> \ + struct machine_load_store_relaxed<T,S> { \ + static inline T load ( const T& location ) { \ + return (T)__TBB_machine_load##S##_relaxed(&location); \ + } \ + static inline void store ( T& location, T value ) { \ + __TBB_machine_store##S##_relaxed(&location, (V)value); \ + } \ + } + +namespace tbb { +namespace internal { + __TBB_MACHINE_DEFINE_ATOMICS(1,int8_t); + __TBB_MACHINE_DEFINE_ATOMICS(2,int16_t); + __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t); + __TBB_MACHINE_DEFINE_ATOMICS(8,int64_t); +}} // namespaces internal, tbb + +#undef __TBB_MACHINE_DEFINE_ATOMICS + +#define __TBB_USE_FENCED_ATOMICS 1 +#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 // Definition of Lock functions #define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P) @@ -167,4 +186,3 @@ extern "C" { // Definition of other utility functions #define __TBB_Pause(V) __TBB_machine_pause(V) #define __TBB_Log2(V) __TBB_machine_lg(V) -