2.0. Updated tbb library.

diff --git a/tbb/include/tbb/machine/linux_ia64.h b/tbb/include/tbb/machine/linux_ia64.h
index b815d3c086c2d480e03594f2056945aba77eb220..c6313cbd468f68e7fa5e2529452d772449621049 100644
     the GNU General Public License.
 */
 
-#ifndef __TBB_machine_H
+#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_ia64_H)
 #error Do not include this file directly; include tbb_machine.h instead
 #endif
 
+#define __TBB_machine_linux_ia64_H
+
 #include <stdint.h>
 #include <unistd.h>
 #include <ia64intrin.h>
 
 #define __TBB_WORDSIZE 8
 #define __TBB_BIG_ENDIAN 0
-#define __TBB_DECL_FENCED_ATOMICS 1
+
+#if __INTEL_COMPILER
+    #define __TBB_compiler_fence()
+    #define __TBB_control_consistency_helper() __TBB_compiler_fence()
+    #define __TBB_acquire_consistency_helper()
+    #define __TBB_release_consistency_helper()
+    #define __TBB_full_memory_fence()          __mf()
+#else
+    #define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
+    #define __TBB_control_consistency_helper() __TBB_compiler_fence()
+    // Even though GCC imbues volatile loads with acquire semantics, it sometimes moves 
+    // loads over the acquire fence. The following helpers stop such incorrect code motion.
+    #define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
+    #define __TBB_release_consistency_helper() __TBB_compiler_fence()
+    #define __TBB_full_memory_fence()          __asm__ __volatile__("mf": : :"memory")
+#endif /* !__INTEL_COMPILER */
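
The helpers above are consumed by the generic layer in tbb_machine.h. A minimal sketch (illustrative only, not part of this diff; the wrapper names are hypothetical) of how a half-fenced load and store might be built on top of them:

    inline int64_t example_load_with_acquire( const volatile int64_t& location ) {
        int64_t value = location;               // volatile read (acquire semantics per the note above)
        __TBB_acquire_consistency_helper();     // stop later accesses from hoisting above the load
        return value;
    }

    inline void example_store_with_release( volatile int64_t& location, int64_t value ) {
        __TBB_release_consistency_helper();     // stop earlier accesses from sinking below the store
        location = value;                       // volatile write
    }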
 
 // Most of the functions will be in a .s file
 
 extern "C" {
-    int8_t __TBB_machine_cmpswp1__TBB_full_fence (volatile void *ptr, int8_t value, int8_t comparand); 
     int8_t __TBB_machine_fetchadd1__TBB_full_fence (volatile void *ptr, int8_t addend);
     int8_t __TBB_machine_fetchadd1acquire(volatile void *ptr, int8_t addend);
     int8_t __TBB_machine_fetchadd1release(volatile void *ptr, int8_t addend);
-    int8_t __TBB_machine_fetchstore1acquire(volatile void *ptr, int8_t value);
-    int8_t __TBB_machine_fetchstore1release(volatile void *ptr, int8_t value);
 
-    int16_t __TBB_machine_cmpswp2__TBB_full_fence (volatile void *ptr, int16_t value, int16_t comparand);
     int16_t __TBB_machine_fetchadd2__TBB_full_fence (volatile void *ptr, int16_t addend);
     int16_t __TBB_machine_fetchadd2acquire(volatile void *ptr, int16_t addend);
     int16_t __TBB_machine_fetchadd2release(volatile void *ptr, int16_t addend);
+
+    int32_t __TBB_machine_fetchadd4__TBB_full_fence (volatile void *ptr, int32_t value);
+    int32_t __TBB_machine_fetchadd4acquire(volatile void *ptr, int32_t addend);
+    int32_t __TBB_machine_fetchadd4release(volatile void *ptr, int32_t addend);
+
+    int64_t __TBB_machine_fetchadd8__TBB_full_fence (volatile void *ptr, int64_t value);
+    int64_t __TBB_machine_fetchadd8acquire(volatile void *ptr, int64_t addend);
+    int64_t __TBB_machine_fetchadd8release(volatile void *ptr, int64_t addend);
+
+    int8_t __TBB_machine_fetchstore1__TBB_full_fence (volatile void *ptr, int8_t value);
+    int8_t __TBB_machine_fetchstore1acquire(volatile void *ptr, int8_t value);
+    int8_t __TBB_machine_fetchstore1release(volatile void *ptr, int8_t value);
+
+    int16_t __TBB_machine_fetchstore2__TBB_full_fence (volatile void *ptr, int16_t value);
     int16_t __TBB_machine_fetchstore2acquire(volatile void *ptr, int16_t value);
     int16_t __TBB_machine_fetchstore2release(volatile void *ptr, int16_t value);
 
     int32_t __TBB_machine_fetchstore4__TBB_full_fence (volatile void *ptr, int32_t value);
     int32_t __TBB_machine_fetchstore4acquire(volatile void *ptr, int32_t value);
     int32_t __TBB_machine_fetchstore4release(volatile void *ptr, int32_t value);
-    int32_t __TBB_machine_fetchadd4acquire(volatile void *ptr, int32_t addend);
-    int32_t __TBB_machine_fetchadd4release(volatile void *ptr, int32_t addend);
 
-    int64_t __TBB_machine_cmpswp8__TBB_full_fence (volatile void *ptr, int64_t value, int64_t comparand);
     int64_t __TBB_machine_fetchstore8__TBB_full_fence (volatile void *ptr, int64_t value);
     int64_t __TBB_machine_fetchstore8acquire(volatile void *ptr, int64_t value);
     int64_t __TBB_machine_fetchstore8release(volatile void *ptr, int64_t value);
-    int64_t __TBB_machine_fetchadd8acquire(volatile void *ptr, int64_t addend);
-    int64_t __TBB_machine_fetchadd8release(volatile void *ptr, int64_t addend);
 
+    int8_t __TBB_machine_cmpswp1__TBB_full_fence (volatile void *ptr, int8_t value, int8_t comparand); 
     int8_t __TBB_machine_cmpswp1acquire(volatile void *ptr, int8_t value, int8_t comparand); 
     int8_t __TBB_machine_cmpswp1release(volatile void *ptr, int8_t value, int8_t comparand); 
-    int8_t __TBB_machine_fetchstore1__TBB_full_fence (volatile void *ptr, int8_t value);
 
+    int16_t __TBB_machine_cmpswp2__TBB_full_fence (volatile void *ptr, int16_t value, int16_t comparand);
     int16_t __TBB_machine_cmpswp2acquire(volatile void *ptr, int16_t value, int16_t comparand); 
     int16_t __TBB_machine_cmpswp2release(volatile void *ptr, int16_t value, int16_t comparand); 
-    int16_t __TBB_machine_fetchstore2__TBB_full_fence (volatile void *ptr, int16_t value);
 
     int32_t __TBB_machine_cmpswp4__TBB_full_fence (volatile void *ptr, int32_t value, int32_t comparand);
     int32_t __TBB_machine_cmpswp4acquire(volatile void *ptr, int32_t value, int32_t comparand); 
     int32_t __TBB_machine_cmpswp4release(volatile void *ptr, int32_t value, int32_t comparand); 
-    int32_t __TBB_machine_fetchadd4__TBB_full_fence (volatile void *ptr, int32_t value);
 
+    int64_t __TBB_machine_cmpswp8__TBB_full_fence (volatile void *ptr, int64_t value, int64_t comparand);
     int64_t __TBB_machine_cmpswp8acquire(volatile void *ptr, int64_t value, int64_t comparand); 
     int64_t __TBB_machine_cmpswp8release(volatile void *ptr, int64_t value, int64_t comparand); 
-    int64_t __TBB_machine_fetchadd8__TBB_full_fence (volatile void *ptr, int64_t value);
 
     int64_t __TBB_machine_lg(uint64_t value);
     void __TBB_machine_pause(int32_t delay);
@@ -92,73 +113,71 @@ extern "C" {
 
     //! Retrieves the current RSE backing store pointer. IA64 specific.
     void* __TBB_get_bsp();
-}
-
-#define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1__TBB_full_fence(P,V,C)
-#define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2__TBB_full_fence(P,V,C) 
-
-#define __TBB_FetchAndAdd1(P,V)        __TBB_machine_fetchadd1__TBB_full_fence(P,V)
-#define __TBB_FetchAndAdd1acquire(P,V) __TBB_machine_fetchadd1acquire(P,V)
-#define __TBB_FetchAndAdd1release(P,V) __TBB_machine_fetchadd1release(P,V)
-#define __TBB_FetchAndAdd2(P,V)        __TBB_machine_fetchadd2__TBB_full_fence(P,V)
-#define __TBB_FetchAndAdd2acquire(P,V) __TBB_machine_fetchadd2acquire(P,V)
-#define __TBB_FetchAndAdd2release(P,V) __TBB_machine_fetchadd2release(P,V)
-#define __TBB_FetchAndAdd4acquire(P,V) __TBB_machine_fetchadd4acquire(P,V)
-#define __TBB_FetchAndAdd4release(P,V) __TBB_machine_fetchadd4release(P,V)
-#define __TBB_FetchAndAdd8acquire(P,V) __TBB_machine_fetchadd8acquire(P,V)
-#define __TBB_FetchAndAdd8release(P,V) __TBB_machine_fetchadd8release(P,V)
-
-#define __TBB_FetchAndStore1acquire(P,V) __TBB_machine_fetchstore1acquire(P,V)
-#define __TBB_FetchAndStore1release(P,V) __TBB_machine_fetchstore1release(P,V)
-#define __TBB_FetchAndStore2acquire(P,V) __TBB_machine_fetchstore2acquire(P,V)
-#define __TBB_FetchAndStore2release(P,V) __TBB_machine_fetchstore2release(P,V)
-#define __TBB_FetchAndStore4acquire(P,V) __TBB_machine_fetchstore4acquire(P,V)
-#define __TBB_FetchAndStore4release(P,V) __TBB_machine_fetchstore4release(P,V)
-#define __TBB_FetchAndStore8acquire(P,V) __TBB_machine_fetchstore8acquire(P,V)
-#define __TBB_FetchAndStore8release(P,V) __TBB_machine_fetchstore8release(P,V)
-
-#define __TBB_CompareAndSwap1acquire(P,V,C) __TBB_machine_cmpswp1acquire(P,V,C)
-#define __TBB_CompareAndSwap1release(P,V,C) __TBB_machine_cmpswp1release(P,V,C)
-#define __TBB_CompareAndSwap2acquire(P,V,C) __TBB_machine_cmpswp2acquire(P,V,C)
-#define __TBB_CompareAndSwap2release(P,V,C) __TBB_machine_cmpswp2release(P,V,C)
-#define __TBB_CompareAndSwap4(P,V,C)        __TBB_machine_cmpswp4__TBB_full_fence(P,V,C)
-#define __TBB_CompareAndSwap4acquire(P,V,C) __TBB_machine_cmpswp4acquire(P,V,C)
-#define __TBB_CompareAndSwap4release(P,V,C) __TBB_machine_cmpswp4release(P,V,C)
-#define __TBB_CompareAndSwap8(P,V,C)        __TBB_machine_cmpswp8__TBB_full_fence(P,V,C)
-#define __TBB_CompareAndSwap8acquire(P,V,C) __TBB_machine_cmpswp8acquire(P,V,C)
-#define __TBB_CompareAndSwap8release(P,V,C) __TBB_machine_cmpswp8release(P,V,C)
-
-#define __TBB_FetchAndAdd4(P,V) __TBB_machine_fetchadd4__TBB_full_fence(P,V)
-#define __TBB_FetchAndAdd8(P,V) __TBB_machine_fetchadd8__TBB_full_fence(P,V)
-
-#define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1__TBB_full_fence(P,V)
-#define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2__TBB_full_fence(P,V)
-#define __TBB_FetchAndStore4(P,V) __TBB_machine_fetchstore4__TBB_full_fence(P,V)
-#define __TBB_FetchAndStore8(P,V) __TBB_machine_fetchstore8__TBB_full_fence(P,V)
-
-#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAdd8acquire(P,1)
-#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAdd8release(P,-1)
-
-#ifndef __INTEL_COMPILER
-/* Even though GCC imbues volatile loads with acquire semantics, 
-   it sometimes moves loads over the acquire fence.  The
-   fences defined here stop such incorrect code motion. */
-#define __TBB_release_consistency_helper() __asm__ __volatile__("": : :"memory")
-#define __TBB_full_memory_fence() __asm__ __volatile__("mf": : :"memory")
-#else
-#define __TBB_release_consistency_helper()
-#define __TBB_full_memory_fence() __mf()
-#endif /* __INTEL_COMPILER */
 
-// Special atomic functions
-#define __TBB_CompareAndSwapW(P,V,C)   __TBB_CompareAndSwap8(P,V,C)
-#define __TBB_FetchAndStoreW(P,V)      __TBB_FetchAndStore8(P,V)
-#define __TBB_FetchAndAddW(P,V)        __TBB_FetchAndAdd8(P,V)
-#define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAdd8release(P,V)
-
-// Not needed
-#undef __TBB_Store8
-#undef __TBB_Load8
+    int32_t __TBB_machine_load1_relaxed(const void *ptr);
+    int32_t __TBB_machine_load2_relaxed(const void *ptr);
+    int32_t __TBB_machine_load4_relaxed(const void *ptr);
+    int64_t __TBB_machine_load8_relaxed(const void *ptr);
+
+    void __TBB_machine_store1_relaxed(void *ptr, int32_t value);
+    void __TBB_machine_store2_relaxed(void *ptr, int32_t value);
+    void __TBB_machine_store4_relaxed(void *ptr, int32_t value);
+    void __TBB_machine_store8_relaxed(void *ptr, int64_t value);
+} // extern "C"
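
The entry points above follow the usual TBB convention: compare-and-swap returns the value observed at *ptr before the operation. A minimal sketch (illustrative only; the function name is hypothetical) of a CAS loop built on the 8-byte variant:

    inline int64_t example_atomic_increment( volatile int64_t* counter ) {
        int64_t old_value, new_value;
        do {
            old_value = *counter;               // volatile read of the current value
            new_value = old_value + 1;
        } while ( __TBB_machine_cmpswp8__TBB_full_fence( counter, new_value, old_value ) != old_value );
        return new_value;                       // value actually installed by this thread
    }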
+
+// Mapping old entry points to the names corresponding to the new full_fence identifier.
+#define __TBB_machine_fetchadd1full_fence   __TBB_machine_fetchadd1__TBB_full_fence
+#define __TBB_machine_fetchadd2full_fence   __TBB_machine_fetchadd2__TBB_full_fence
+#define __TBB_machine_fetchadd4full_fence   __TBB_machine_fetchadd4__TBB_full_fence
+#define __TBB_machine_fetchadd8full_fence   __TBB_machine_fetchadd8__TBB_full_fence
+#define __TBB_machine_fetchstore1full_fence __TBB_machine_fetchstore1__TBB_full_fence
+#define __TBB_machine_fetchstore2full_fence __TBB_machine_fetchstore2__TBB_full_fence
+#define __TBB_machine_fetchstore4full_fence __TBB_machine_fetchstore4__TBB_full_fence
+#define __TBB_machine_fetchstore8full_fence __TBB_machine_fetchstore8__TBB_full_fence
+#define __TBB_machine_cmpswp1full_fence     __TBB_machine_cmpswp1__TBB_full_fence
+#define __TBB_machine_cmpswp2full_fence     __TBB_machine_cmpswp2__TBB_full_fence 
+#define __TBB_machine_cmpswp4full_fence     __TBB_machine_cmpswp4__TBB_full_fence
+#define __TBB_machine_cmpswp8full_fence     __TBB_machine_cmpswp8__TBB_full_fence
+
+// Mapping relaxed operations to the entry points implementing them.
+/** On IA64 RMW operations implicitly have acquire semantics. Thus one cannot
+    actually have completely relaxed RMW operation here. **/
+#define __TBB_machine_fetchadd1relaxed      __TBB_machine_fetchadd1acquire
+#define __TBB_machine_fetchadd2relaxed      __TBB_machine_fetchadd2acquire
+#define __TBB_machine_fetchadd4relaxed      __TBB_machine_fetchadd4acquire
+#define __TBB_machine_fetchadd8relaxed      __TBB_machine_fetchadd8acquire
+#define __TBB_machine_fetchstore1relaxed    __TBB_machine_fetchstore1acquire
+#define __TBB_machine_fetchstore2relaxed    __TBB_machine_fetchstore2acquire
+#define __TBB_machine_fetchstore4relaxed    __TBB_machine_fetchstore4acquire
+#define __TBB_machine_fetchstore8relaxed    __TBB_machine_fetchstore8acquire
+#define __TBB_machine_cmpswp1relaxed        __TBB_machine_cmpswp1acquire
+#define __TBB_machine_cmpswp2relaxed        __TBB_machine_cmpswp2acquire 
+#define __TBB_machine_cmpswp4relaxed        __TBB_machine_cmpswp4acquire
+#define __TBB_machine_cmpswp8relaxed        __TBB_machine_cmpswp8acquire
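
A minimal sketch (illustrative only; the function name is hypothetical) of what this mapping means for a caller: a nominally relaxed fetch-and-add still executes with acquire semantics on IA64.

    inline int32_t example_relaxed_fetch_add( volatile int32_t* x ) {
        // Expands to __TBB_machine_fetchadd4acquire(x, 1): IA64 offers no weaker RMW form.
        return __TBB_machine_fetchadd4relaxed( x, 1 );
    }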
+
+#define __TBB_MACHINE_DEFINE_ATOMICS(S,V)                               \
+    template <typename T>                                               \
+    struct machine_load_store_relaxed<T,S> {                      \
+        static inline T load ( const T& location ) {                    \
+            return (T)__TBB_machine_load##S##_relaxed(&location);       \
+        }                                                               \
+        static inline void store ( T& location, T value ) {             \
+            __TBB_machine_store##S##_relaxed(&location, (V)value);      \
+        }                                                               \
+    }
+
+namespace tbb {
+namespace internal {
+    __TBB_MACHINE_DEFINE_ATOMICS(1,int8_t);
+    __TBB_MACHINE_DEFINE_ATOMICS(2,int16_t);
+    __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t);
+    __TBB_MACHINE_DEFINE_ATOMICS(8,int64_t);
+}} // namespaces internal, tbb
+
+#undef __TBB_MACHINE_DEFINE_ATOMICS
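
These specializations complete the machine_load_store_relaxed primary template that tbb_machine.h is assumed to declare. A minimal usage sketch (illustrative only; the function names are hypothetical):

    inline int32_t example_relaxed_read( const int32_t& shared ) {
        return tbb::internal::machine_load_store_relaxed<int32_t, sizeof(int32_t)>::load( shared );
    }

    inline void example_relaxed_write( int32_t& shared, int32_t value ) {
        tbb::internal::machine_load_store_relaxed<int32_t, sizeof(int32_t)>::store( shared, value );
    }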
+
+#define __TBB_USE_FENCED_ATOMICS 1
+#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
 
 // Definition of Lock functions
 #define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
@@ -167,4 +186,3 @@ extern "C" {
 // Definition of other utility functions
 #define __TBB_Pause(V) __TBB_machine_pause(V)
 #define __TBB_Log2(V)  __TBB_machine_lg(V)
-
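
A minimal spin-lock sketch on top of the lock and pause macros (illustrative only; assumes the usual TBB conventions that a zero byte means unlocked, that __TBB_TryLockByte is applied to the flag byte itself and returns non-zero on success, and that the backoff delay value is arbitrary):

    inline void example_lock_byte( volatile unsigned char& flag ) {
        while ( !__TBB_TryLockByte( flag ) )
            __TBB_Pause( 16 );                  // brief backoff before retrying
    }

    inline void example_unlock_byte( volatile unsigned char& flag ) {
        __TBB_release_consistency_helper();     // keep the critical section above the release store
        flag = 0;                               // volatile store releases the lock
    }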