diff --git a/tbb/include/tbb/machine/mac_ppc.h b/tbb/include/tbb/machine/mac_ppc.h
index 311403927ab9b115956b95d433c5f76bef72eff3..ad154fdee5d6976265442a9a88733eb17684d7a7 100644
--- a/tbb/include/tbb/machine/mac_ppc.h
+++ b/tbb/include/tbb/machine/mac_ppc.h
     the GNU General Public License.
 */
 
-#ifndef __TBB_machine_H
+#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_power_H)
 #error Do not include this file directly; include tbb_machine.h instead
 #endif
 
+#define __TBB_machine_gcc_power_H
+
 #include <stdint.h>
 #include <unistd.h>
 
-// This file is for PowerPC with compilers supporting GNU inline-assembler syntax (currently GNU g++ and IBM XL).
+// TODO: rename to gcc_power.h?
+// This file is for Power Architecture with compilers supporting GNU inline-assembler syntax (currently GNU g++ and IBM XL).
+// Note that XL V9.0 (sometimes?) has trouble dealing with empty input and/or clobber lists, so they should be avoided.
+
+#if __powerpc64__ || __ppc64__
+    // IBM XL documents __powerpc64__ (and __PPC64__).
+    // Apple documents __ppc64__ (with __ppc__ only on 32-bit).
+    #define __TBB_WORDSIZE 8
+#else
+    #define __TBB_WORDSIZE 4
+#endif
+
+// On Power Architecture, (lock-free) 64-bit atomics require 64-bit hardware:
+#if __TBB_WORDSIZE==8
+    // Do not change the following definition, because TBB itself will use 64-bit atomics in 64-bit builds.
+    #define __TBB_64BIT_ATOMICS 1
+#elif __bgp__
+    // Do not change the following definition on known 32-bit hardware.
+    #define __TBB_64BIT_ATOMICS 0
+#else
+    // To enable 64-bit atomics in 32-bit builds, set the value below to 1 instead of 0.
+    // You must make certain that the program will only use them on actual 64-bit hardware
+    // (which typically means that the entire program is only executed on such hardware),
+    // because their implementation involves machine instructions that are illegal elsewhere.
+    // The setting can be chosen independently per compilation unit,
+    // which also means that TBB itself does not need to be rebuilt.
+    // Alternatively (but only for the current architecture and TBB version),
+    // override the default as a predefined macro when invoking the compiler.
+    #ifndef __TBB_64BIT_ATOMICS
+    #define __TBB_64BIT_ATOMICS 0
+    #endif
+#endif
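+
+// For example, a compilation unit known to run only on 64-bit hardware could
+// enable them with a GCC-style invocation (illustrative only, not taken from
+// this repository's build system):
+//     g++ -m32 -D__TBB_64BIT_ATOMICS=1 -c unit.cpp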
 
-// Motivation for use of "#if defined(__powerpc64__) || defined(__ppc64__)" to detect a 64-bit environment:
-// IBM XL documents both __powerpc64__ and __PPC64__, and these also appear to work on g++ (documentation?)
-// Apple documents __ppc64__ (with __ppc__ only 32-bit, which is not portable even to other environments using g++)
 inline int32_t __TBB_machine_cmpswp4 (volatile void *ptr, int32_t value, int32_t comparand )
 {
     int32_t result;
 
     __asm__ __volatile__("sync\n"
-                         "0: lwarx %0,0,%2\n\t"  /* load w/ reservation */
-                         "cmpw %0,%4\n\t"        /* compare against comparand */
-                         "bne- 1f\n\t"           /* exit if not same */
-                         "stwcx. %3,0,%2\n\t"    /* store new_value */
-                         "bne- 0b\n"             /* retry if reservation lost */
-                         "1: sync"               /* the exit */
-                          : "=&r"(result), "=m"(* (int32_t*) ptr)
-                          : "r"(ptr), "r"(value), "r"(comparand), "m"(* (int32_t*) ptr)
-                          : "cr0", "memory");
+                         "0:\n\t"
+                         "lwarx %[res],0,%[ptr]\n\t"     /* load w/ reservation */
+                         "cmpw %[res],%[cmp]\n\t"        /* compare against comparand */
+                         "bne- 1f\n\t"                   /* exit if not same */
+                         "stwcx. %[val],0,%[ptr]\n\t"    /* store new value */
+                         "bne- 0b\n"                     /* retry if reservation lost */
+                         "1:\n\t"                        /* the exit */
+                         "isync"
+                         : [res]"=&r"(result)
+                         , "+m"(* (int32_t*) ptr)        /* redundant with "memory" */
+                         : [ptr]"r"(ptr)
+                         , [val]"r"(value)
+                         , [cmp]"r"(comparand)
+                         : "memory"                      /* compiler full fence */
+                         , "cr0"                         /* clobbered by cmp and/or stwcx. */
+                         );
     return result;
 }
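+
+// Note: __TBB_machine_cmpswp4 returns the value observed at *ptr before any
+// store, so the exchange succeeded iff the returned value equals the
+// comparand. A minimal sketch with hypothetical caller variables:
+//     int32_t old = __TBB_machine_cmpswp4(&word, new_value, expected);
+//     if( old==expected ) { /* the swap took effect */ }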
 
-#if defined(__powerpc64__) || defined(__ppc64__)
+#if __TBB_WORDSIZE==8
 
 inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
 {
     int64_t result;
     __asm__ __volatile__("sync\n"
-                         "0: ldarx %0,0,%2\n\t"  /* load w/ reservation */
-                         "cmpd %0,%4\n\t"        /* compare against comparand */
-                         "bne- 1f\n\t"           /* exit if not same */
-                         "stdcx. %3,0,%2\n\t"    /* store new_value */
-                         "bne- 0b\n"             /* retry if reservation lost */
-                         "1: sync"               /* the exit */
-                          : "=&r"(result), "=m"(* (int64_t*) ptr)
-                          : "r"(ptr), "r"(value), "r"(comparand), "m"(* (int64_t*) ptr)
-                          : "cr0", "memory");
+                         "0:\n\t"
+                         "ldarx %[res],0,%[ptr]\n\t"     /* load w/ reservation */
+                         "cmpd %[res],%[cmp]\n\t"        /* compare against comparand */
+                         "bne- 1f\n\t"                   /* exit if not same */
+                         "stdcx. %[val],0,%[ptr]\n\t"    /* store new value */
+                         "bne- 0b\n"                     /* retry if reservation lost */
+                         "1:\n\t"                        /* the exit */
+                         "isync"
+                         : [res]"=&r"(result)
+                         , "+m"(* (int64_t*) ptr)        /* redundant with "memory" */
+                         : [ptr]"r"(ptr)
+                         , [val]"r"(value)
+                         , [cmp]"r"(comparand)
+                         : "memory"                      /* compiler full fence */
+                         , "cr0"                         /* clobbered by cmp and/or stdcx. */
+                         );
     return result;
 }
-#else
-// Except for special circumstances, 32-bit builds are meant to run on actual 32-bit hardware
-// A locked implementation would also be a possibility
-#define __TBB_64BIT_ATOMICS 0
-#endif /* 64bit CAS */
 
-#define __TBB_BIG_ENDIAN 1
+#elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
 
-#if defined(__powerpc64__) || defined(__ppc64__)
-#define __TBB_WORDSIZE 8
-#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C)
-#else
-#define __TBB_WORDSIZE 4
-#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C)
-#endif
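+// In a 32-bit build an int64_t occupies a pair of 32-bit registers, so the
+// 64-bit operands below are passed through memory ("m" constraints) and moved
+// into single 64-bit registers by the asm itself (ld/std); the dummy variables
+// merely reserve those registers. As noted for __TBB_64BIT_ATOMICS above, this
+// is only legal on 64-bit hardware.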
+inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
+{
+    int64_t result;
+    int64_t value_register, comparand_register, result_register; // dummy variables to allocate registers
+    __asm__ __volatile__("sync\n\t"
+                         "ld %[val],%[valm]\n\t"
+                         "ld %[cmp],%[cmpm]\n"
+                         "0:\n\t"
+                         "ldarx %[res],0,%[ptr]\n\t"     /* load w/ reservation */
+                         "cmpd %[res],%[cmp]\n\t"        /* compare against comparand */
+                         "bne- 1f\n\t"                   /* exit if not same */
+                         "stdcx. %[val],0,%[ptr]\n\t"    /* store new value */
+                         "bne- 0b\n"                     /* retry if reservation lost */
+                         "1:\n\t"                        /* the exit */
+                         "std %[res],%[resm]\n\t"
+                         "isync"
+                         : [resm]"=m"(result)
+                         , [res] "=&r"(   result_register)
+                         , [val] "=&r"(    value_register)
+                         , [cmp] "=&r"(comparand_register)
+                         , "+m"(* (int64_t*) ptr)        /* redundant with "memory" */
+                         : [ptr] "r"(ptr)
+                         , [valm]"m"(value)
+                         , [cmpm]"m"(comparand)
+                         : "memory"                      /* compiler full fence */
+                         , "cr0"                         /* clobbered by cmpd and/or stdcx. */
+                         );
+    return result;
+}
+#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
 
-#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
-#if __TBB_64BIT_ATOMICS
-#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
-#endif
-#define __TBB_full_memory_fence() __asm__ __volatile__("sync": : :"memory")
-#define __TBB_release_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")
+#define __TBB_MACHINE_DEFINE_LOAD_STORE(S,load,store,compare)                                                 \
+    template <typename T>                                                                                     \
+    struct machine_load_store<T,S> {                                                                          \
+        static inline T load_with_acquire(const volatile T& location) {                                       \
+            T result;                                                                                         \
+            __asm__ __volatile__(load " %[res],0(%[ptr])\n"                                                   \
+                                 "0:\n\t"                                                                     \
+                                 compare " %[res],%[res]\n\t"                                                 \
+                                 "bne- 0b\n\t"                                                                \
+                                 "isync"                                                                      \
+                                 : [res]"=r"(result)                                                          \
+                                 : [ptr]"b"(&location) /* cannot use register 0 here */                       \
+                                 , "m"(location)       /* redundant with "memory" */                          \
+                                 : "memory"            /* compiler acquire fence */                           \
+                                 , "cr0"               /* clobbered by cmpw/cmpd */);                         \
+            return result;                                                                                    \
+        }                                                                                                     \
+        static inline void store_with_release(volatile T &location, T value) {                                \
+            __asm__ __volatile__("lwsync\n\t"                                                                 \
+                                 store " %[val],0(%[ptr])"                                                    \
+                                 : "=m"(location)      /* redundant with "memory" */                          \
+                                 : [ptr]"b"(&location) /* cannot use register 0 here */                       \
+                                 , [val]"r"(value)                                                            \
+                                 : "memory"/*compiler release fence*/ /*(cr0 not affected)*/);                \
+        }                                                                                                     \
+    };                                                                                                        \
+                                                                                                              \
+    template <typename T>                                                                                     \
+    struct machine_load_store_relaxed<T,S> {                                                                  \
+        static inline T load (const __TBB_atomic T& location) {                                               \
+            T result;                                                                                         \
+            __asm__ __volatile__(load " %[res],0(%[ptr])"                                                     \
+                                 : [res]"=r"(result)                                                          \
+                                 : [ptr]"b"(&location) /* cannot use register 0 here */                       \
+                                 , "m"(location)                                                              \
+                                 ); /*(no compiler fence)*/ /*(cr0 not affected)*/                            \
+            return result;                                                                                    \
+        }                                                                                                     \
+        static inline void store (__TBB_atomic T &location, T value) {                                        \
+            __asm__ __volatile__(store " %[val],0(%[ptr])"                                                    \
+                                 : "=m"(location)                                                             \
+                                 : [ptr]"b"(&location) /* cannot use register 0 here */                       \
+                                 , [val]"r"(value)                                                            \
+                                 ); /*(no compiler fence)*/ /*(cr0 not affected)*/                            \
+        }                                                                                                     \
+    };
+
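+// Each expansion of __TBB_MACHINE_DEFINE_LOAD_STORE(S,load,store,compare)
+// specializes machine_load_store<T,S> and machine_load_store_relaxed<T,S> for
+// size S. As an illustrative sketch, the 4-byte acquire load amounts to:
+//     lwz  rRES,0(rPTR)    # plain load
+//     0: cmpw rRES,rRES    # compare the loaded value with itself, feeding
+//     bne- 0b              # a never-taken branch that depends on the load
+//     isync                # branch+isync yields acquire ordering on Power
+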
+namespace tbb {
+namespace internal {
+    __TBB_MACHINE_DEFINE_LOAD_STORE(1,"lbz","stb","cmpw")
+    __TBB_MACHINE_DEFINE_LOAD_STORE(2,"lhz","sth","cmpw")
+    __TBB_MACHINE_DEFINE_LOAD_STORE(4,"lwz","stw","cmpw")
+
+#if __TBB_WORDSIZE==8
+
+    __TBB_MACHINE_DEFINE_LOAD_STORE(8,"ld" ,"std","cmpd")
+
+#elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
+
+    template <typename T>
+    struct machine_load_store<T,8> {
+        static inline T load_with_acquire(const volatile T& location) {
+            T result;
+            T result_register; // dummy variable to allocate a register
+            __asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
+                                 "std %[res],%[resm]\n"
+                                 "0:\n\t"
+                                 "cmpd %[res],%[res]\n\t"
+                                 "bne- 0b\n\t"
+                                 "isync"
+                                 : [resm]"=m"(result)
+                                 , [res]"=&r"(result_register)
+                                 : [ptr]"b"(&location) /* cannot use register 0 here */
+                                 , "m"(location)       /* redundant with "memory" */
+                                 : "memory"            /* compiler acquire fence */
+                                 , "cr0"               /* clobbered by cmpd */);
+            return result;
+        }
+
+        static inline void store_with_release(volatile T &location, T value) {
+            T value_register; // dummy variable to allocate a register
+            __asm__ __volatile__("lwsync\n\t"
+                                 "ld %[val],%[valm]\n\t"
+                                 "std %[val],0(%[ptr])"
+                                 : "=m"(location)      /* redundant with "memory" */
+                                 , [val]"=&r"(value_register)
+                                 : [ptr]"b"(&location) /* cannot use register 0 here */
+                                 , [valm]"m"(value)
+                                 : "memory"/*compiler release fence*/ /*(cr0 not affected)*/);
+        }
+    };
+
+    template <typename T>
+    struct machine_load_store_relaxed<T,8> {
+        static inline T load (const volatile T& location) {
+            T result;
+            T result_register; // dummy variable to allocate a register
+            __asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
+                                 "std %[res],%[resm]"
+                                 : [resm]"=m"(result)
+                                 , [res]"=&r"(result_register)
+                                 : [ptr]"b"(&location) /* cannot use register 0 here */
+                                 , "m"(location)
+                                 ); /*(no compiler fence)*/ /*(cr0 not affected)*/
+            return result;
+        }
+
+        static inline void store (volatile T &location, T value) {
+            T value_register; // dummy variable to allocate a register
+            __asm__ __volatile__("ld %[val],%[valm]\n\t"
+                                 "std %[val],0(%[ptr])"
+                                 : "=m"(location)
+                                 , [val]"=&r"(value_register)
+                                 : [ptr]"b"(&location) /* cannot use register 0 here */
+                                 , [valm]"m"(value)
+                                 ); /*(no compiler fence)*/ /*(cr0 not affected)*/
+        }
+    };
+    #define __TBB_machine_load_store_relaxed_8
+
+#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
+
+}} // namespaces internal, tbb
+
+#undef __TBB_MACHINE_DEFINE_LOAD_STORE
+
+#define __TBB_USE_GENERIC_PART_WORD_CAS 1
+#define __TBB_USE_GENERIC_FETCH_ADD     1
+#define __TBB_USE_GENERIC_FETCH_STORE   1
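+
+// The three macros above direct tbb_machine.h to synthesize part-word (1- and
+// 2-byte) compare-and-swap and all fetch-and-add/fetch-and-store operations
+// from the full-word CAS defined in this file.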
+
+#define __TBB_control_consistency_helper() __asm__ __volatile__("isync": : :"memory")
+#define __TBB_full_memory_fence()          __asm__ __volatile__( "sync": : :"memory")
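+
+// __TBB_control_consistency_helper() is meant to follow a conditional branch
+// that depends on a load (e.g. a spin loop re-reading a flag): on Power the
+// branch plus isync orders that load before later memory accesses, which is
+// cheaper than a full sync. A sketch with a hypothetical flag:
+//     while( flag!=expected ) {}              // dependent branch on the load
+//     __TBB_control_consistency_helper();     // acquire w.r.t. the flag load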
 
-#if !__IBMCPP__
-// "1501-230 (S) Internal compiler error; please contact your Service Representative"
 static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
-    // TODO: assumes sizeof(uintptr_t)<=8 resp. 4
-    #if defined(__powerpc64__) || defined(__ppc64__)
-    __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x)); // counting starts at 2^63
+    // cntlzd/cntlzw start counting at 2^63/2^31 respectively (on 64-bit hardware, cntlzw ignores any higher-order bits), and do not affect cr0
+#if __TBB_WORDSIZE==8
+    __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x));
     return 63-static_cast<intptr_t>(x);
-    #else
-    __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x)); // counting starts at 2^31 (on 64-bit hardware, higher-order bits are ignored)
+#else
+    __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x));
     return 31-static_cast<intptr_t>(x);
-    #endif
+#endif
 }
 #define __TBB_Log2(V) __TBB_machine_lg(V)
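+// For x>0, __TBB_machine_lg(x) computes floor(log2(x)): e.g. in a 64-bit build
+// x==1 has 63 leading zeros, giving 63-63==0, and x==64 has 57, giving 63-57==6.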
-#endif
 
-#define __TBB_Byte uint32_t // TODO: would this ever not be aligned without an alignment specification?
+// Assumes implicit alignment for any 32-bit value
+typedef uint32_t __TBB_Flag;
+#define __TBB_Flag __TBB_Flag
 
-inline bool __TBB_machine_trylockbyte( __TBB_Byte &flag ) {
+inline bool __TBB_machine_trylockbyte( __TBB_atomic __TBB_Flag &flag ) {
     return __TBB_machine_cmpswp4(&flag,1,0)==0;
 }
 #define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
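+
+// __TBB_TryLockByte(P) returns true iff the flag was atomically changed from
+// 0 to 1 (i.e. the CAS above observed 0). A minimal spin-acquire sketch with a
+// hypothetical flag variable:
+//     __TBB_atomic __TBB_Flag lock_flag = 0;
+//     while( !__TBB_TryLockByte(lock_flag) ) {}  // spin until acquired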