git.sesse.net Git - casparcg/blob - tbb/include/tbb/machine/mac_ppc.h
2.0. Updated tbb library.
[casparcg] / tbb / include / tbb / machine / mac_ppc.h
1 /*
2     Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
3
4     This file is part of Threading Building Blocks.
5
6     Threading Building Blocks is free software; you can redistribute it
7     and/or modify it under the terms of the GNU General Public License
8     version 2 as published by the Free Software Foundation.
9
10     Threading Building Blocks is distributed in the hope that it will be
11     useful, but WITHOUT ANY WARRANTY; without even the implied warranty
12     of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License
16     along with Threading Building Blocks; if not, write to the Free Software
17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18
19     As a special exception, you may use this file as part of a free software
20     library without restriction.  Specifically, if other files instantiate
21     templates or use macros or inline functions from this file, or you compile
22     this file and link it with other files to produce an executable, this
23     file does not by itself cause the resulting executable to be covered by
24     the GNU General Public License.  This exception does not however
25     invalidate any other reasons why the executable file might be covered by
26     the GNU General Public License.
27 */
28
// Inclusion check: refuse to compile unless __TBB_machine_H is already defined
// (presumably by tbb_machine.h, which the error text names as the proper entry point)
// and this header has not been processed before.
29 #if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_power_H)
30 #error Do not include this file directly; include tbb_machine.h instead
31 #endif
32
// Marks this header as processed; a second inclusion trips the #error above.
33 #define __TBB_machine_gcc_power_H
34
35 #include <stdint.h>
36 #include <unistd.h>
37
38 // TODO: rename to gcc_power.h?
39 // This file is for Power Architecture with compilers supporting GNU inline-assembler syntax (currently GNU g++ and IBM XL).
40 // Note that XL V9.0 (sometimes?) has trouble dealing with empty input and/or clobber lists, so they should be avoided.
41
// __TBB_WORDSIZE: 8 when one of the compiler's 64-bit Power macros is set, otherwise 4.
42 #if __powerpc64__ || __ppc64__
43     // IBM XL documents __powerpc64__ (and __PPC64__).
44     // Apple documents __ppc64__ (with __ppc__ only on 32-bit).
45     #define __TBB_WORDSIZE 8
46 #else
47     #define __TBB_WORDSIZE 4
48 #endif
49
// __TBB_64BIT_ATOMICS: 1 selects the 8-byte compare-and-swap and load/store code below.
// It is forced to 1 in 64-bit builds, forced to 0 when __bgp__ is set, and otherwise
// defaults to 0 unless the user predefines it.
50 // On Power Architecture, (lock-free) 64-bit atomics require 64-bit hardware:
51 #if __TBB_WORDSIZE==8
52     // Do not change the following definition, because TBB itself will use 64-bit atomics in 64-bit builds.
53     #define __TBB_64BIT_ATOMICS 1
54 #elif __bgp__
55     // Do not change the following definition on known 32-bit hardware.
56     #define __TBB_64BIT_ATOMICS 0
57 #else
58     // To enable 64-bit atomics in 32-bit builds, set the value below to 1 instead of 0.
59     // You must make certain that the program will only use them on actual 64-bit hardware
60     // (which typically means that the entire program is only executed on such hardware),
61     // because their implementation involves machine instructions that are illegal elsewhere.
62     // The setting can be chosen independently per compilation unit,
63     // which also means that TBB itself does not need to be rebuilt.
64     // Alternatively (but only for the current architecture and TBB version),
65     // override the default as a predefined macro when invoking the compiler.
66     #ifndef __TBB_64BIT_ATOMICS
67     #define __TBB_64BIT_ATOMICS 0
68     #endif
69 #endif
70
// 32-bit compare-and-swap.
//
// Loads the word at ptr with a reservation (lwarx); if it equals comparand, tries to
// store value (stwcx.) and repeats the whole load/compare/store sequence until the
// store succeeds. If the loaded word differs from comparand, nothing is stored.
// The loop is preceded by "sync" and followed by "isync".
//
//   ptr        address of the 32-bit word to update
//   value      word to store when the comparison succeeds
//   comparand  expected current contents of *ptr
//   returns    the word that was loaded from *ptr; the store took place
//              exactly when this equals comparand
71 inline int32_t __TBB_machine_cmpswp4 (volatile void *ptr, int32_t value, int32_t comparand )
72 {
73     int32_t result;
74
75     __asm__ __volatile__("sync\n"
76                          "0:\n\t"
77                          "lwarx %[res],0,%[ptr]\n\t"     /* load w/ reservation */
78                          "cmpw %[res],%[cmp]\n\t"        /* compare against comparand */
79                          "bne- 1f\n\t"                   /* exit if not same */
80                          "stwcx. %[val],0,%[ptr]\n\t"    /* store new value */
81                          "bne- 0b\n"                     /* retry if reservation lost */
82                          "1:\n\t"                        /* the exit */
83                          "isync"
84                          : [res]"=&r"(result)
85                          , "+m"(* (int32_t*) ptr)        /* redundant with "memory" */
86                          : [ptr]"r"(ptr)
87                          , [val]"r"(value)
88                          , [cmp]"r"(comparand)
89                          : "memory"                      /* compiler full fence */
90                          , "cr0"                         /* clobbered by cmp and/or stwcx. */
91                          );
92     return result;
93 }
94
95 #if __TBB_WORDSIZE==8
96
// 64-bit compare-and-swap for 64-bit builds (__TBB_WORDSIZE==8).
//
// Same sequence as the 32-bit version, using the doubleword instructions
// ldarx / cmpd / stdcx.; bracketed by "sync" before and "isync" after.
//
//   ptr        address of the 64-bit doubleword to update
//   value      doubleword to store when the comparison succeeds
//   comparand  expected current contents of *ptr
//   returns    the doubleword that was loaded from *ptr; the store took place
//              exactly when this equals comparand
97 inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
98 {
99     int64_t result;
100     __asm__ __volatile__("sync\n"
101                          "0:\n\t"
102                          "ldarx %[res],0,%[ptr]\n\t"     /* load w/ reservation */
103                          "cmpd %[res],%[cmp]\n\t"        /* compare against comparand */
104                          "bne- 1f\n\t"                   /* exit if not same */
105                          "stdcx. %[val],0,%[ptr]\n\t"    /* store new value */
106                          "bne- 0b\n"                     /* retry if reservation lost */
107                          "1:\n\t"                        /* the exit */
108                          "isync"
109                          : [res]"=&r"(result)
110                          , "+m"(* (int64_t*) ptr)        /* redundant with "memory" */
111                          : [ptr]"r"(ptr)
112                          , [val]"r"(value)
113                          , [cmp]"r"(comparand)
114                          : "memory"                      /* compiler full fence */
115                          , "cr0"                         /* clobbered by cmp and/or stdcx. */
116                          );
117     return result;
118 }
119
120 #elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
121
// 64-bit compare-and-swap for 32-bit builds with __TBB_64BIT_ATOMICS enabled.
//
// value and comparand are handed to the assembler as memory operands and loaded
// with "ld" into registers that the compiler allocates for the dummy variables;
// the loaded doubleword is written back to the local `result` with "std".
// NOTE(review): presumably this detour exists because a 32-bit build cannot pass a
// 64-bit value as a single "r" operand -- confirm against the compiler documentation.
// The ldarx / cmpd / stdcx. loop itself matches the 64-bit build, with "sync"
// before it and "isync" after it.
//
//   ptr        address of the 64-bit doubleword to update
//   value      doubleword to store when the comparison succeeds
//   comparand  expected current contents of *ptr
//   returns    the doubleword that was loaded from *ptr; the store took place
//              exactly when this equals comparand
122 inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
123 {
124     int64_t result;
125     int64_t value_register, comparand_register, result_register; // dummy variables to allocate registers
126     __asm__ __volatile__("sync\n\t"
127                          "ld %[val],%[valm]\n\t"
128                          "ld %[cmp],%[cmpm]\n"
129                          "0:\n\t"
130                          "ldarx %[res],0,%[ptr]\n\t"     /* load w/ reservation */
131                          "cmpd %[res],%[cmp]\n\t"        /* compare against comparand */
132                          "bne- 1f\n\t"                   /* exit if not same */
133                          "stdcx. %[val],0,%[ptr]\n\t"    /* store new value */
134                          "bne- 0b\n"                     /* retry if reservation lost */
135                          "1:\n\t"                        /* the exit */
136                          "std %[res],%[resm]\n\t"
137                          "isync"
138                          : [resm]"=m"(result)
139                          , [res] "=&r"(   result_register)
140                          , [val] "=&r"(    value_register)
141                          , [cmp] "=&r"(comparand_register)
142                          , "+m"(* (int64_t*) ptr)        /* redundant with "memory" */
143                          : [ptr] "r"(ptr)
144                          , [valm]"m"(value)
145                          , [cmpm]"m"(comparand)
146                          : "memory"                      /* compiler full fence */
147                          , "cr0"                         /* clobbered by cmpd and/or stdcx. */
148                          );
149     return result;
150 }
151 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
152
// __TBB_MACHINE_DEFINE_LOAD_STORE(S,load,store,compare)
//
// Expands to two partial specializations for operand size S:
//   machine_load_store<T,S>
//     load_with_acquire:  <load>, then <compare> of the loaded register with itself,
//                         a "bne-" on that comparison (a value always equals itself,
//                         so the branch falls through), then "isync"; the asm has a
//                         "memory" clobber.
//     store_with_release: "lwsync" followed by <store>; the asm has a "memory" clobber.
//   machine_load_store_relaxed<T,S>
//     load / store:       a single <load> or <store> with no fence instruction and
//                         no "memory" clobber.
//
//   S        size argument of the specialization (instantiated below with 1, 2, 4, 8)
//   load     string literal: load mnemonic pasted into the asm template
//   store    string literal: store mnemonic pasted into the asm template
//   compare  string literal: compare mnemonic used by load_with_acquire
//
// The primary templates machine_load_store and machine_load_store_relaxed are not
// declared in this header; they must be visible where the macro is expanded.
// The address operand uses constraint "b" because register 0 cannot serve as the
// base register in the D-form addressing used here.
153 #define __TBB_MACHINE_DEFINE_LOAD_STORE(S,load,store,compare)                                                 \
154     template <typename T>                                                                                     \
155     struct machine_load_store<T,S> {                                                                          \
156         static inline T load_with_acquire(const volatile T& location) {                                       \
157             T result;                                                                                         \
158             __asm__ __volatile__(load " %[res],0(%[ptr])\n"                                                   \
159                                  "0:\n\t"                                                                     \
160                                  compare " %[res],%[res]\n\t"                                                 \
161                                  "bne- 0b\n\t"                                                                \
162                                  "isync"                                                                      \
163                                  : [res]"=r"(result)                                                          \
164                                  : [ptr]"b"(&location) /* cannot use register 0 here */                       \
165                                  , "m"(location)       /* redundant with "memory" */                          \
166                                  : "memory"            /* compiler acquire fence */                           \
167                                  , "cr0"               /* clobbered by cmpw/cmpd */);                         \
168             return result;                                                                                    \
169         }                                                                                                     \
170         static inline void store_with_release(volatile T &location, T value) {                                \
171             __asm__ __volatile__("lwsync\n\t"                                                                 \
172                                  store " %[val],0(%[ptr])"                                                    \
173                                  : "=m"(location)      /* redundant with "memory" */                          \
174                                  : [ptr]"b"(&location) /* cannot use register 0 here */                       \
175                                  , [val]"r"(value)                                                            \
176                                  : "memory"/*compiler release fence*/ /*(cr0 not affected)*/);                \
177         }                                                                                                     \
178     };                                                                                                        \
179                                                                                                               \
180     template <typename T>                                                                                     \
181     struct machine_load_store_relaxed<T,S> {                                                            \
182         static inline T load (const __TBB_atomic T& location) {                                               \
183             T result;                                                                                         \
184             __asm__ __volatile__(load " %[res],0(%[ptr])"                                                     \
185                                  : [res]"=r"(result)                                                          \
186                                  : [ptr]"b"(&location) /* cannot use register 0 here */                       \
187                                  , "m"(location)                                                              \
188                                  ); /*(no compiler fence)*/ /*(cr0 not affected)*/                            \
189             return result;                                                                                    \
190         }                                                                                                     \
191         static inline void store (__TBB_atomic T &location, T value) {                                        \
192             __asm__ __volatile__(store " %[val],0(%[ptr])"                                                    \
193                                  : "=m"(location)                                                             \
194                                  : [ptr]"b"(&location) /* cannot use register 0 here */                       \
195                                  , [val]"r"(value)                                                            \
196                                  ); /*(no compiler fence)*/ /*(cr0 not affected)*/                            \
197         }                                                                                                     \
198     };
199
200 namespace tbb {
201 namespace internal {
// Specializations for 1-, 2- and 4-byte operands: byte/halfword/word load and store
// mnemonics, all compared with the word compare "cmpw".
202     __TBB_MACHINE_DEFINE_LOAD_STORE(1,"lbz","stb","cmpw")
203     __TBB_MACHINE_DEFINE_LOAD_STORE(2,"lhz","sth","cmpw")
204     __TBB_MACHINE_DEFINE_LOAD_STORE(4,"lwz","stw","cmpw")
205
206 #if __TBB_WORDSIZE==8
207
    // 64-bit builds: the 8-byte case uses the same macro with doubleword mnemonics.
208     __TBB_MACHINE_DEFINE_LOAD_STORE(8,"ld" ,"std","cmpd")
209
210 #elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
211
// machine_load_store<T,8> for 32-bit builds with __TBB_64BIT_ATOMICS enabled.
//
// The 64-bit value travels between the C++ variable and the asm through memory
// operands ([resm] / [valm]); a dummy variable only reserves the register that
// "ld" / "std" operate on.
//   load_with_acquire:  ld from location, std into the local `result`, then the
//                       cmpd-with-itself / bne- / isync sequence; "memory" clobber.
//   store_with_release: lwsync, ld of `value` from its memory operand, std to
//                       location; "memory" clobber.
212     template <typename T>
213     struct machine_load_store<T,8> {
214         static inline T load_with_acquire(const volatile T& location) {
215             T result;
216             T result_register; // dummy variable to allocate a register
217             __asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
218                                  "std %[res],%[resm]\n"
219                                  "0:\n\t"
220                                  "cmpd %[res],%[res]\n\t"
221                                  "bne- 0b\n\t"
222                                  "isync"
223                                  : [resm]"=m"(result)
224                                  , [res]"=&r"(result_register)
225                                  : [ptr]"b"(&location) /* cannot use register 0 here */
226                                  , "m"(location)       /* redundant with "memory" */
227                                  : "memory"            /* compiler acquire fence */
228                                  , "cr0"               /* clobbered by cmpd */);
229             return result;
230         }
231
232         static inline void store_with_release(volatile T &location, T value) {
233             T value_register; // dummy variable to allocate a register
234             __asm__ __volatile__("lwsync\n\t"
235                                  "ld %[val],%[valm]\n\t"
236                                  "std %[val],0(%[ptr])"
237                                  : "=m"(location)      /* redundant with "memory" */
238                                  , [val]"=&r"(value_register)
239                                  : [ptr]"b"(&location) /* cannot use register 0 here */
240                                  , [valm]"m"(value)
241                                  : "memory"/*compiler release fence*/ /*(cr0 not affected)*/);
242         }
243     };
244
245     struct machine_load_store_relaxed<T,8> {
246         static inline T load (const volatile T& location) {
247             T result;
248             T result_register; // dummy variable to allocate a register
249             __asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
250                                  "std %[res],%[resm]"
251                                  : [resm]"=m"(result)
252                                  , [res]"=&r"(result_register)
253                                  : [ptr]"b"(&location) /* cannot use register 0 here */
254                                  , "m"(location)
255                                  ); /*(no compiler fence)*/ /*(cr0 not affected)*/
256             return result;
257         }
258
259         static inline void store (volatile T &location, T value) {
260             T value_register; // dummy variable to allocate a register
261             __asm__ __volatile__("ld %[val],%[valm]\n\t"
262                                  "std %[val],0(%[ptr])"
263                                  : "=m"(location)
264                                  , [val]"=&r"(value_register)
265                                  : [ptr]"b"(&location) /* cannot use register 0 here */
266                                  , [valm]"m"(value)
267                                  ); /*(no compiler fence)*/ /*(cr0 not affected)*/
268         }
269     };
270     #define __TBB_machine_load_store_relaxed_8
271
272 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
273
274 }} // namespaces internal, tbb
275
276 #undef __TBB_MACHINE_DEFINE_LOAD_STORE
277
// NOTE(review): the three switches below presumably ask tbb_machine.h to build the
// sub-word CAS, fetch-and-add and fetch-and-store operations generically on top of
// the compare-and-swap routines in this file -- confirm in tbb_machine.h.
278 #define __TBB_USE_GENERIC_PART_WORD_CAS 1
279 #define __TBB_USE_GENERIC_FETCH_ADD     1
280 #define __TBB_USE_GENERIC_FETCH_STORE   1
281
// Each macro emits one instruction ("isync" or "sync") together with a "memory"
// clobber, so the compiler does not move memory accesses across it.
282 #define __TBB_control_consistency_helper() __asm__ __volatile__("isync": : :"memory")
283 #define __TBB_full_memory_fence()          __asm__ __volatile__( "sync": : :"memory")
284
// Returns the bit index of the most significant set bit of x, i.e. floor(log2(x))
// for x > 0.
//
// The count-leading-zeros instruction overwrites x in place (operand "+r") with the
// number of leading zero bits; subtracting that count from 63 (64-bit build) or
// 31 (32-bit build) gives the index of the highest set bit.
// NOTE(review): for x == 0 the instruction should yield the full register width,
// making the result -1 -- confirm against the Power ISA if callers may pass 0.
285 static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
286     // cntlzd/cntlzw starts counting at 2^63/2^31 (ignoring any higher-order bits), and does not affect cr0
287 #if __TBB_WORDSIZE==8
288     __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x));
289     return 63-static_cast<intptr_t>(x);
290 #else
291     __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x));
292     return 31-static_cast<intptr_t>(x);
293 #endif
294 }
// Name under which the rest of the library reaches the routine above.
295 #define __TBB_Log2(V) __TBB_machine_lg(V)
296
// Lock flag type: a full 32-bit word, so that the 32-bit compare-and-swap in this
// file can operate on it directly (see its use in __TBB_machine_trylockbyte).
297 // Assumes implicit alignment for any 32-bit value
298 typedef uint32_t __TBB_Flag;
// Self-referential define: lets other headers detect with #ifdef that the flag
// type has been supplied here.
// NOTE(review): presumably checked by tbb_machine.h before it picks a default -- confirm.
299 #define __TBB_Flag __TBB_Flag
300
// Attempts to take the lock represented by `flag` without waiting.
//
// Performs a 32-bit compare-and-swap that replaces 0 with 1 and reports whether
// the value observed in the flag was 0, i.e. whether this call stored the 1.
//
//   flag     lock word to acquire
//   returns  true when the swap found 0 in the flag, false otherwise
301 inline bool __TBB_machine_trylockbyte( __TBB_atomic __TBB_Flag &flag ) {
302     return 0 == __TBB_machine_cmpswp4( &flag, /*value=*/1, /*comparand=*/0 );
303 }
// Library-facing name for the try-lock routine above.
304 #define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)