/*
    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.

    This file is part of Threading Building Blocks.

    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.

    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Threading Building Blocks; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

    As a special exception, you may use this file as part of a free software
    library without restriction.  Specifically, if other files instantiate
    templates or use macros or inline functions from this file, or you compile
    this file and link it with other files to produce an executable, this
    file does not by itself cause the resulting executable to be covered by
    the GNU General Public License.  This exception does not however
    invalidate any other reasons why the executable file might be covered by
    the GNU General Public License.
*/
29 #if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_power_H)
30 #error Do not include this file directly; include tbb_machine.h instead
33 #define __TBB_machine_gcc_power_H
38 // TODO: rename to gcc_power.h?
39 // This file is for Power Architecture with compilers supporting GNU inline-assembler syntax (currently GNU g++ and IBM XL).
40 // Note that XL V9.0 (sometimes?) has trouble dealing with empty input and/or clobber lists, so they should be avoided.
42 #if __powerpc64__ || __ppc64__
43 // IBM XL documents __powerpc64__ (and __PPC64__).
44 // Apple documents __ppc64__ (with __ppc__ only on 32-bit).
45 #define __TBB_WORDSIZE 8
47 #define __TBB_WORDSIZE 4
50 // On Power Architecture, (lock-free) 64-bit atomics require 64-bit hardware:
52 // Do not change the following definition, because TBB itself will use 64-bit atomics in 64-bit builds.
53 #define __TBB_64BIT_ATOMICS 1
55 // Do not change the following definition on known 32-bit hardware.
56 #define __TBB_64BIT_ATOMICS 0
58 // To enable 64-bit atomics in 32-bit builds, set the value below to 1 instead of 0.
59 // You must make certain that the program will only use them on actual 64-bit hardware
60 // (which typically means that the entire program is only executed on such hardware),
61 // because their implementation involves machine instructions that are illegal elsewhere.
62 // The setting can be chosen independently per compilation unit,
63 // which also means that TBB itself does not need to be rebuilt.
64 // Alternatively (but only for the current architecture and TBB version),
65 // override the default as a predefined macro when invoking the compiler.
66 #ifndef __TBB_64BIT_ATOMICS
67 #define __TBB_64BIT_ATOMICS 0
// 32-bit compare-and-swap: atomically replace *ptr with value iff *ptr==comparand.
// Returns the value observed at *ptr (== comparand exactly when the swap happened).
// Full fence semantics: sync before the lwarx/stwcx. loop, isync after it.
inline int32_t __TBB_machine_cmpswp4 (volatile void *ptr, int32_t value, int32_t comparand )
{
    int32_t result;

    __asm__ __volatile__("sync\n"
                         "0:\n\t"
                         "lwarx %[res],0,%[ptr]\n\t"     /* load w/ reservation */
                         "cmpw %[res],%[cmp]\n\t"        /* compare against comparand */
                         "bne- 1f\n\t"                   /* exit if not same */
                         "stwcx. %[val],0,%[ptr]\n\t"    /* store new value */
                         "bne- 0b\n"                     /* retry if reservation lost */
                         "1:\n\t"                        /* the exit */
                         "isync"
                         : [res]"=&r"(result)
                         , "+m"(* (int32_t*) ptr)        /* redundant with "memory" */
                         : [ptr]"r"(ptr)
                         , [val]"r"(value)
                         , [cmp]"r"(comparand)
                         : "memory"                      /* compiler full fence */
                         , "cr0"                         /* clobbered by cmp and/or stwcx. */
                         );
    return result;
}
97 inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
100 __asm__ __volatile__("sync\n"
102 "ldarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
103 "cmpd %[res],%[cmp]\n\t" /* compare against comparand */
104 "bne- 1f\n\t" /* exit if not same */
105 "stdcx. %[val],0,%[ptr]\n\t" /* store new value */
106 "bne- 0b\n" /* retry if reservation lost */
107 "1:\n\t" /* the exit */
110 , "+m"(* (int64_t*) ptr) /* redundant with "memory" */
113 , [cmp]"r"(comparand)
114 : "memory" /* compiler full fence */
115 , "cr0" /* clobbered by cmp and/or stdcx. */
120 #elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
122 inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
125 int64_t value_register, comparand_register, result_register; // dummy variables to allocate registers
126 __asm__ __volatile__("sync\n\t"
127 "ld %[val],%[valm]\n\t"
128 "ld %[cmp],%[cmpm]\n"
130 "ldarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
131 "cmpd %[res],%[cmp]\n\t" /* compare against comparand */
132 "bne- 1f\n\t" /* exit if not same */
133 "stdcx. %[val],0,%[ptr]\n\t" /* store new value */
134 "bne- 0b\n" /* retry if reservation lost */
135 "1:\n\t" /* the exit */
136 "std %[res],%[resm]\n\t"
139 , [res] "=&r"( result_register)
140 , [val] "=&r"( value_register)
141 , [cmp] "=&r"(comparand_register)
142 , "+m"(* (int64_t*) ptr) /* redundant with "memory" */
145 , [cmpm]"m"(comparand)
146 : "memory" /* compiler full fence */
147 , "cr0" /* clobbered by cmpd and/or stdcx. */
151 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
// Defines machine_load_store<T,S> (acquire load / release store) and
// machine_load_store_relaxed<T,S> for an S-byte type, given the load, store,
// and compare mnemonics for that width. The acquire load uses the classic
// PowerPC idiom: compare the loaded value to itself, branch (never taken) on
// the result, then isync — creating a control dependency that orders
// subsequent accesses after the load.
#define __TBB_MACHINE_DEFINE_LOAD_STORE(S,load,store,compare)                                 \
    template <typename T>                                                                     \
    struct machine_load_store<T,S> {                                                          \
        static inline T load_with_acquire(const volatile T& location) {                       \
            T result;                                                                         \
            __asm__ __volatile__(load " %[res],0(%[ptr])\n"                                   \
                                 "0:\n\t"                                                     \
                                 compare " %[res],%[res]\n\t"                                 \
                                 "bne- 0b\n\t"                                                \
                                 "isync"                                                      \
                                 : [res]"=r"(result)                                          \
                                 : [ptr]"b"(&location) /* cannot use register 0 here */       \
                                 , "m"(location)       /* redundant with "memory" */          \
                                 : "memory"            /* compiler acquire fence */           \
                                 , "cr0"               /* clobbered by cmpw/cmpd */);         \
            return result;                                                                    \
        }                                                                                     \
        static inline void store_with_release(volatile T &location, T value) {                \
            __asm__ __volatile__("lwsync\n\t"                                                 \
                                 store " %[val],0(%[ptr])"                                    \
                                 : "=m"(location)      /* redundant with "memory" */          \
                                 : [ptr]"b"(&location) /* cannot use register 0 here */       \
                                 , [val]"r"(value)                                            \
                                 : "memory"/*compiler release fence*/ /*(cr0 not affected)*/);\
        }                                                                                     \
    };                                                                                        \
    template <typename T>                                                                     \
    struct machine_load_store_relaxed<T,S> {                                                  \
        static inline T load (const __TBB_atomic T& location) {                               \
            T result;                                                                         \
            __asm__ __volatile__(load " %[res],0(%[ptr])"                                     \
                                 : [res]"=r"(result)                                          \
                                 : [ptr]"b"(&location) /* cannot use register 0 here */       \
                                 , "m"(location)                                              \
                                 ); /*(no compiler fence)*/ /*(cr0 not affected)*/            \
            return result;                                                                    \
        }                                                                                     \
        static inline void store (__TBB_atomic T &location, T value) {                        \
            __asm__ __volatile__(store " %[val],0(%[ptr])"                                    \
                                 : "=m"(location)                                             \
                                 : [ptr]"b"(&location) /* cannot use register 0 here */       \
                                 , [val]"r"(value)                                            \
                                 ); /*(no compiler fence)*/ /*(cr0 not affected)*/            \
        }                                                                                     \
    };
202 __TBB_MACHINE_DEFINE_LOAD_STORE(1,"lbz","stb","cmpw")
203 __TBB_MACHINE_DEFINE_LOAD_STORE(2,"lhz","sth","cmpw")
204 __TBB_MACHINE_DEFINE_LOAD_STORE(4,"lwz","stw","cmpw")
#if __TBB_WORDSIZE==8
// 64-bit build: 8-byte loads/stores come straight from the macro.
__TBB_MACHINE_DEFINE_LOAD_STORE(8,"ld" ,"std","cmpd")

#elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */

// 32-bit build with 64-bit atomics enabled: as with __TBB_machine_cmpswp8
// above, 64-bit values cannot be bound to registers in a 32-bit ABI, so each
// operation stages the value through memory via a dummy register variable.
template <typename T>
struct machine_load_store<T,8> {
    static inline T load_with_acquire(const volatile T& location) {
        T result;
        T result_register; // dummy variable to allocate a register
        __asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
                             "std %[res],%[resm]\n"
                             "0:\n\t"
                             "cmpd %[res],%[res]\n\t"
                             "bne- 0b\n\t"
                             "isync"
                             : [resm]"=m"(result)
                             , [res]"=&r"(result_register)
                             : [ptr]"b"(&location) /* cannot use register 0 here */
                             , "m"(location)       /* redundant with "memory" */
                             : "memory"            /* compiler acquire fence */
                             , "cr0"               /* clobbered by cmpd */);
        return result;
    }
    static inline void store_with_release(volatile T &location, T value) {
        T value_register; // dummy variable to allocate a register
        __asm__ __volatile__("lwsync\n\t"
                             "ld %[val],%[valm]\n\t"
                             "std %[val],0(%[ptr])"
                             : "=m"(location)      /* redundant with "memory" */
                             , [val]"=&r"(value_register)
                             : [ptr]"b"(&location) /* cannot use register 0 here */
                             , [valm]"m"(value)
                             : "memory"/*compiler release fence*/ /*(cr0 not affected)*/);
    }
};

template <typename T>
struct machine_load_store_relaxed<T,8> {
    static inline T load (const volatile T& location) {
        T result;
        T result_register; // dummy variable to allocate a register
        __asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
                             "std %[res],%[resm]"
                             : [resm]"=m"(result)
                             , [res]"=&r"(result_register)
                             : [ptr]"b"(&location) /* cannot use register 0 here */
                             , "m"(location)
                             ); /*(no compiler fence)*/ /*(cr0 not affected)*/
        return result;
    }
    static inline void store (volatile T &location, T value) {
        T value_register; // dummy variable to allocate a register
        __asm__ __volatile__("ld %[val],%[valm]\n\t"
                             "std %[val],0(%[ptr])"
                             : "=m"(location)
                             , [val]"=&r"(value_register)
                             : [ptr]"b"(&location) /* cannot use register 0 here */
                             , [valm]"m"(value)
                             ); /*(no compiler fence)*/ /*(cr0 not affected)*/
    }
};

// Tell tbb_machine.h that relaxed 8-byte load/store are provided here.
#define __TBB_machine_load_store_relaxed_8

#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
}} // namespaces internal, tbb

#undef __TBB_MACHINE_DEFINE_LOAD_STORE
// Let tbb_machine.h synthesize part-word CAS, fetch-add and fetch-store
// from the full-word CAS primitives defined above.
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1

// Compiler/hardware fences: isync after a dependent branch for control
// consistency; sync for a full memory fence.
#define __TBB_control_consistency_helper() __asm__ __volatile__("isync": : :"memory")
#define __TBB_full_memory_fence() __asm__ __volatile__( "sync": : :"memory")
// Returns the index of the most significant set bit of x, i.e. floor(log2(x)).
// NOTE(review): for x==0 the count-leading-zeros result makes this return -1 —
// callers are expected to pass x!=0.
static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
    // cntlzd/cntlzw starts counting at 2^63/2^31 (ignoring any higher-order bits), and does not affect cr0
#if __TBB_WORDSIZE==8
    __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x));
    return 63-static_cast<intptr_t>(x);
#else
    __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x));
    return 31-static_cast<intptr_t>(x);
#endif
}
#define __TBB_Log2(V) __TBB_machine_lg(V)
297 // Assumes implicit alignment for any 32-bit value
298 typedef uint32_t __TBB_Flag;
299 #define __TBB_Flag __TBB_Flag
301 inline bool __TBB_machine_trylockbyte( __TBB_atomic __TBB_Flag &flag ) {
302 return __TBB_machine_cmpswp4(&flag,1,0)==0;
304 #define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)