/*
    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.

    This file is part of Threading Building Blocks.

    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.

    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Threading Building Blocks; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

    As a special exception, you may use this file as part of a free software
    library without restriction.  Specifically, if other files instantiate
    templates or use macros or inline functions from this file, or you compile
    this file and link it with other files to produce an executable, this
    file does not by itself cause the resulting executable to be covered by
    the GNU General Public License.  This exception does not however
    invalidate any other reasons why the executable file might be covered by
    the GNU General Public License.
*/
29 #ifndef __TBB_parallel_reduce_H
30 #define __TBB_parallel_reduce_H
33 #include "aligned_space.h"
34 #include "partitioner.h"
35 #include "tbb_profiling.h"
42 //! 0 if root, 1 if a left child, 2 if a right child.
43 /** Represented as a char, not enum, for compactness. */
44 typedef char reduction_context;
46 //! Task type use to combine the partial results of parallel_reduce.
47 /** @ingroup algorithms */
48 template<typename Body>
49 class finish_reduce: public task {
50 //! Pointer to body, or NULL if the left child has not yet finished.
52 bool has_right_zombie;
53 const reduction_context my_context;
54 aligned_space<Body,1> zombie_space;
55 finish_reduce( reduction_context context_ ) :
57 has_right_zombie(false),
62 if( has_right_zombie ) {
63 // Right child was stolen.
64 Body* s = zombie_space.begin();
68 if( my_context==1 ) // left child
69 itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
72 template<typename Range,typename Body_, typename Partitioner>
73 friend class start_reduce;
76 //! Task type used to split the work of parallel_reduce.
77 /** @ingroup algorithms */
78 template<typename Range, typename Body, typename Partitioner>
79 class start_reduce: public task {
80 typedef finish_reduce<Body> finish_type;
83 typename Partitioner::partition_type my_partition;
84 reduction_context my_context;
85 /*override*/ task* execute();
86 template<typename Body_>
87 friend class finish_reduce;
89 //! Constructor used for root task
90 start_reduce( const Range& range, Body* body, Partitioner& partitioner ) :
93 my_partition(partitioner),
97 //! Splitting constructor used to generate children.
98 /** parent_ becomes left child. Newly constructed object is right child. */
99 start_reduce( start_reduce& parent_, split ) :
100 my_body(parent_.my_body),
101 my_range(parent_.my_range,split()),
102 my_partition(parent_.my_partition,split()),
105 my_partition.set_affinity(*this);
106 parent_.my_context = 1;
108 //! Update affinity info, if any
109 /*override*/ void note_affinity( affinity_id id ) {
110 my_partition.note_affinity( id );
114 static void run( const Range& range, Body& body, Partitioner& partitioner ) {
115 if( !range.empty() ) {
116 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
117 task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );
119 // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
120 // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
121 task_group_context context;
122 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
123 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
126 #if __TBB_TASK_GROUP_CONTEXT
127 static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
129 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
131 #endif /* __TBB_TASK_GROUP_CONTEXT */
134 template<typename Range, typename Body, typename Partitioner>
135 task* start_reduce<Range,Body,Partitioner>::execute() {
136 if( my_context==2 ) { // right child
137 finish_type* p = static_cast<finish_type*>(parent());
138 if( !itt_load_word_with_acquire(p->my_body) ) {
139 my_body = new( p->zombie_space.begin() ) Body(*my_body,split());
140 p->has_right_zombie = true;
143 if( !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
144 (*my_body)( my_range );
146 itt_store_word_with_release(static_cast<finish_type*>(parent())->my_body, my_body );
147 return my_partition.continue_after_execute_range();
149 finish_type& c = *new( allocate_continuation()) finish_type(my_context);
150 recycle_as_child_of(c);
152 bool delay = my_partition.decide_whether_to_delay();
153 start_reduce& b = *new( c.allocate_child() ) start_reduce(*this,split());
154 my_partition.spawn_or_delay(delay,b);
159 #if TBB_PREVIEW_DETERMINISTIC_REDUCE
160 //! Task type use to combine the partial results of parallel_deterministic_reduce.
161 /** @ingroup algorithms */
162 template<typename Body>
163 class finish_deterministic_reduce: public task {
167 finish_deterministic_reduce( Body &body ) :
168 my_left_body( body ),
169 my_right_body( body, split() )
173 my_left_body.join( my_right_body );
176 template<typename Range,typename Body_>
177 friend class start_deterministic_reduce;
180 //! Task type used to split the work of parallel_deterministic_reduce.
181 /** @ingroup algorithms */
182 template<typename Range, typename Body>
183 class start_deterministic_reduce: public task {
184 typedef finish_deterministic_reduce<Body> finish_type;
187 /*override*/ task* execute();
189 //! Constructor used for root task
190 start_deterministic_reduce( const Range& range, Body& body ) :
195 //! Splitting constructor used to generate children.
196 /** parent_ becomes left child. Newly constructed object is right child. */
197 start_deterministic_reduce( start_deterministic_reduce& parent_, finish_type& c ) :
198 my_body( c.my_right_body ),
199 my_range( parent_.my_range, split() )
204 static void run( const Range& range, Body& body ) {
205 if( !range.empty() ) {
206 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
207 task::spawn_root_and_wait( *new(task::allocate_root()) start_deterministic_reduce(range,&body) );
209 // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
210 // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
211 task_group_context context;
212 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
213 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
216 #if __TBB_TASK_GROUP_CONTEXT
217 static void run( const Range& range, Body& body, task_group_context& context ) {
219 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
221 #endif /* __TBB_TASK_GROUP_CONTEXT */
224 template<typename Range, typename Body>
225 task* start_deterministic_reduce<Range,Body>::execute() {
226 if( !my_range.is_divisible() ) {
230 finish_type& c = *new( allocate_continuation() ) finish_type( my_body );
231 recycle_as_child_of(c);
233 start_deterministic_reduce& b = *new( c.allocate_child() ) start_deterministic_reduce( *this, c );
238 #endif /* TBB_PREVIEW_DETERMINISTIC_REDUCE */
240 //! Auxiliary class for parallel_reduce; for internal use only.
241 /** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body"
242 using given \ref parallel_reduce_lambda_req "anonymous function objects".
244 /** @ingroup algorithms */
245 template<typename Range, typename Value, typename RealBody, typename Reduction>
246 class lambda_reduce_body {
248 //FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced
249 // (might require some performance measurements)
251 const Value& identity_element;
252 const RealBody& my_real_body;
253 const Reduction& my_reduction;
255 lambda_reduce_body& operator= ( const lambda_reduce_body& other );
257 lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
258 : identity_element(identity)
260 , my_reduction(reduction)
263 lambda_reduce_body( const lambda_reduce_body& other )
264 : identity_element(other.identity_element)
265 , my_real_body(other.my_real_body)
266 , my_reduction(other.my_reduction)
267 , my_value(other.my_value)
269 lambda_reduce_body( lambda_reduce_body& other, tbb::split )
270 : identity_element(other.identity_element)
271 , my_real_body(other.my_real_body)
272 , my_reduction(other.my_reduction)
273 , my_value(other.identity_element)
275 void operator()(Range& range) {
276 my_value = my_real_body(range, const_cast<const Value&>(my_value));
278 void join( lambda_reduce_body& rhs ) {
279 my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value));
281 Value result() const {
286 } // namespace internal
// Requirements on Range concept are documented in blocked_range.h

/** \page parallel_reduce_body_req Requirements on parallel_reduce body
    Class \c Body implementing the concept of parallel_reduce body must define:
    - \code Body::Body( Body&, split ); \endcode        Splitting constructor.
                                                        Must be able to run concurrently with operator() and method \c join
    - \code Body::~Body(); \endcode                     Destructor
    - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r
                                                        and accumulating the result
    - \code void Body::join( Body& b ); \endcode        Join results.
                                                        The result in \c b should be merged into the result of \c this
**/

/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions)
    TO BE DOCUMENTED
**/

/** \name parallel_reduce
    See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/
//@{
310 //! Parallel iteration with reduction and default partitioner.
311 /** @ingroup algorithms **/
312 template<typename Range, typename Body>
313 void parallel_reduce( const Range& range, Body& body ) {
314 internal::start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
317 //! Parallel iteration with reduction and simple_partitioner
318 /** @ingroup algorithms **/
319 template<typename Range, typename Body>
320 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
321 internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
324 //! Parallel iteration with reduction and auto_partitioner
325 /** @ingroup algorithms **/
326 template<typename Range, typename Body>
327 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
328 internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
331 //! Parallel iteration with reduction and affinity_partitioner
332 /** @ingroup algorithms **/
333 template<typename Range, typename Body>
334 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
335 internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
    internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
}

//! Parallel iteration with reduction, auto_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
    internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
}

//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
    internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
/** parallel_reduce overloads that work with anonymous function objects
    (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
364 //! Parallel iteration with reduction and default partitioner.
365 /** @ingroup algorithms **/
366 template<typename Range, typename Value, typename RealBody, typename Reduction>
367 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
368 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
369 internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
370 ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
371 return body.result();
374 //! Parallel iteration with reduction and simple_partitioner.
375 /** @ingroup algorithms **/
376 template<typename Range, typename Value, typename RealBody, typename Reduction>
377 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
378 const simple_partitioner& partitioner ) {
379 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
380 internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
381 ::run(range, body, partitioner );
382 return body.result();
385 //! Parallel iteration with reduction and auto_partitioner
386 /** @ingroup algorithms **/
387 template<typename Range, typename Value, typename RealBody, typename Reduction>
388 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
389 const auto_partitioner& partitioner ) {
390 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
391 internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
392 ::run( range, body, partitioner );
393 return body.result();
396 //! Parallel iteration with reduction and affinity_partitioner
397 /** @ingroup algorithms **/
398 template<typename Range, typename Value, typename RealBody, typename Reduction>
399 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
400 affinity_partitioner& partitioner ) {
401 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
402 internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
403 ::run( range, body, partitioner );
404 return body.result();
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const simple_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}

//! Parallel iteration with reduction, auto_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const auto_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}

//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       affinity_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
442 #if TBB_PREVIEW_DETERMINISTIC_REDUCE
443 //! Parallel iteration with deterministic reduction and default partitioner.
444 /** @ingroup algorithms **/
445 template<typename Range, typename Body>
446 void parallel_deterministic_reduce( const Range& range, Body& body ) {
447 internal::start_deterministic_reduce<Range,Body>::run( range, body );
450 #if __TBB_TASK_GROUP_CONTEXT
451 //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
452 /** @ingroup algorithms **/
453 template<typename Range, typename Body>
454 void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
455 internal::start_deterministic_reduce<Range,Body>::run( range, body, context );
457 #endif /* __TBB_TASK_GROUP_CONTEXT */
459 /** parallel_reduce overloads that work with anonymous function objects
460 (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
462 //! Parallel iteration with deterministic reduction and default partitioner.
463 /** @ingroup algorithms **/
464 template<typename Range, typename Value, typename RealBody, typename Reduction>
465 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
466 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
467 internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
469 return body.result();
472 #if __TBB_TASK_GROUP_CONTEXT
473 //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
474 /** @ingroup algorithms **/
475 template<typename Range, typename Value, typename RealBody, typename Reduction>
476 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
477 task_group_context& context ) {
478 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
479 internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
480 ::run( range, body, context );
481 return body.result();
483 #endif /* __TBB_TASK_GROUP_CONTEXT */
484 #endif /* TBB_PREVIEW_DETERMINISTIC_REDUCE */
489 #endif /* __TBB_parallel_reduce_H */