2 Copyright 2005-2011 Intel Corporation. All Rights Reserved.
4 This file is part of Threading Building Blocks.
6 Threading Building Blocks is free software; you can redistribute it
7 and/or modify it under the terms of the GNU General Public License
8 version 2 as published by the Free Software Foundation.
10 Threading Building Blocks is distributed in the hope that it will be
11 useful, but WITHOUT ANY WARRANTY; without even the implied warranty
12 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with Threading Building Blocks; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 As a special exception, you may use this file as part of a free software
20 library without restriction. Specifically, if other files instantiate
21 templates or use macros or inline functions from this file, or you compile
22 this file and link it with other files to produce an executable, this
23 file does not by itself cause the resulting executable to be covered by
24 the GNU General Public License. This exception does not however
25 invalidate any other reasons why the executable file might be covered by
26 the GNU General Public License.
29 #ifndef __TBB_parallel_scan_H
30 #define __TBB_parallel_scan_H
33 #include "aligned_space.h"
35 #include "partitioner.h"
39 //! Used to indicate that the initial scan is being performed.
40 /** @ingroup algorithms */
42 static bool is_final_scan() {return false;}
45 //! Used to indicate that the final scan is being performed.
46 /** @ingroup algorithms */
47 struct final_scan_tag {
48 static bool is_final_scan() {return true;}
54 //! Performs final scan for a leaf
55 /** @ingroup algorithms */
56 template<typename Range, typename Body>
57 class final_sum: public task {
61 aligned_space<Range,1> range;
62 //! Where to put result of last subrange, or NULL if not last subrange.
65 final_sum( Body& body_ ) :
68 poison_pointer(stuff_last);
71 range.begin()->~Range();
73 void finish_construction( const Range& range_, Body* stuff_last_ ) {
74 new( range.begin() ) Range(range_);
75 stuff_last = stuff_last_;
78 /*override*/ task* execute() {
79 body( *range.begin(), final_scan_tag() );
81 stuff_last->assign(body);
86 //! Split work to be done in the scan.
87 /** @ingroup algorithms */
88 template<typename Range, typename Body>
89 class sum_node: public task {
90 typedef final_sum<Range,Body> final_sum_type;
92 final_sum_type *incoming;
96 final_sum_type *left_sum;
101 sum_node( const Range range_, bool left_is_final_ ) :
105 left_is_final(left_is_final_),
108 // Poison fields that will be set by second pass.
109 poison_pointer(body);
110 poison_pointer(incoming);
112 task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
114 f.recycle_as_child_of( *this );
115 f.finish_construction( range_, stuff_last_ );
119 n->incoming = incoming_;
120 n->stuff_last = stuff_last_;
124 /*override*/ task* execute() {
127 left_sum->body.reverse_join( incoming->body );
128 recycle_as_continuation();
130 task* b = c.create_child(Range(range,split()),*left_sum,right,left_sum,stuff_last);
131 task* a = left_is_final ? NULL : c.create_child(range,*body,left,incoming,NULL);
132 set_ref_count( (a!=NULL)+(b!=NULL) );
141 template<typename Range_,typename Body_,typename Partitioner_>
142 friend class start_scan;
144 template<typename Range_,typename Body_>
145 friend class finish_scan;
148 //! Combine partial results
149 /** @ingroup algorithms */
150 template<typename Range, typename Body>
151 class finish_scan: public task {
152 typedef sum_node<Range,Body> sum_node_type;
153 typedef final_sum<Range,Body> final_sum_type;
154 final_sum_type** const sum;
155 sum_node_type*& return_slot;
157 final_sum_type* right_zombie;
158 sum_node_type& result;
160 /*override*/ task* execute() {
161 __TBB_ASSERT( result.ref_count()==(result.left!=NULL)+(result.right!=NULL), NULL );
163 result.left_is_final = false;
164 if( right_zombie && sum )
165 ((*sum)->body).reverse_join(result.left_sum->body);
166 __TBB_ASSERT( !return_slot, NULL );
167 if( right_zombie || result.right ) {
168 return_slot = &result;
172 if( right_zombie && !sum && !result.right ) destroy(*right_zombie);
176 finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
178 return_slot(return_slot_),
182 __TBB_ASSERT( !return_slot, NULL );
186 //! Initial task to split the work
187 /** @ingroup algorithms */
188 template<typename Range, typename Body, typename Partitioner=simple_partitioner>
189 class start_scan: public task {
190 typedef sum_node<Range,Body> sum_node_type;
191 typedef final_sum<Range,Body> final_sum_type;
192 final_sum_type* body;
193 /** Non-null if caller is requesting total. */
194 final_sum_type** sum;
195 sum_node_type** return_slot;
196 /** Null if computing root. */
197 sum_node_type* parent_sum;
201 typename Partitioner::partition_type partition;
202 /*override*/ task* execute();
204 start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
207 return_slot(&return_slot_),
208 parent_sum(parent_sum_),
209 is_final(parent_.is_final),
210 is_right_child(false),
211 range(parent_.range,split()),
212 partition(parent_.partition,split())
214 __TBB_ASSERT( !*return_slot, NULL );
217 start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
220 return_slot(&return_slot_),
223 is_right_child(false),
225 partition(partitioner_)
227 __TBB_ASSERT( !*return_slot, NULL );
230 static void run( const Range& range, Body& body, const Partitioner& partitioner ) {
231 if( !range.empty() ) {
232 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
233 internal::sum_node<Range,Body>* root = NULL;
234 typedef internal::final_sum<Range,Body> final_sum_type;
235 final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body );
236 start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
237 /*return_slot=*/root,
241 task::spawn_root_and_wait( pass1 );
243 root->body = temp_body;
244 root->incoming = NULL;
245 root->stuff_last = &body;
246 task::spawn_root_and_wait( *root );
248 body.assign(temp_body->body);
249 temp_body->finish_construction( range, NULL );
250 temp_body->destroy(*temp_body);
256 template<typename Range, typename Body, typename Partitioner>
257 task* start_scan<Range,Body,Partitioner>::execute() {
258 typedef internal::finish_scan<Range,Body> finish_pass1_type;
259 finish_pass1_type* p = parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
260 // Inspecting p->result.left_sum would ordinarily be a race condition.
261 // But we inspect it only if we are not a stolen task, in which case we
262 // know that task assigning to p->result.left_sum has completed.
263 bool treat_as_stolen = is_right_child && (is_stolen_task() || body!=p->result.left_sum);
264 if( treat_as_stolen ) {
265 // Invocation is for right child that has been really stolen or needs to be virtually stolen
266 p->right_zombie = body = new( allocate_root() ) final_sum_type(body->body);
269 task* next_task = NULL;
270 if( (is_right_child && !treat_as_stolen) || !range.is_divisible() || partition.should_execute_range(*this) ) {
272 (body->body)( range, final_scan_tag() );
274 (body->body)( range, pre_scan_tag() );
277 __TBB_ASSERT( !*return_slot, NULL );
279 sum_node_type* result;
281 result = new(allocate_additional_child_of(*parent_sum)) sum_node_type(range,/*left_is_final=*/is_final);
283 result = new(task::allocate_root()) sum_node_type(range,/*left_is_final=*/is_final);
284 finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*return_slot,sum,*result);
285 // Split off right child
286 start_scan& b = *new( c.allocate_child() ) start_scan( /*return_slot=*/result->right, *this, result );
287 b.is_right_child = true;
288 // Left child is recycling of *this. Must recycle this before spawning b,
289 // otherwise b might complete and decrement c.ref_count() to zero, which
290 // would cause c.execute() to run prematurely.
291 recycle_as_child_of(c);
294 sum = &result->left_sum;
295 return_slot = &result->left;
296 is_right_child = false;
299 __TBB_ASSERT( !*return_slot, NULL );
303 } // namespace internal
306 // Requirements on Range concept are documented in blocked_range.h
308 /** \page parallel_scan_body_req Requirements on parallel_scan body
309 Class \c Body implementing the concept of parallel_scan body must define:
310 - \code Body::Body( Body&, split ); \endcode Splitting constructor.
311 Split \c b so that \c this and \c b can accumulate separately
312 - \code Body::~Body(); \endcode Destructor
313 - \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode
314 Preprocess iterations for range \c r
315 - \code void Body::operator()( const Range& r, final_scan_tag ); \endcode
316 Do final processing for iterations of range \c r
317 - \code void Body::reverse_join( Body& a ); \endcode
318 Merge preprocessing state of \c a into \c this, where \c a was
319 created earlier from \c b by b's splitting constructor
322 /** \name parallel_scan
323 See also requirements on \ref range_req "Range" and \ref parallel_scan_body_req "parallel_scan Body". **/
326 //! Parallel prefix with default partitioner
327 /** @ingroup algorithms **/
328 template<typename Range, typename Body>
329 void parallel_scan( const Range& range, Body& body ) {
330 internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
333 //! Parallel prefix with simple_partitioner
334 /** @ingroup algorithms **/
335 template<typename Range, typename Body>
336 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
337 internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner);
340 //! Parallel prefix with auto_partitioner
341 /** @ingroup algorithms **/
342 template<typename Range, typename Body>
343 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
344 internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner);
350 #endif /* __TBB_parallel_scan_H */