git.sesse.net Git - casparcg/blob - tbb/include/tbb/parallel_reduce.h

   1 /*
   2     Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
   3
   4     This file is part of Threading Building Blocks.
   5
   6     Threading Building Blocks is free software; you can redistribute it
   7     and/or modify it under the terms of the GNU General Public License
   8     version 2 as published by the Free Software Foundation.
   9
  10     Threading Building Blocks is distributed in the hope that it will be
  11     useful, but WITHOUT ANY WARRANTY; without even the implied warranty
  12     of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13     GNU General Public License for more details.
  14
  15     You should have received a copy of the GNU General Public License
  16     along with Threading Building Blocks; if not, write to the Free Software
  17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  18
  19     As a special exception, you may use this file as part of a free software
  20     library without restriction.  Specifically, if other files instantiate
  21     templates or use macros or inline functions from this file, or you compile
  22     this file and link it with other files to produce an executable, this
  23     file does not by itself cause the resulting executable to be covered by
  24     the GNU General Public License.  This exception does not however
  25     invalidate any other reasons why the executable file might be covered by
  26     the GNU General Public License.
  27 */
  28
  29 #ifndef __TBB_parallel_reduce_H
  30 #define __TBB_parallel_reduce_H
  31
  32 #include "task.h"
  33 #include "aligned_space.h"
  34 #include "partitioner.h"
  35 #include "tbb_profiling.h"
  36 #include <new>
  37
  38 namespace tbb {
  39
  40 //! @cond INTERNAL
  41 namespace internal {
  42     //! 0 if root, 1 if a left child, 2 if a right child.
  43     /** Represented as a char, not enum, for compactness. */
  44     typedef char reduction_context;
  45
  46     //! Task type use to combine the partial results of parallel_reduce.
  47     /** @ingroup algorithms */
  48     template<typename Body>
  49     class finish_reduce: public task {
  50         //! Pointer to body, or NULL if the left child has not yet finished.
  51         Body* my_body;
  52         bool has_right_zombie;
  53         const reduction_context my_context;
  54         aligned_space<Body,1> zombie_space;
  55         finish_reduce( reduction_context context_ ) :
  56             my_body(NULL),
  57             has_right_zombie(false),
  58             my_context(context_)
  59         {
  60         }
  61         task* execute() {
  62             if( has_right_zombie ) {
  63                 // Right child was stolen.
  64                 Body* s = zombie_space.begin();
  65                 my_body->join( *s );
  66                 s->~Body();
  67             }
  68             if( my_context==1 )  // left child
  69                 itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
  70             return NULL;
  71         }
  72         template<typename Range,typename Body_, typename Partitioner>
  73         friend class start_reduce;
  74     };
  75
  76     //! Task type used to split the work of parallel_reduce.
  77     /** @ingroup algorithms */
  78     template<typename Range, typename Body, typename Partitioner>
  79     class start_reduce: public task {
  80         typedef finish_reduce<Body> finish_type;
  81         Body* my_body;
  82         Range my_range;
  83         typename Partitioner::partition_type my_partition;
  84         reduction_context my_context;
  85         /*override*/ task* execute();
  86         template<typename Body_>
  87         friend class finish_reduce;
  88
  89         //! Constructor used for root task
  90         start_reduce( const Range& range, Body* body, Partitioner& partitioner ) :
  91             my_body(body),
  92             my_range(range),
  93             my_partition(partitioner),
  94             my_context(0)
  95         {
  96         }
  97         //! Splitting constructor used to generate children.
  98         /** parent_ becomes left child.  Newly constructed object is right child. */
  99         start_reduce( start_reduce& parent_, split ) :
 100             my_body(parent_.my_body),
 101             my_range(parent_.my_range,split()),
 102             my_partition(parent_.my_partition,split()),
 103             my_context(2)
 104         {
 105             my_partition.set_affinity(*this);
 106             parent_.my_context = 1;
 107         }
 108         //! Update affinity info, if any
 109         /*override*/ void note_affinity( affinity_id id ) {
 110             my_partition.note_affinity( id );
 111         }
 112
 113 public:
 114         static void run( const Range& range, Body& body, Partitioner& partitioner ) {
 115             if( !range.empty() ) {
 116 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
 117                 task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );
 118 #else
 119                 // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
 120                 // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
 121                 task_group_context context;
 122                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
 123 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
 124             }
 125         }
 126 #if __TBB_TASK_GROUP_CONTEXT
 127         static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
 128             if( !range.empty() )
 129                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
 130         }
 131 #endif /* __TBB_TASK_GROUP_CONTEXT */
 132     };
 133
 134     template<typename Range, typename Body, typename Partitioner>
 135     task* start_reduce<Range,Body,Partitioner>::execute() {
 136         if( my_context==2 ) { // right child
 137             finish_type* p = static_cast<finish_type*>(parent());
 138             if( !itt_load_word_with_acquire(p->my_body) ) {
 139                 my_body = new( p->zombie_space.begin() ) Body(*my_body,split());
 140                 p->has_right_zombie = true;
 141             }
 142         }
 143         if( !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
 144             (*my_body)( my_range );
 145             if( my_context==1 )
 146                 itt_store_word_with_release(static_cast<finish_type*>(parent())->my_body, my_body );
 147             return my_partition.continue_after_execute_range();
 148         } else {
 149             finish_type& c = *new( allocate_continuation()) finish_type(my_context);
 150             recycle_as_child_of(c);
 151             c.set_ref_count(2);
 152             bool delay = my_partition.decide_whether_to_delay();
 153             start_reduce& b = *new( c.allocate_child() ) start_reduce(*this,split());
 154             my_partition.spawn_or_delay(delay,b);
 155             return this;
 156         }
 157     }
 158
 159     //! Auxiliary class for parallel_reduce; for internal use only.
 160     /** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body"
 161         using given \ref parallel_reduce_lambda_req "anonymous function objects".
 162      **/
 163     /** @ingroup algorithms */
 164     template<typename Range, typename Value, typename RealBody, typename Reduction>
 165     class lambda_reduce_body {
 166
 167 //FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced
 168 //       (might require some performance measurements)
 169
 170         const Value&     identity_element;
 171         const RealBody&  my_real_body;
 172         const Reduction& my_reduction;
 173         Value            my_value;
 174         lambda_reduce_body& operator= ( const lambda_reduce_body& other );
 175     public:
 176         lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
 177             : identity_element(identity)
 178             , my_real_body(body)
 179             , my_reduction(reduction)
 180             , my_value(identity)
 181         { }
 182         lambda_reduce_body( const lambda_reduce_body& other )
 183             : identity_element(other.identity_element)
 184             , my_real_body(other.my_real_body)
 185             , my_reduction(other.my_reduction)
 186             , my_value(other.my_value)
 187         { }
 188         lambda_reduce_body( lambda_reduce_body& other, tbb::split )
 189             : identity_element(other.identity_element)
 190             , my_real_body(other.my_real_body)
 191             , my_reduction(other.my_reduction)
 192             , my_value(other.identity_element)
 193         { }
 194         void operator()(Range& range) {
 195             my_value = my_real_body(range, const_cast<const Value&>(my_value));
 196         }
 197         void join( lambda_reduce_body& rhs ) {
 198             my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value));
 199         }
 200         Value result() const {
 201             return my_value;
 202         }
 203     };
 204
 205 } // namespace internal
 206 //! @endcond
 207
 208 // Requirements on Range concept are documented in blocked_range.h
 209
 210 /** \page parallel_reduce_body_req Requirements on parallel_reduce body
 211     Class \c Body implementing the concept of parallel_reduce body must define:
 212     - \code Body::Body( Body&, split ); \endcode        Splitting constructor.
 213                                                         Must be able to run concurrently with operator() and method \c join
 214     - \code Body::~Body(); \endcode                     Destructor
 215     - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r
 216                                                         and accumulating the result
 217     - \code void Body::join( Body& b ); \endcode        Join results.
 218                                                         The result in \c b should be merged into the result of \c this
 219 **/
 220
 221 /** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions)
 222     TO BE DOCUMENTED
 223 **/
 224
 225 /** \name parallel_reduce
 226     See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/
 227 //@{
 228
 229 //! Parallel iteration with reduction and default partitioner.
 230 /** @ingroup algorithms **/
 231 template<typename Range, typename Body>
 232 void parallel_reduce( const Range& range, Body& body ) {
 233     internal::start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
 234 }
 235
 236 //! Parallel iteration with reduction and simple_partitioner
 237 /** @ingroup algorithms **/
 238 template<typename Range, typename Body>
 239 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
 240     internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
 241 }
 242
 243 //! Parallel iteration with reduction and auto_partitioner
 244 /** @ingroup algorithms **/
 245 template<typename Range, typename Body>
 246 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
 247     internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
 248 }
 249
 250 //! Parallel iteration with reduction and affinity_partitioner
 251 /** @ingroup algorithms **/
 252 template<typename Range, typename Body>
 253 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
 254     internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
 255 }
 256
 257 #if __TBB_TASK_GROUP_CONTEXT
 258 //! Parallel iteration with reduction, simple partitioner and user-supplied context.
 259 /** @ingroup algorithms **/
 260 template<typename Range, typename Body>
 261 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
 262     internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
 263 }
 264
 265 //! Parallel iteration with reduction, auto_partitioner and user-supplied context
 266 /** @ingroup algorithms **/
 267 template<typename Range, typename Body>
 268 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
 269     internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
 270 }
 271
 272 //! Parallel iteration with reduction, affinity_partitioner and user-supplied context
 273 /** @ingroup algorithms **/
 274 template<typename Range, typename Body>
 275 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
 276     internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
 277 }
 278 #endif /* __TBB_TASK_GROUP_CONTEXT */
 279
 280 /** parallel_reduce overloads that work with anonymous function objects
 281     (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
 282
 283 //! Parallel iteration with reduction and default partitioner.
 284 /** @ingroup algorithms **/
 285 template<typename Range, typename Value, typename RealBody, typename Reduction>
 286 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
 287     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
 288     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
 289                           ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
 290     return body.result();
 291 }
 292
 293 //! Parallel iteration with reduction and simple_partitioner.
 294 /** @ingroup algorithms **/
 295 template<typename Range, typename Value, typename RealBody, typename Reduction>
 296 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
 297                        const simple_partitioner& partitioner ) {
 298     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
 299     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
 300                           ::run(range, body, partitioner );
 301     return body.result();
 302 }
 303
 304 //! Parallel iteration with reduction and auto_partitioner
 305 /** @ingroup algorithms **/
 306 template<typename Range, typename Value, typename RealBody, typename Reduction>
 307 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
 308                        const auto_partitioner& partitioner ) {
 309     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
 310     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
 311                           ::run( range, body, partitioner );
 312     return body.result();
 313 }
 314
 315 //! Parallel iteration with reduction and affinity_partitioner
 316 /** @ingroup algorithms **/
 317 template<typename Range, typename Value, typename RealBody, typename Reduction>
 318 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
 319                        affinity_partitioner& partitioner ) {
 320     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
 321     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
 322                                         ::run( range, body, partitioner );
 323     return body.result();
 324 }
 325
 326 #if __TBB_TASK_GROUP_CONTEXT
 327 //! Parallel iteration with reduction, simple partitioner and user-supplied context.
 328 /** @ingroup algorithms **/
 329 template<typename Range, typename Value, typename RealBody, typename Reduction>
 330 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
 331                        const simple_partitioner& partitioner, task_group_context& context ) {
 332     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
 333     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
 334                           ::run( range, body, partitioner, context );
 335     return body.result();
 336 }
 337
 338 //! Parallel iteration with reduction, auto_partitioner and user-supplied context
 339 /** @ingroup algorithms **/
 340 template<typename Range, typename Value, typename RealBody, typename Reduction>
 341 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
 342                        const auto_partitioner& partitioner, task_group_context& context ) {
 343     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
 344     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
 345                           ::run( range, body, partitioner, context );
 346     return body.result();
 347 }
 348
 349 //! Parallel iteration with reduction, affinity_partitioner and user-supplied context
 350 /** @ingroup algorithms **/
 351 template<typename Range, typename Value, typename RealBody, typename Reduction>
 352 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
 353                        affinity_partitioner& partitioner, task_group_context& context ) {
 354     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
 355     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
 356                                         ::run( range, body, partitioner, context );
 357     return body.result();
 358 }
 359 #endif /* __TBB_TASK_GROUP_CONTEXT */
 360 //@}
 361
 362 } // namespace tbb
 363
 364 #endif /* __TBB_parallel_reduce_H */
 365