Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
parallel_scan.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2019 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16 
17 
18 
19 */
20 
21 #ifndef __TBB_parallel_scan_H
22 #define __TBB_parallel_scan_H
23 
24 #include "task.h"
25 #include "aligned_space.h"
26 #include <new>
27 #include "partitioner.h"
28 
29 namespace tbb {
30 
32 
33 struct pre_scan_tag {
34  static bool is_final_scan() {return false;}
35  operator bool() {return is_final_scan();}
36 };
37 
39 
41  static bool is_final_scan() {return true;}
42  operator bool() {return is_final_scan();}
43 };
44 
46 namespace internal {
47 
49 
50  template<typename Range, typename Body>
51  class final_sum: public task {
52  public:
53  Body my_body;
54  private:
58  public:
59  final_sum( Body& body_ ) :
60  my_body(body_,split())
61  {
63  }
65  my_range.begin()->~Range();
66  }
67  void finish_construction( const Range& range_, Body* stuff_last_ ) {
68  new( my_range.begin() ) Range(range_);
69  my_stuff_last = stuff_last_;
70  }
71  private:
74  if( my_stuff_last )
75  my_stuff_last->assign(my_body);
76  return NULL;
77  }
78  };
79 
81 
82  template<typename Range, typename Body>
83  class sum_node: public task {
85  public:
89  private:
94  Range my_range;
95  sum_node( const Range range_, bool left_is_final_ ) :
96  my_stuff_last(NULL),
97  my_left_sum(NULL),
98  my_left(NULL),
99  my_right(NULL),
100  my_left_is_final(left_is_final_),
101  my_range(range_)
102  {
103  // Poison fields that will be set by second pass.
106  }
107  task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
108  if( !n ) {
109  f.recycle_as_child_of( *this );
110  f.finish_construction( range_, stuff_last_ );
111  return &f;
112  } else {
113  n->my_body = &f;
114  n->my_incoming = incoming_;
115  n->my_stuff_last = stuff_last_;
116  return n;
117  }
118  }
120  if( my_body ) {
121  if( my_incoming )
122  my_left_sum->my_body.reverse_join( my_incoming->my_body );
124  sum_node& c = *this;
127  set_ref_count( (a!=NULL)+(b!=NULL) );
128  my_body = NULL;
129  if( a ) spawn(*b);
130  else a = b;
131  return a;
132  } else {
133  return NULL;
134  }
135  }
136  template<typename Range_,typename Body_,typename Partitioner_>
137  friend class start_scan;
138 
139  template<typename Range_,typename Body_>
140  friend class finish_scan;
141  };
142 
144 
145  template<typename Range, typename Body>
146  class finish_scan: public task {
151  public:
154 
156  __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL );
157  if( my_result.my_left )
158  my_result.my_left_is_final = false;
159  if( my_right_zombie && my_sum )
160  ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
161  __TBB_ASSERT( !my_return_slot, NULL );
164  } else {
165  destroy( my_result );
166  }
167  if( my_right_zombie && !my_sum && !my_result.my_right ) {
168  destroy(*my_right_zombie);
169  my_right_zombie = NULL;
170  }
171  return NULL;
172  }
173 
174  finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
175  my_sum(sum_),
176  my_return_slot(return_slot_),
177  my_right_zombie(NULL),
178  my_result(result_)
179  {
180  __TBB_ASSERT( !my_return_slot, NULL );
181  }
182  };
183 
185 
186  template<typename Range, typename Body, typename Partitioner=simple_partitioner>
187  class start_scan: public task {
198  Range my_range;
199  typename Partitioner::partition_type my_partition;
201  public:
202  start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
203  my_body(parent_.my_body),
204  my_sum(parent_.my_sum),
205  my_return_slot(&return_slot_),
206  my_parent_sum(parent_sum_),
207  my_is_final(parent_.my_is_final),
208  my_is_right_child(false),
209  my_range(parent_.my_range,split()),
210  my_partition(parent_.my_partition,split())
211  {
212  __TBB_ASSERT( !*my_return_slot, NULL );
213  }
214 
215  start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
216  my_body(&body_),
217  my_sum(NULL),
218  my_return_slot(&return_slot_),
219  my_parent_sum(NULL),
220  my_is_final(true),
221  my_is_right_child(false),
222  my_range(range_),
223  my_partition(partitioner_)
224  {
225  __TBB_ASSERT( !*my_return_slot, NULL );
226  }
227 
228  static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
229  if( !range_.empty() ) {
230  typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
231  internal::sum_node<Range,Body>* root = NULL;
232  final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
233  start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
234  /*my_return_slot=*/root,
235  range_,
236  *temp_body,
237  partitioner_ );
238  temp_body->my_body.reverse_join(body_);
239  task::spawn_root_and_wait( pass1 );
240  if( root ) {
241  root->my_body = temp_body;
242  root->my_incoming = NULL;
243  root->my_stuff_last = &body_;
244  task::spawn_root_and_wait( *root );
245  } else {
246  body_.assign(temp_body->my_body);
247  temp_body->finish_construction( range_, NULL );
248  temp_body->destroy(*temp_body);
249  }
250  }
251  }
252  };
253 
254  template<typename Range, typename Body, typename Partitioner>
256  typedef internal::finish_scan<Range,Body> finish_pass1_type;
257  finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
258  // Inspecting p->result.left_sum would ordinarily be a race condition.
259  // But we inspect it only if we are not a stolen task, in which case we
260  // know that task assigning to p->result.left_sum has completed.
261  bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
262  if( treat_as_stolen ) {
263  // Invocation is for right child that has been really stolen or needs to be virtually stolen
264  p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
265  my_is_final = false;
266  }
267  task* next_task = NULL;
268  if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
269  if( my_is_final )
270  (my_body->my_body)( my_range, final_scan_tag() );
271  else if( my_sum )
272  (my_body->my_body)( my_range, pre_scan_tag() );
273  if( my_sum )
274  *my_sum = my_body;
275  __TBB_ASSERT( !*my_return_slot, NULL );
276  } else {
277  sum_node_type* result;
278  if( my_parent_sum )
279  result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
280  else
281  result = new(task::allocate_root()) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
282  finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
283  // Split off right child
284  start_scan& b = *new( c.allocate_child() ) start_scan( /*my_return_slot=*/result->my_right, *this, result );
285  b.my_is_right_child = true;
286  // Left child is recycling of *this. Must recycle this before spawning b,
287  // otherwise b might complete and decrement c.ref_count() to zero, which
288  // would cause c.execute() to run prematurely.
289  recycle_as_child_of(c);
290  c.set_ref_count(2);
291  c.spawn(b);
292  my_sum = &result->my_left_sum;
293  my_return_slot = &result->my_left;
294  my_is_right_child = false;
295  next_task = this;
296  my_parent_sum = result;
297  __TBB_ASSERT( !*my_return_slot, NULL );
298  }
299  return next_task;
300  }
301 
302  template<typename Range, typename Value, typename Scan, typename ReverseJoin>
304  Value my_sum;
305  const Value& identity_element;
306  const Scan& my_scan;
307  const ReverseJoin& my_reverse_join;
308  public:
309  lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join)
310  : my_sum(identity)
311  , identity_element(identity)
312  , my_scan(scan)
313  , my_reverse_join(rev_join) {}
314 
318  , my_scan(b.my_scan)
320 
321  template<typename Tag>
322  void operator()( const Range& r, Tag tag ) {
323  my_sum = my_scan(r, my_sum, tag);
324  }
325 
328  }
329 
331  my_sum = b.my_sum;
332  }
333 
334  Value result() const {
335  return my_sum;
336  }
337  };
338 } // namespace internal
340 
341 // Requirements on Range concept are documented in blocked_range.h
342 
360 
362 
363 template<typename Range, typename Body>
364 void parallel_scan( const Range& range, Body& body ) {
366 }
367 
369 
370 template<typename Range, typename Body>
371 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
373 }
374 
376 
377 template<typename Range, typename Body>
378 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
380 }
381 
383 
384 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
385 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) {
386  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
388  return body.result();
389 }
390 
392 
393 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
394 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const simple_partitioner& partitioner ) {
395  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
396  tbb::parallel_scan(range,body,partitioner);
397  return body.result();
398 }
399 
401 
402 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
403 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const auto_partitioner& partitioner ) {
404  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
405  tbb::parallel_scan(range,body,partitioner);
406  return body.result();
407 }
408 
410 
411 } // namespace tbb
412 
413 #endif /* __TBB_parallel_scan_H */
414 
Performs final scan for a leaf.
Definition: parallel_scan.h:51
static bool is_final_scan()
Definition: parallel_scan.h:34
#define __TBB_override
Definition: tbb_stddef.h:244
final_sum_type * my_left_sum
Definition: parallel_scan.h:90
lambda_scan_body(const Value &identity, const Scan &scan, const ReverseJoin &rev_join)
start_scan(sum_node_type *&return_slot_, const Range &range_, final_sum_type &body_, const Partitioner &partitioner_)
An auto partitioner.
Definition: partitioner.h:614
T * begin() const
Pointer to beginning of array.
Definition: aligned_space.h:39
task * execute() __TBB_override
Should be overridden by derived classes.
Definition: parallel_scan.h:72
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:169
static void run(const Range &range_, Body &body_, const Partitioner &partitioner_)
void operator()(const Range &r, Tag tag)
lambda_scan_body(lambda_scan_body &b, split)
Body * my_stuff_last
Where to put result of last subrange, or NULL if not last subrange.
Definition: parallel_scan.h:57
static void spawn_root_and_wait(task &root)
Spawn task allocated by allocate_root, wait for it to complete, and deallocate it.
Definition: task.h:781
task * execute() __TBB_override
Should be overridden by derived classes.
final_sum_type * my_body
final_sum< Range, Body > final_sum_type
Definition: parallel_scan.h:84
sum_node(const Range range_, bool left_is_final_)
Definition: parallel_scan.h:95
sum_node_type *& my_return_slot
void recycle_as_child_of(task &new_parent)
Change this to be a child of new_parent.
Definition: task.h:698
Base class for user-defined tasks.
Definition: task.h:592
final_sum< Range, Body > final_sum_type
final_sum_type * my_body
Definition: parallel_scan.h:87
Used to indicate that the initial scan is being performed.
Definition: parallel_scan.h:33
void const char const char int ITT_FORMAT __itt_group_sync p
task * execute() __TBB_override
Should be overridden by derived classes.
Base class for types that should not be assigned.
Definition: tbb_stddef.h:324
const ReverseJoin & my_reverse_join
aligned_space< Range > my_range
Definition: parallel_scan.h:55
The graph class.
Combine partial results.
final_sum< Range, Body > final_sum_type
Used to indicate that the final scan is being performed.
Definition: parallel_scan.h:40
Dummy type that distinguishes splitting constructor from copy constructor.
Definition: tbb_stddef.h:399
void parallel_scan(const Range &range, Body &body)
Parallel prefix with default partitioner.
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id parent
final_sum_type **const my_sum
sum_node_type ** my_return_slot
task * execute() __TBB_override
Should be overridden by derived classes.
void poison_pointer(T *__TBB_atomic &)
Definition: tbb_stddef.h:309
final_sum_type * my_incoming
Definition: parallel_scan.h:86
void assign(lambda_scan_body &b)
static bool is_final_scan()
Definition: parallel_scan.h:41
void reverse_join(lambda_scan_body &a)
#define __TBB_DEFAULT_PARTITIONER
Definition: tbb_config.h:597
finish_scan(sum_node_type *&return_slot_, final_sum_type **sum_, sum_node_type &result_)
void set_ref_count(int count)
Set reference count.
Definition: task.h:734
void finish_construction(const Range &range_, Body *stuff_last_)
Definition: parallel_scan.h:67
void recycle_as_continuation()
Change this to be a continuation of its former self.
Definition: task.h:684
final_sum_type ** my_sum
static internal::allocate_root_proxy allocate_root()
Returns proxy for overloaded new that allocates a root task.
Definition: task.h:636
sum_node< Range, Body > sum_node_type
sum_node< Range, Body > sum_node_type
Split work to be done in the scan.
Definition: parallel_scan.h:83
sum_node_type * my_parent_sum
task * create_child(const Range &range_, final_sum_type &f, sum_node *n, final_sum_type *incoming_, Body *stuff_last_)
Partitioner::partition_type my_partition
int ref_count() const
The internal reference count.
Definition: task.h:862
final_sum_type * my_right_zombie
Initial task to split the work.
A simple partitioner.
Definition: partitioner.h:587

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.