#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_taskdeps.h"
#include "ompt-specific.h"

static void __kmp_enable_tasking(kmp_task_team_t *task_team,
                                 kmp_info_t *this_thr);
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data);
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team);
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);

#ifdef BUILD_TIED_TASK_STACK
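// The routines guarded by BUILD_TIED_TASK_STACK maintain a per-thread stack of
// suspended tied tasks; they are debug/bookkeeping helpers and are compiled out
// unless BUILD_TIED_TASK_STACK is defined.

// __kmp_trace_task_stack: print the tied tasks from the task stack in order,
// from top to bottom.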
static void __kmp_trace_task_stack(kmp_int32 gtid,
                                   kmp_thread_data_t *thread_data,
                                   int threshold, char *location) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t **stack_top = task_stack->ts_top;
  kmp_int32 entries = task_stack->ts_entries;
  kmp_taskdata_t *tied_task;

  KA_TRACE(
      threshold,
      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
       "first_block = %p, stack_top = %p \n",
       location, gtid, entries, task_stack->ts_first_block, stack_top));

  KMP_DEBUG_ASSERT(stack_top != NULL);
  KMP_DEBUG_ASSERT(entries > 0);

  while (entries != 0) {
    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
    // fix up ts_top if we need to pop from previous block
    // (parenthesized: '==' binds tighter than '&')
    if ((entries & TASK_STACK_INDEX_MASK) == 0) {
      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);

      stack_block = stack_block->sb_prev;
      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    stack_top--;
    entries--;

    tied_task = *stack_top;

    KMP_DEBUG_ASSERT(tied_task != NULL);
    KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);

    KA_TRACE(threshold,
             ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
              "stack_top=%p, tied_task=%p\n",
              location, gtid, entries, stack_top, tied_task));
  }
  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);

  KA_TRACE(threshold,
           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
            location, gtid));
}
static void __kmp_init_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *first_block;

  // set up the first block of the stack
  first_block = &task_stack->ts_first_block;
  task_stack->ts_top = (kmp_taskdata_t **)first_block;
  memset((void *)first_block, '\0',
         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

  // initialize the stack to be empty
  task_stack->ts_entries = TASK_STACK_EMPTY;
  first_block->sb_next = NULL;
  first_block->sb_prev = NULL;
}
static void __kmp_free_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;

  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
  // free from the second block of the stack
  while (stack_block != NULL) {
    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;

    stack_block->sb_next = NULL;
    stack_block->sb_prev = NULL;
    if (stack_block != &task_stack->ts_first_block) {
      __kmp_thread_free(thread,
                        stack_block); // free the block, if not the first
    }
    stack_block = next_block;
  }
  // initialize the stack to be empty
  task_stack->ts_entries = 0;
  task_stack->ts_top = NULL;
}
static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                  kmp_taskdata_t *tied_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;

  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
    return; // Don't push anything on stack if team or team tasks are serialized
  }

  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);

  KA_TRACE(20,
           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
            gtid, thread, tied_task));
  // Store entry
  *(task_stack->ts_top) = tied_task;

  // Do bookkeeping for next push
  task_stack->ts_top++;
  task_stack->ts_entries++;

  // (parenthesized: '==' binds tighter than '&')
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    // Find beginning of this task block
    kmp_stack_block_t *stack_block =
        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);

    // Check if we already have a block
    if (stack_block->sb_next !=
        NULL) { // reset ts_top to beginning of next block
      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
    } else { // Alloc new block and link it up
      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
          thread, sizeof(kmp_stack_block_t));

      task_stack->ts_top = &new_block->sb_block[0];
      stack_block->sb_next = new_block;
      new_block->sb_prev = stack_block;
      new_block->sb_next = NULL;

      KA_TRACE(
          30,
          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
           gtid, tied_task, new_block));
    }
  }
  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                 kmp_taskdata_t *ending_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t *tied_task;

  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
    // Don't pop anything from stack if team or team tasks are serialized
    return;
  }

  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);

  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
                thread));

  // fix up ts_top if we need to pop from previous block
  // (parenthesized: '==' binds tighter than '&')
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);

    stack_block = stack_block->sb_prev;
    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
  }

  // finish bookkeeping
  task_stack->ts_top--;
  task_stack->ts_entries--;

  tied_task = *(task_stack->ts_top);

  KMP_DEBUG_ASSERT(tied_task != NULL);
  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(tied_task == ending_task); // If we built the stack correctly

  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
#endif /* BUILD_TIED_TASK_STACK */
static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
                                  const kmp_taskdata_t *tasknew,
                                  const kmp_taskdata_t *taskcurr) {
  if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) {
    // Check if the candidate obeys the Task Scheduling Constraints (TSC):
    // only a descendant of all deferred tied tasks can be scheduled; checking
    // the last one is enough, as it in turn is the descendant of all others
    kmp_taskdata_t *current = taskcurr->td_last_tied;
    KMP_DEBUG_ASSERT(current != NULL);
    // check if the task is not suspended on barrier
    if (current->td_flags.tasktype == TASK_EXPLICIT ||
        current->td_taskwait_thread > 0) { // <= 0 on barrier
      kmp_int32 level = current->td_level;
      kmp_taskdata_t *parent = tasknew->td_parent;
      while (parent != current && parent->td_level > level) {
        // check generation up to the level of the current task
        parent = parent->td_parent;
        KMP_DEBUG_ASSERT(parent != NULL);
      }
      if (parent != current)
        return false;
    }
  }
  // Check mutexinoutset dependencies, acquire locks
  kmp_depnode_t *node = tasknew->td_depnode;
  if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
    for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
      KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
      if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
        continue;
      // could not get the lock, release previous locks
      for (int j = i - 1; j >= 0; --j)
        __kmp_release_lock(node->dn.mtx_locks[j], gtid);
      return false;
    }
    // negative num_locks means all locks were acquired successfully
    node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
  }
  return true;
}
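// __kmp_realloc_task_deque: double the size of a full per-thread task deque,
// copying the queued tasks into the new storage. Caller holds td_deque_lock.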
static void __kmp_realloc_task_deque(kmp_info_t *thread,
                                     kmp_thread_data_t *thread_data) {
  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == size);
  kmp_int32 new_size = 2 * size;

  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), size, new_size, thread_data));

  kmp_taskdata_t **new_deque =
      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));

  int i, j;
  for (i = thread_data->td.td_deque_head, j = 0; j < size;
       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
    new_deque[j] = thread_data->td.td_deque[i];

  __kmp_free(thread_data->td.td_deque);

  thread_data->td.td_deque_head = 0;
  thread_data->td.td_deque_tail = size;
  thread_data->td.td_deque = new_deque;
  thread_data->td.td_deque_size = new_size;
}
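// __kmp_push_task: schedule a task for deferred execution by pushing it onto
// the encountering thread's deque; returns TASK_NOT_PUSHED if the task must
// instead be executed immediately (serialized task, or full deque with
// throttling enabled).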
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);

  // If we encounter a hidden helper task, and the current thread is not a
  // hidden helper thread, we have to give the task to a hidden helper thread,
  // starting from its shadow one.
  if (UNLIKELY(taskdata->td_flags.hidden_helper &&
               !KMP_HIDDEN_HELPER_THREAD(gtid))) {
    kmp_int32 shadow_gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
    __kmpc_give_task(task, __kmp_tid_from_gtid(shadow_gtid));
    // Signal the hidden helper threads.
    __kmp_hidden_helper_worker_thread_signal();
    return TASK_SUCCESSFULLY_PUSHED;
  }

  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_thread_data_t *thread_data;

  KA_TRACE(20,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(
        20,
        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
         gtid, counter, taskdata));
  }

  // The first check avoids building task_team thread data if serialized
  if (UNLIKELY(taskdata->td_flags.task_serial)) {
    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Now that serialized tasks have returned, we can assume that we are not in
  // immediate exec mode
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  if (UNLIKELY(!KMP_TASKING_ENABLED(task_team))) {
    __kmp_enable_tasking(task_team, thread);
  }
  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);

  // Find tasking deque specific to encountering thread
  thread_data = &task_team->tt.tt_threads_data[tid];

  // No lock needed since only owner can allocate.
  if (UNLIKELY(thread_data->td.td_deque == NULL)) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  int locked = 0;
  // Check if deque is full
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    } else {
      __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
      locked = 1;
      if (TCR_4(thread_data->td.td_deque_ntasks) >=
          TASK_DEQUE_SIZE(thread_data->td)) {
        // expand deque to push the task which is not allowed to execute
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Lock the deque for the task push operation
  if (!locked) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    // Need to recheck as we can get a proxy task from a thread outside of OpenMP
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      if (__kmp_enable_task_throttling &&
          __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                                thread->th.th_current_task)) {
        __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
                      "returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata));
        return TASK_NOT_PUSHED;
      } else {
        // expand deque to push the task which is not allowed to execute
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Must have room since no thread can add tasks but calling thread
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // Push taskdata
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
  KMP_FSYNC_RELEASING(taskdata); // releasing child
  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return TASK_SUCCESSFULLY_PUSHED;
}
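// __kmp_pop_current_task_from_thread: restore the thread's current task to
// the parent task when a region ends.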
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));

  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;

  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));
}
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
                                       int tid) {
  // current task of the thread is a parent of the new just created implicit
  // tasks of the new team
  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
                "curtask=%p "
                "parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));

  KMP_DEBUG_ASSERT(this_thr != NULL);

  if (tid == 0) {
    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
      team->t.t_implicit_task_taskdata[0].td_parent =
          this_thr->th.th_current_task;
      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
    }
  } else {
    team->t.t_implicit_task_taskdata[tid].td_parent =
        team->t.t_implicit_task_taskdata[0].td_parent;
    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
  }

  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
                "curtask=%p "
                "parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));
}
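// __kmp_task_start: mark the current task as suspended and install the given
// explicit task as the thread's currently executing task.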
static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
                             kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];

  KA_TRACE(10,
           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
            gtid, taskdata, current_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  // mark currently executing task as suspended
  current_task->td_flags.executing = 0;

// Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_push_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  // mark starting task as executing and as current task
  thread->th.th_current_task = taskdata;

  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  taskdata->td_flags.started = 1;
  taskdata->td_flags.executing = 1;
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
}
static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
  // The calls to __ompt_task_init already have the ompt_enabled condition.
  task->ompt_task_info.task_data.value = 0;
  task->ompt_task_info.frame.exit_frame = ompt_data_none;
  task->ompt_task_info.frame.enter_frame = ompt_data_none;
  task->ompt_task_info.frame.exit_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.frame.enter_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
}
static inline void __ompt_task_start(kmp_task_t *task,
                                     kmp_taskdata_t *current_task,
                                     kmp_int32 gtid) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  ompt_task_status_t status = ompt_task_switch;
  if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
    status = ompt_task_yield;
    __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
  }
  /* let OMPT know that we're about to run this task */
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(current_task->ompt_task_info.task_data), status,
        &(taskdata->ompt_task_info.task_data));
  }
  taskdata->ompt_task_info.scheduling_parent = current_task;
}
static inline void __ompt_task_finish(kmp_task_t *task,
                                      kmp_taskdata_t *resumed_task,
                                      ompt_task_status_t status) {
  if (ompt_enabled.ompt_callback_task_schedule) {
    kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
    if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
        taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
      status = ompt_task_cancel;
    }

    /* let OMPT know that we're returning to the callee task */
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(taskdata->ompt_task_info.task_data), status,
        (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
  }
}
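// __kmpc_omp_task_begin_if0*: entry points used for undeferred ("if(0)") tasks;
// the task body is executed inline by the encountering thread, so these only
// do the start-of-task bookkeeping (plus OMPT frame setup in the _ompt path).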
template <bool ompt>
static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
                                               kmp_task_t *task,
                                               void *frame_address,
                                               void *return_address) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
                "current_task=%p\n",
                gtid, loc_ref, taskdata, current_task));

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
                  "incremented for task %p\n",
                  gtid, counter, taskdata));
  }

  taskdata->td_flags.task_serial =
      1; // Execute this task immediately, not deferred.
  __kmp_task_start(gtid, task, current_task);

  if (ompt) {
    if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
      current_task->ompt_task_info.frame.enter_frame.ptr =
          taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
      current_task->ompt_task_info.frame.enter_frame_flags =
          taskdata->ompt_task_info.frame.exit_frame_flags =
              ompt_frame_application | ompt_frame_framepointer;
    }
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent_info->task_data), &(parent_info->frame),
          &(taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0,
          return_address);
    }
    __ompt_task_start(task, current_task, gtid);
  }

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, taskdata));
}
static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                           kmp_task_t *task,
                                           void *frame_address,
                                           void *return_address) {
  __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
                                           return_address);
}
void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
                               kmp_task_t *task) {
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
                                   OMPT_GET_FRAME_ADDRESS(1),
                                   OMPT_LOAD_RETURN_ADDRESS(gtid));
    return;
  }
  __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
}
void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(
      10,
      ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
       gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));

  __kmp_task_start(gtid, task, current_task);

  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
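// __kmp_free_task: release the taskdata block (and co-allocated shareds) of a
// completed explicit task.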
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
                            kmp_info_t *thread) {
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));

  // Check to make sure all flags and counters have the correct values
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
                   taskdata->td_flags.task_serial == 1);
  KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);

  taskdata->td_flags.freed = 1;
// deallocate the taskdata and shared variable blocks associated with this task
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, taskdata);
#else /* ! USE_FAST_MEMORY */
  __kmp_thread_free(thread, taskdata);
#endif
  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
}
static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
                                          kmp_taskdata_t *taskdata,
                                          kmp_info_t *thread) {
  // Proxy tasks must always be allowed to free their parents
  // because they can be run in background even in serial mode.
  kmp_int32 team_serial =
      (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
      !taskdata->td_flags.proxy;
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
  KMP_DEBUG_ASSERT(children >= 0);

  // Now, go up the ancestor tree to see if any ancestors can now be freed.
  while (children == 0) {
    kmp_taskdata_t *parent_taskdata = taskdata->td_parent;

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                  "and freeing itself\n",
                  gtid, taskdata));

    // --- Deallocate my ancestor task ---
    __kmp_free_task(gtid, taskdata, thread);

    taskdata = parent_taskdata;

    if (team_serial)
      return;
    // Stop checking ancestors at implicit task instead of walking up ancestor
    // tree to avoid premature deallocation of ancestors.
    if (taskdata->td_flags.tasktype == TASK_IMPLICIT) {
      if (taskdata->td_dephash) { // do we need to cleanup dephash?
        int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks);
        kmp_tasking_flags_t flags_old = taskdata->td_flags;
        if (children == 0 && flags_old.complete == 1) {
          kmp_tasking_flags_t flags_new = flags_old;
          flags_new.complete = 0;
          if (KMP_COMPARE_AND_STORE_ACQ32(
                  RCAST(kmp_int32 *, &taskdata->td_flags),
                  *RCAST(kmp_int32 *, &flags_old),
                  *RCAST(kmp_int32 *, &flags_new))) {
            KA_TRACE(100, ("__kmp_free_task_and_ancestors: T#%d cleans "
                           "dephash of implicit task %p\n",
                           gtid, taskdata));
            // cleanup dephash of the implicit task
            __kmp_dephash_free_entries(thread, taskdata->td_dephash);
          }
        }
      }
      return;
    }
    // Predecrement simulated by "- 1" calculation
    children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
    KMP_DEBUG_ASSERT(children >= 0);
  }

  KA_TRACE(
      20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
           "not freeing it yet\n",
           gtid, taskdata, children));
}
// Only need to keep track of child task counts if the task is not serialized,
// or it is a proxy, detachable, or hidden helper task, or its parent still has
// incomplete children.
static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
  kmp_tasking_flags_t flags = taskdata->td_flags;
  bool ret = !(flags.team_serial || flags.tasking_ser);
  ret = ret || flags.proxy == TASK_PROXY ||
        flags.detachable == TASK_DETACHABLE || flags.hidden_helper;
  ret = ret ||
        KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0;
  return ret;
}
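// __kmp_task_finish: bookkeeping when a task finishes execution: handle untied
// task reference counts, detachable tasks, dependence release, child counters,
// and resumption of the previous task.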
template <bool ompt>
static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
                              kmp_taskdata_t *resumed_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_team_t *task_team =
      thread->th.th_task_team; // might be NULL for serial teams
  kmp_int32 children = 0;

  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
                "task %p\n",
                gtid, taskdata, resumed_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

// Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_pop_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // untied task needs to check the counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
    KA_TRACE(
        20,
        ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
         gtid, counter, taskdata));
    if (counter > 0) {
      // untied task is not done, to be continued possibly by other thread, do
      // not free it now
      if (resumed_task == NULL) {
        KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
        resumed_task = taskdata->td_parent; // In a serialized task, the resumed
        // task is the parent
      }
      thread->th.th_current_task = resumed_task; // restore current_task
      resumed_task->td_flags.executing = 1; // resume previous task
      KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
                    "resuming task %p\n",
                    gtid, taskdata, resumed_task));
      return;
    }
  }

  // bookkeeping for resuming task: note tasking_ser => task_serial
  KMP_DEBUG_ASSERT(
      (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
      taskdata->td_flags.task_serial);
  if (taskdata->td_flags.task_serial) {
    if (resumed_task == NULL) {
      resumed_task = taskdata->td_parent; // In a serialized task, the resumed
      // task is the parent
    }
  } else {
    KMP_DEBUG_ASSERT(resumed_task !=
                     NULL); // verify that resumed task is passed as argument
  }

  /* If the tasks' destructor thunk flag has been set, we need to invoke the
     destructor thunk that has been generated by the compiler. */
  if (UNLIKELY(taskdata->td_flags.destructors_thunk)) {
    kmp_routine_entry_t destr_thunk = task->data1.destructors;
    KMP_ASSERT(destr_thunk);
    destr_thunk(gtid, task);
  }

  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  bool detach = false;
  if (UNLIKELY(taskdata->td_flags.detachable == TASK_DETACHABLE)) {
    if (taskdata->td_allow_completion_event.type ==
        KMP_EVENT_ALLOW_COMPLETION) {
      // event hasn't been fulfilled yet. Try to detach task.
      __kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
      if (taskdata->td_allow_completion_event.type ==
          KMP_EVENT_ALLOW_COMPLETION) {
        // task finished execution
        KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
        taskdata->td_flags.executing = 0; // suspend the finishing task

        if (ompt)
          __ompt_task_finish(task, resumed_task, ompt_task_detach);

        // no access to taskdata after this point!
        // __kmp_fulfill_event might free taskdata at any time from now
        taskdata->td_flags.proxy = TASK_PROXY; // proxify!
        detach = true;
      }
      __kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
    }
  }

  if (!detach) {
    taskdata->td_flags.complete = 1; // mark the task as completed

    if (ompt)
      __ompt_task_finish(task, resumed_task, ompt_task_complete);

    if (__kmp_track_children_task(taskdata)) {
      __kmp_release_deps(gtid, taskdata);
      // Predecrement simulated by "- 1" calculation
      children =
          -1 + KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
      KMP_DEBUG_ASSERT(children >= 0);
      if (taskdata->td_taskgroup)
        KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
    } else if (task_team && (task_team->tt.tt_found_proxy_tasks ||
                             task_team->tt.tt_hidden_helper_task_encountered)) {
      // if we found proxy or hidden helper tasks there could exist a dependency
      // chain with the proxy task as origin
      __kmp_release_deps(gtid, taskdata);
    }
    // td_flags.executing must be marked as 0 after __kmp_release_deps has been
    // called. Otherwise, if a task is executed immediately from the release_deps
    // code, the flag will be reset to 1 again by that same code.
    KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
    taskdata->td_flags.executing = 0; // suspend the finishing task
  }

  KA_TRACE(
      20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
           gtid, taskdata, children));

  // Free this task and then ancestor tasks if they have no children.
  // Restore th_current_task first so an asynchronous inquiry does not see the
  // freed task as the current task.
  thread->th.th_current_task = resumed_task;
  if (!detach)
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

  resumed_task->td_flags.executing = 1; // resume previous task

  KA_TRACE(
      10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
           gtid, taskdata, resumed_task));
}
template <bool ompt>
static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
                                                  kmp_int32 gtid,
                                                  kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
  KMP_DEBUG_ASSERT(gtid >= 0);
  // this routine will provide task to resume
  __kmp_task_finish<ompt>(gtid, task, NULL);

  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));

  if (ompt) {
    ompt_frame_t *ompt_frame;
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame = ompt_data_none;
    ompt_frame->enter_frame_flags =
        ompt_frame_runtime | ompt_frame_framepointer;
  }
}
void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                       kmp_task_t *task) {
  __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
}
void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *task) {
  if (UNLIKELY(ompt_enabled.enabled)) {
    __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
    return;
  }
  __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
}
void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
                              kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));

  __kmp_task_finish<false>(gtid, task,
                           NULL); // Not sure how to find task to resume

  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
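// __kmp_init_implicit_task: initialize the implicit task descriptor for a
// given thread of a team; optionally make it the thread's current task.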
void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
                              kmp_team_t *team, int tid, int set_curr_task) {
  kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];

  KF_TRACE(
      10,
      ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
       tid, team, task, set_curr_task ? "TRUE" : "FALSE"));

  task->td_task_id = KMP_GEN_TASK_ID();
  task->td_team = team;
  task->td_ident = loc_ref;
  task->td_taskwait_ident = NULL;
  task->td_taskwait_counter = 0;
  task->td_taskwait_thread = 0;

  task->td_flags.tiedness = TASK_TIED;
  task->td_flags.tasktype = TASK_IMPLICIT;
  task->td_flags.proxy = TASK_FULL;

  // All implicit tasks are executed immediately, not deferred
  task->td_flags.task_serial = 1;
  task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
  task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  task->td_flags.started = 1;
  task->td_flags.executing = 1;
  task->td_flags.complete = 0;
  task->td_flags.freed = 0;

  task->td_depnode = NULL;
  task->td_last_tied = task;
  task->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;

  if (set_curr_task) { // only do this init first time thread is created
    KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0);
    // Not used: don't need to deallocate implicit task
    KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0);
    task->td_taskgroup = NULL; // An implicit task does not have taskgroup
    task->td_dephash = NULL;
    __kmp_push_current_task_to_thread(this_thr, team, tid);
  } else {
    KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
    KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
  }

  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(task, tid);

  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
                team, task));
}
void __kmp_finish_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task->td_dephash) {
    int children;
    task->td_flags.complete = 1;
    children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
    kmp_tasking_flags_t flags_old = task->td_flags;
    if (children == 0 && flags_old.complete == 1) {
      kmp_tasking_flags_t flags_new = flags_old;
      flags_new.complete = 0;
      if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags),
                                      *RCAST(kmp_int32 *, &flags_old),
                                      *RCAST(kmp_int32 *, &flags_new))) {
        KA_TRACE(100, ("__kmp_finish_implicit_task: T#%d cleans "
                       "dephash of implicit task %p\n",
                       thread->th.th_info.ds.ds_gtid, task));
        __kmp_dephash_free_entries(thread, task->td_dephash);
      }
    }
  }
}
void __kmp_free_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task && task->td_dephash) {
    __kmp_dephash_free(thread, task->td_dephash);
    task->td_dephash = NULL;
  }
}
// Round up a size to a power of two specified by val: used to insert padding
// between structures co-allocated using a single malloc() call
static size_t __kmp_round_up_to_val(size_t size, size_t val) {
  if (size & (val - 1)) {
    size &= ~(val - 1);
    if (size <= KMP_SIZE_T_MAX - val) {
      size += val; // Round up if there is no overflow.
    }
  }
  return size;
}
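// __kmp_task_alloc: allocate and initialize a taskdata/kmp_task_t pair for a
// new explicit task; the shareds block is co-allocated after the task
// structure (rounded up to pointer alignment).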
kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                             kmp_tasking_flags_t *flags,
                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                             kmp_routine_entry_t task_entry) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;

  if (UNLIKELY(!TCR_4(__kmp_init_middle)))
    __kmp_middle_initialize();

  if (flags->hidden_helper) {
    if (__kmp_enable_hidden_helper) {
      if (!TCR_4(__kmp_init_hidden_helper))
        __kmp_hidden_helper_initialize();
    } else {
      // If hidden helper tasks are not enabled, reset the flag to FALSE.
      flags->hidden_helper = FALSE;
    }
  }

  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  KMP_DEBUG_ASSERT(parent_task);
  if (parent_task->td_flags.final) {
    if (flags->merged_if0) {
    }
    flags->final = 1;
  }

  if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
    // Untied task encountered causes the TSC algorithm to check entire deque of
    // the victim thread. If no untied task encountered, then checking the head
    // of the deque should be enough.
    KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
  }

  // Detachable tasks are not proxy tasks yet but could be in the future. Doing
  // the tasking setup when that happens would be too late.
  if (UNLIKELY(flags->proxy == TASK_PROXY ||
               flags->detachable == TASK_DETACHABLE || flags->hidden_helper)) {
    if (flags->proxy == TASK_PROXY) {
      flags->tiedness = TASK_UNTIED;
      flags->merged_if0 = 1;
    }
    /* are we running in a sequential parallel or tskm_immediate_exec ... we
       need tasking support enabled */
    if ((thread->th.th_task_team) == NULL) {
      /* This should only happen if the team is serialized;
         set up a task team and propagate it to the thread */
      KMP_DEBUG_ASSERT(team->t.t_serialized);
      KA_TRACE(30,
               ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
                gtid));
      __kmp_task_team_setup(thread, team, 1);
      thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
    }
    kmp_task_team_t *task_team = thread->th.th_task_team;

    /* tasking must be enabled now as the task might not be pushed */
    if (!KMP_TASKING_ENABLED(task_team)) {
      KA_TRACE(
          30,
          ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
      __kmp_enable_tasking(task_team, thread);
      kmp_int32 tid = thread->th.th_info.ds.ds_tid;
      kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
      // No lock needed since only owner can allocate
      if (thread_data->td.td_deque == NULL) {
        __kmp_alloc_task_deque(thread, thread_data);
      }
    }

    if ((flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE) &&
        task_team->tt.tt_found_proxy_tasks == FALSE)
      TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
    if (flags->hidden_helper &&
        task_team->tt.tt_hidden_helper_task_encountered == FALSE)
      TCW_4(task_team->tt.tt_hidden_helper_task_encountered, TRUE);
  }

  // Calculate shared structure offset including padding after kmp_task_t struct
  // to align pointers in shared struct
  shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
  shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));

  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
                shareds_offset));
  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
                sizeof_shareds));

  // Avoid double allocation here by combining shareds with taskdata
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
                                                               sizeof_shareds);
#else /* ! USE_FAST_MEMORY */
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
                                                               sizeof_shareds);
#endif /* USE_FAST_MEMORY */

  task = KMP_TASKDATA_TO_TASK(taskdata);

// Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
#else
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
#endif
  if (sizeof_shareds > 0) {
    // Avoid double allocation here by combining shareds with taskdata
    task->shareds = &((char *)taskdata)[shareds_offset];
    // Make sure shareds struct is aligned to pointer size
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  } else {
    task->shareds = NULL;
  }
  task->routine = task_entry;
  task->part_id = 0; // AC: Always start with 0 part id

  taskdata->td_task_id = KMP_GEN_TASK_ID();
  taskdata->td_team = thread->th.th_team;
  taskdata->td_alloc_thread = thread;
  taskdata->td_parent = parent_task;
  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
  KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
  taskdata->td_ident = loc_ref;
  taskdata->td_taskwait_ident = NULL;
  taskdata->td_taskwait_counter = 0;
  taskdata->td_taskwait_thread = 0;
  KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
  // avoid copying icvs for proxy tasks
  if (flags->proxy == TASK_FULL)
    copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);

  taskdata->td_flags = *flags;
  taskdata->td_task_team = thread->th.th_task_team;
  taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
  taskdata->td_flags.tasktype = TASK_EXPLICIT;
  // If it is a hidden helper task, we need to set the team and task team
  // correspondingly.
  if (flags->hidden_helper) {
    kmp_info_t *shadow_thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
    taskdata->td_team = shadow_thread->th.th_team;
    taskdata->td_task_team = shadow_thread->th.th_task_team;
  }

  taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);

  taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  // Serialize the task if the team is serialized to make sure the implicit
  // parallel region will not be entered multiple times by the same thread
  taskdata->td_flags.task_serial =
      (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
       taskdata->td_flags.tasking_ser || flags->merged_if0);

  taskdata->td_flags.started = 0;
  taskdata->td_flags.executing = 0;
  taskdata->td_flags.complete = 0;
  taskdata->td_flags.freed = 0;

  KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
  // start at one because count includes current task and children
  KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
  taskdata->td_taskgroup =
      parent_task->td_taskgroup; // task inherits taskgroup from the parent task
  taskdata->td_dephash = NULL;
  taskdata->td_depnode = NULL;
  if (flags->tiedness == TASK_UNTIED)
    taskdata->td_last_tied = NULL; // will be set when the task is scheduled
  else
    taskdata->td_last_tied = taskdata;
  taskdata->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;

  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, gtid);

  if (__kmp_track_children_task(taskdata)) {
    KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
    if (parent_task->td_taskgroup)
      KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
    // Only need to keep track of allocated child tasks for explicit tasks since
    // implicit tasks are never deallocated
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
      KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
    }
    if (flags->hidden_helper) {
      taskdata->td_flags.task_serial = FALSE;
      // Increment the number of hidden helper tasks to be executed
      KMP_ATOMIC_INC(&__kmp_unexecuted_hidden_helper_tasks);
    }
  }

  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                gtid, taskdata, taskdata->td_parent));

  return task;
}
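// __kmpc_omp_task_alloc: compiler entry point wrapping __kmp_task_alloc; the
// kmp_int32 flags word is reinterpreted as kmp_tasking_flags_t.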
kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
                                  size_t sizeof_shareds,
                                  kmp_routine_entry_t task_entry) {
  kmp_task_t *retval;
  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
  __kmp_assert_valid_gtid(gtid);
  input_flags->native = FALSE;
  // __kmp_task_alloc() sets up all other runtime flags
  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                input_flags->proxy ? "proxy" : "",
                input_flags->detachable ? "detachable" : "", sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                            sizeof_shareds, task_entry);

  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));

  return retval;
}
kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                         kmp_int32 flags,
                                         size_t sizeof_kmp_task_t,
                                         size_t sizeof_shareds,
                                         kmp_routine_entry_t task_entry,
                                         kmp_int64 device_id) {
  auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
  // target tasks are untied as defined in the specification
  input_flags.tiedness = TASK_UNTIED;

  if (__kmp_enable_hidden_helper)
    input_flags.hidden_helper = TRUE;

  return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry);
}
kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
                                            kmp_task_t *new_task,
                                            kmp_int32 naffins,
                                            kmp_task_affinity_info_t *affin_list) {
  return 0;
}
static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
                              kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread;
  int discard = 0 /* false */;

  KA_TRACE(
      30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
           gtid, taskdata, current_task));
  KMP_DEBUG_ASSERT(task);
  if (UNLIKELY(taskdata->td_flags.proxy == TASK_PROXY &&
               taskdata->td_flags.complete == 1)) {
    // This is a proxy task that was already completed but it needs to run
    // its bottom-half finish
    KA_TRACE(
        30,
        ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
         gtid, taskdata));

    __kmp_bottom_half_finish_proxy(gtid, task);

    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
                  "proxy task %p, resuming task %p\n",
                  gtid, taskdata, current_task));
    return;
  }

  ompt_thread_info_t oldInfo;
  if (UNLIKELY(ompt_enabled.enabled)) {
    // Store the thread's state and restore it after the task
    thread = __kmp_threads[gtid];
    oldInfo = thread->th.ompt_thread_info;
    thread->th.ompt_thread_info.wait_id = 0;
    thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
                                            ? ompt_state_work_serial
                                            : ompt_state_work_parallel;
    taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }

  // Decrement the counter of hidden helper tasks to be executed
  if (taskdata->td_flags.hidden_helper) {
    // Hidden helper tasks can only be executed by hidden helper threads
    KMP_ASSERT(KMP_HIDDEN_HELPER_THREAD(gtid));
    KMP_ATOMIC_DEC(&__kmp_unexecuted_hidden_helper_tasks);
  }

  // Proxy tasks are not handled by the runtime
  if (taskdata->td_flags.proxy != TASK_PROXY) {
    __kmp_task_start(gtid, task, current_task);
  }

  // If cancellation has been enabled for this run, check whether this task (or
  // its taskgroup/parallel region) has been cancelled and discard it if so.
  if (UNLIKELY(__kmp_omp_cancellation)) {
    thread = __kmp_threads[gtid];
    kmp_team_t *this_team = thread->th.th_team;
    kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
    if ((taskgroup && taskgroup->cancel_request) ||
        (this_team->t.t_cancel_request == cancel_parallel)) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
      ompt_data_t *task_data;
      if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
        __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
        ompt_callbacks.ompt_callback(ompt_callback_cancel)(
            task_data,
            ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
                                                      : ompt_cancel_parallel) |
                ompt_cancel_discarded_task,
            NULL);
      }
#endif
      // this task belongs to a cancelled construct; discard it
      discard = 1 /* true */;
    }
  }

  // Invoke the task routine and pass in relevant data.
  if (!discard) {
    if (taskdata->td_flags.tiedness == TASK_UNTIED) {
      taskdata->td_last_tied = current_task->td_last_tied;
      KMP_DEBUG_ASSERT(taskdata->td_last_tied);
    }
#if KMP_STATS_ENABLED
    switch (KMP_GET_THREAD_STATE()) {
    case FORK_JOIN_BARRIER:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
      break;
    case PLAIN_BARRIER:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
      break;
    case TASKYIELD:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
      break;
    case TASKWAIT:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
      break;
    case TASKGROUP:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
      break;
    default:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
      break;
    }
#endif // KMP_STATS_ENABLED

    // OMPT task begin
    if (UNLIKELY(ompt_enabled.enabled))
      __ompt_task_start(task, current_task, gtid);

    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_task_begin();

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    kmp_uint64 cur_time;
    kmp_int32 kmp_itt_count_task =
        __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial &&
        current_task->td_flags.tasktype == TASK_IMPLICIT;
    if (kmp_itt_count_task) {
      thread = __kmp_threads[gtid];
      // Time outer level explicit task on barrier for adjusting imbalance time
      if (thread->th.th_bar_arrive_time)
        cur_time = __itt_get_timestamp();
      else
        kmp_itt_count_task = 0; // thread is not on a barrier - skip timing
    }
    KMP_FSYNC_ACQUIRED(taskdata); // acquired self (new task)
#endif

    if (task->routine != NULL) {
#ifdef KMP_GOMP_COMPAT
      if (taskdata->td_flags.native) {
        ((void (*)(void *))(*(task->routine)))(task->shareds);
      } else
#endif /* KMP_GOMP_COMPAT */
      {
        (*(task->routine))(gtid, task);
      }
    }
    KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (kmp_itt_count_task) {
      // Barrier imbalance - adjust arrive time with the task duration
      thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
    }
    KMP_FSYNC_CANCEL(taskdata); // destroy self (just executed)
    KMP_FSYNC_RELEASING(taskdata->td_parent); // releasing parent
#endif
  }

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_task_end();

  // Proxy tasks are not handled by the runtime
  if (taskdata->td_flags.proxy != TASK_PROXY) {
    if (UNLIKELY(ompt_enabled.enabled)) {
      thread->th.ompt_thread_info = oldInfo;
      if (taskdata->td_flags.tiedness == TASK_TIED) {
        taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
      }
      __kmp_task_finish<true>(gtid, task, current_task);
    } else
      __kmp_task_finish<false>(gtid, task, current_task);
  }

  KA_TRACE(
      30,
      ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
       gtid, taskdata, current_task));
}
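// __kmpc_omp_task_parts / __kmp_omp_task: schedule a new task for deferred
// execution, falling back to immediate (possibly serialized) invocation when
// the task cannot be pushed onto the deque.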
kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
                                kmp_task_t *new_task) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));

  kmp_taskdata_t *parent;
  if (UNLIKELY(ompt_enabled.enabled)) {
    parent = new_taskdata->td_parent;
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0,
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }

  /* Should we execute the new task or queue it? For now, let's just always try
     to queue it.  If the queue fills up, then we'll execute it.  */
  if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) {
    // Execute this task immediately
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  }

  KA_TRACE(
      10,
      ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
       "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
       gtid, loc_ref, new_taskdata));

  if (UNLIKELY(ompt_enabled.enabled)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
  return TASK_CURRENT_NOT_QUEUED;
}
kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
                         bool serialize_immediate) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

  /* Should we execute the new task or queue it? For now, let's just always try
     to queue it.  If the queue fills up, then we'll execute it.  */
  if (new_taskdata->td_flags.proxy == TASK_PROXY ||
      __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) {
    // Execute this task immediately
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    if (serialize_immediate)
      new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  }

  return TASK_CURRENT_NOT_QUEUED;
}
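// Illustrative only (not part of the upstream sources): a typical compiler
// lowering of a deferred "#pragma omp task" is roughly
//   kmp_task_t *t = __kmpc_omp_task_alloc(&loc, gtid, flags,
//                                         sizeof_kmp_task_t, sizeof_shareds,
//                                         &task_entry);
//   /* copy firstprivate data into the allocated task */
//   __kmpc_omp_task(&loc, gtid, t);
// whereas an undeferred task (e.g. "if(0)") brackets a direct call of the task
// body with __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0.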
kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
                          kmp_task_t *new_task) {
  kmp_int32 res;
  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);

#if KMP_DEBUG || OMPT_SUPPORT
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
                new_taskdata));
  __kmp_assert_valid_gtid(gtid);

  kmp_taskdata_t *parent = NULL;
  if (UNLIKELY(ompt_enabled.enabled)) {
    if (!new_taskdata->td_flags.started) {
      OMPT_STORE_RETURN_ADDRESS(gtid);
      parent = new_taskdata->td_parent;
      if (!parent->ompt_task_info.frame.enter_frame.ptr) {
        parent->ompt_task_info.frame.enter_frame.ptr =
            OMPT_GET_FRAME_ADDRESS(0);
      }
      if (ompt_enabled.ompt_callback_task_create) {
        ompt_callbacks.ompt_callback(ompt_callback_task_create)(
            &(parent->ompt_task_info.task_data),
            &(parent->ompt_task_info.frame),
            &(new_taskdata->ompt_task_info.task_data),
            ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
            OMPT_LOAD_RETURN_ADDRESS(gtid));
      }
    } else {
      // We are scheduling the continuation of an UNTIED task.
      // Scheduling back to the parent task.
      __ompt_task_finish(new_task,
                         new_taskdata->ompt_task_info.scheduling_parent,
                         ompt_task_switch);
      new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
    }
  }

  res = __kmp_omp_task(gtid, new_task, true);

  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));
  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
  return res;
}
kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *new_task, void *codeptr_ra) {
  kmp_int32 res;
  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);

#if KMP_DEBUG || OMPT_SUPPORT
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
                new_taskdata));

  kmp_taskdata_t *parent = NULL;
  if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
    parent = new_taskdata->td_parent;
    if (!parent->ompt_task_info.frame.enter_frame.ptr)
      parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
          codeptr_ra);
    }
  }

  res = __kmp_omp_task(gtid, new_task, true);

  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));
  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
  return res;
}
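// __kmpc_omp_taskwait_template: implement "#pragma omp taskwait" by executing
// queued tasks until the current task has no incomplete children; the ompt
// template parameter selects whether OMPT sync-region callbacks are raised.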
template <bool ompt>
static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
                                              void *frame_address,
                                              void *return_address) {
  kmp_taskdata_t *taskdata = nullptr;
  kmp_info_t *thread;
  int thread_finished = FALSE;
  KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);

  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;

#if OMPT_SUPPORT && OMPT_OPTIONAL
    ompt_data_t *my_task_data;
    ompt_data_t *my_parallel_data;

    if (ompt) {
      my_task_data = &(taskdata->ompt_task_info.task_data);
      my_parallel_data = OMPT_CUR_TEAM_DATA(thread);

      taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;

      if (ompt_enabled.ompt_callback_sync_region) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
            my_task_data, return_address);
      }

      if (ompt_enabled.ompt_callback_sync_region_wait) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
            my_task_data, return_address);
      }
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

    // Debugger: the taskwait is active; store location and encountering thread.
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc_ref;
    taskdata->td_taskwait_thread = gtid + 1;

    void *itt_sync_obj = NULL;
    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);

    bool must_wait =
        !taskdata->td_flags.team_serial && !taskdata->td_flags.final;

    must_wait = must_wait || (thread->th.th_task_team != NULL &&
                              thread->th.th_task_team->tt.tt_found_proxy_tasks);
    // If hidden helper thread is encountered, we must enable wait here.
    must_wait =
        must_wait ||
        (__kmp_enable_hidden_helper && thread->th.th_task_team != NULL &&
         thread->th.th_task_team->tt.tt_hidden_helper_task_encountered);

    if (must_wait) {
      kmp_flag_32<false, false> flag(
          RCAST(std::atomic<kmp_uint32> *,
                &(taskdata->td_incomplete_child_tasks)),
          0U);
      while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }

    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
    KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with children

    // Debugger: the taskwait is completed; location remains, thread is negated.
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt) {
      if (ompt_enabled.ompt_callback_sync_region_wait) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
            my_task_data, return_address);
      }
      if (ompt_enabled.ompt_callback_sync_region) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
            my_task_data, return_address);
      }
      taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  }

  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                          void *frame_address,
                                          void *return_address) {
  return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
                                            return_address);
}
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are
// complete
kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
                                    OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
  return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
}
// __kmpc_omp_taskyield: switch to a different task
kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
  kmp_taskdata_t *taskdata = NULL;
  kmp_info_t *thread;
  int thread_finished = FALSE;

  KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);

  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
                gtid, loc_ref, end_part));
  __kmp_assert_valid_gtid(gtid);

  if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;
    // Debugger: treated like a taskwait; store location and encountering
    // thread.
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc_ref;
    taskdata->td_taskwait_thread = gtid + 1;

    void *itt_sync_obj = NULL;
    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);

    if (!taskdata->td_flags.team_serial) {
      kmp_task_team_t *task_team = thread->th.th_task_team;
      if (task_team != NULL) {
        if (KMP_TASKING_ENABLED(task_team)) {
          if (UNLIKELY(ompt_enabled.enabled))
            thread->th.ompt_thread_info.ompt_task_yielded = 1;
          __kmp_execute_tasks_32(
              thread, gtid, (kmp_flag_32<> *)NULL, FALSE,
              &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
              __kmp_task_stealing_constraint);
          if (UNLIKELY(ompt_enabled.enabled))
            thread->th.ompt_thread_info.ompt_task_yielded = 0;
        }
      }
    }

    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);

    // Debugger: the wait is completed; location remains, thread is negated.
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
  }

  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
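// Task reduction support: the routines below build per-taskgroup arrays of
// kmp_taskred_data_t items, hand out per-thread copies of reduction variables,
// and combine/finalize them when the taskgroup ends.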
/*!
Flags for special info per task reduction item.
*/
typedef struct kmp_taskred_flags {
  /*! 1 - use lazy alloc/init (e.g. big objects, num tasks < num threads) */
  unsigned lazy_priv : 1;
  unsigned reserved31 : 31;
} kmp_taskred_flags_t;
template <typename T>
void *__kmp_task_reduction_init(int gtid, int num, T *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
  kmp_uint32 nth = thread->th.th_team_nproc;
  kmp_taskred_data_t *arr;

  // check input data just in case
  KMP_ASSERT(tg != NULL);
  KMP_ASSERT(data != NULL);
  KMP_ASSERT(num > 0);
  if (nth == 1) {
    KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
                  gtid, tg));
    return (void *)tg;
  }
  KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
                gtid, tg, num));
  arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
      thread, num * sizeof(kmp_taskred_data_t));
  for (int i = 0; i < num; ++i) {
    size_t size = data[i].reduce_size - 1;
    // round the size up to cache line per thread-specific item
    size += CACHE_LINE - size % CACHE_LINE;
    KMP_ASSERT(data[i].reduce_comb != NULL); // combiner is mandatory
    arr[i].reduce_shar = data[i].reduce_shar;
    arr[i].reduce_size = size;
    arr[i].flags = data[i].flags;
    arr[i].reduce_comb = data[i].reduce_comb;
    arr[i].reduce_init = data[i].reduce_init;
    arr[i].reduce_fini = data[i].reduce_fini;
    __kmp_assign_orig<T>(arr[i], data[i]);
    if (!arr[i].flags.lazy_priv) {
      // allocate cache-line aligned block and fill it with zeros
      arr[i].reduce_priv = __kmp_allocate(nth * size);
      arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
      if (arr[i].reduce_init != NULL) {
        // initialize all thread-specific items
        for (size_t j = 0; j < nth; ++j) {
          __kmp_call_init<T>(arr[i], j * size);
        }
      }
    } else {
      // only allocate space for pointers now; objects will be lazily
      // allocated/initialized if/when requested
      arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *));
    }
  }
  tg->reduce_data = (void *)arr;
  tg->reduce_num_data = num;
  return (void *)tg;
}
// Copy task reduction data (except for shared pointers).
template <typename T>
void __kmp_task_reduction_init_copy(kmp_info_t *thr, int num, T *data,
                                    kmp_taskgroup_t *tg, void *reduce_data) {
  kmp_taskred_data_t *arr;
  KA_TRACE(20, ("__kmp_task_reduction_init_copy: Th %p, init taskgroup %p,"
                " from data %p\n",
                thr, tg, reduce_data));
  arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
      thr, num * sizeof(kmp_taskred_data_t));
  // threads will share private copies, thunk routines, sizes, flags, etc.:
  KMP_MEMCPY(arr, reduce_data, num * sizeof(kmp_taskred_data_t));
  for (int i = 0; i < num; ++i) {
    arr[i].reduce_shar = data[i].reduce_shar; // init unshared field
  }
  tg->reduce_data = (void *)arr;
  tg->reduce_num_data = num;
}
// Get thread-specific location of a task reduction data item.
void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 nth = thread->th.th_team_nproc;
  if (nth == 1)
    return data; // nothing to do

  kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
  if (tg == NULL)
    tg = thread->th.th_current_task->td_taskgroup;
  KMP_ASSERT(tg != NULL);
  kmp_taskred_data_t *arr = (kmp_taskred_data_t *)(tg->reduce_data);
  kmp_int32 num = tg->reduce_num_data;
  kmp_int32 tid = thread->th.th_info.ds.ds_tid;

  KMP_ASSERT(data != NULL);
  while (tg != NULL) {
    for (int i = 0; i < num; ++i) {
      if (!arr[i].flags.lazy_priv) {
        if (data == arr[i].reduce_shar ||
            (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
          return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
      } else {
        // check shared location first
        void **p_priv = (void **)(arr[i].reduce_priv);
        if (data == arr[i].reduce_shar)
          goto found;
        // check if we got some thread-specific location as parameter
        for (int j = 0; j < nth; ++j)
          if (data == p_priv[j])
            goto found;
        continue; // not found, continue search
      found:
        if (p_priv[tid] == NULL) {
          // allocate thread-specific object lazily
          p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
          if (arr[i].reduce_init != NULL) {
            if (arr[i].reduce_orig != NULL) { // new interface
              ((void (*)(void *, void *))arr[i].reduce_init)(
                  p_priv[tid], arr[i].reduce_orig);
            } else { // old interface (single parameter)
              ((void (*)(void *))arr[i].reduce_init)(p_priv[tid]);
            }
          }
        }
        return p_priv[tid];
      }
    }
    tg = tg->parent;
    arr = (kmp_taskred_data_t *)(tg->reduce_data);
    num = tg->reduce_num_data;
  }
  KMP_ASSERT2(0, "Unknown task reduction item");
  return NULL; // ERROR, this line never executed
}
// Finalize task reduction; called from __kmpc_end_taskgroup().
static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
  kmp_int32 nth = th->th.th_team_nproc;
  KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1
  kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data;
  kmp_int32 num = tg->reduce_num_data;
  for (int i = 0; i < num; ++i) {
    void *sh_data = arr[i].reduce_shar;
    void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
    void (*f_comb)(void *, void *) =
        (void (*)(void *, void *))(arr[i].reduce_comb);
    if (!arr[i].flags.lazy_priv) {
      void *pr_data = arr[i].reduce_priv;
      size_t size = arr[i].reduce_size;
      for (int j = 0; j < nth; ++j) {
        void *priv_data = (char *)pr_data + j * size;
        f_comb(sh_data, priv_data); // combine results
        if (f_fini)
          f_fini(priv_data); // finalize if needed
      }
    } else {
      void **pr_data = (void **)(arr[i].reduce_priv);
      for (int j = 0; j < nth; ++j) {
        if (pr_data[j] != NULL) {
          f_comb(sh_data, pr_data[j]); // combine results
          if (f_fini)
            f_fini(pr_data[j]); // finalize if needed
          __kmp_free(pr_data[j]);
        }
      }
    }
    __kmp_free(arr[i].reduce_priv);
  }
  __kmp_thread_free(th, arr);
  tg->reduce_data = NULL;
  tg->reduce_num_data = 0;
}
// Cleanup task reduction data for parallel or worksharing; do not touch task
// private data other threads are still working with.
static void __kmp_task_reduction_clean(kmp_info_t *th, kmp_taskgroup_t *tg) {
  __kmp_thread_free(th, tg->reduce_data);
  tg->reduce_data = NULL;
  tg->reduce_num_data = 0;
}
template <typename T>
void *__kmp_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
                                         int num, T *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_int32 nth = thr->th.th_team_nproc;
  __kmpc_taskgroup(loc, gtid); // form new taskgroup first
  if (nth == 1) {
    KA_TRACE(10,
             ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n",
              gtid, thr->th.th_current_task->td_taskgroup));
    return (void *)thr->th.th_current_task->td_taskgroup;
  }
  kmp_team_t *team = thr->th.th_team;
  void *reduce_data;
  kmp_taskgroup_t *tg;
  reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);
  if (reduce_data == NULL &&
      __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
                                 (void *)1)) {
    // single thread enters this block to initialize common reduction data
    KMP_DEBUG_ASSERT(reduce_data == NULL);
    // first initialize own data, then make a copy other threads can use
    tg = (kmp_taskgroup_t *)__kmp_task_reduction_init<T>(gtid, num, data);
    reduce_data = __kmp_thread_malloc(thr, num * sizeof(kmp_taskred_data_t));
    KMP_MEMCPY(reduce_data, tg->reduce_data, num * sizeof(kmp_taskred_data_t));
    // fini counters should be 0 at this point
    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[0]) == 0);
    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[1]) == 0);
    KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], reduce_data);
  } else {
    while (
        (reduce_data = KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws])) ==
        (void *)1) { // wait for task reduction initialization
      KMP_CPU_PAUSE();
    }
    KMP_DEBUG_ASSERT(reduce_data > (void *)1); // should be a valid pointer here
    tg = thr->th.th_current_task->td_taskgroup;
    __kmp_task_reduction_init_copy<T>(thr, num, data, tg, reduce_data);
  }
  return tg;
}
void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
                                          int num, void *data) {
  return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
                                            (kmp_task_red_input_t *)data);
}

void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws, int num,
                                   void *data) {
  return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
                                            (kmp_taskred_input_t *)data);
}

void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int is_ws) {
  __kmpc_end_taskgroup(loc, gtid);
}
// __kmpc_taskgroup: Start a new taskgroup
void __kmpc_taskgroup(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *tg_new =
      (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t));
  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
  KMP_ATOMIC_ST_RLX(&tg_new->count, 0);
  KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq);
  tg_new->parent = taskdata->td_taskgroup;
  tg_new->reduce_data = NULL;
  tg_new->reduce_num_data = 0;
  tg_new->gomp_data = NULL;
  taskdata->td_taskgroup = tg_new;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    kmp_team_t *team = thread->th.th_team;
    ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
    ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;

    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
        &(my_task_data), codeptr);
  }
#endif
}
void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
  int thread_finished = FALSE;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_data_t my_task_data;
  ompt_data_t my_parallel_data;
  void *codeptr = nullptr;
  if (UNLIKELY(ompt_enabled.enabled)) {
    team = thread->th.th_team;
    my_task_data = taskdata->ompt_task_info.task_data;
    my_parallel_data = team->t.ompt_team_info.parallel_data;
    codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
  }
#endif

  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
  KMP_DEBUG_ASSERT(taskgroup != NULL);
  KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
2554 if (__kmp_tasking_mode != tskm_immediate_exec) {
2556 taskdata->td_taskwait_counter += 1;
2557 taskdata->td_taskwait_ident = loc;
2558 taskdata->td_taskwait_thread = gtid + 1;
2562 void *itt_sync_obj = NULL;
2564 KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
2568 #if OMPT_SUPPORT && OMPT_OPTIONAL
2569 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
2570 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2571 ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
2572 &(my_task_data), codeptr);
2576 if (!taskdata->td_flags.team_serial ||
2577 (thread->th.th_task_team != NULL &&
2578 (thread->th.th_task_team->tt.tt_found_proxy_tasks ||
2579 thread->th.th_task_team->tt.tt_hidden_helper_task_encountered))) {
2580 kmp_flag_32<false, false> flag(
2581 RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)), 0U);
2582 while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
2583 flag.execute_tasks(thread, gtid, FALSE,
2584 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
2585 __kmp_task_stealing_constraint);
2588 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
2590 #if OMPT_SUPPORT && OMPT_OPTIONAL
2591 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
2592 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2593 ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
2594 &(my_task_data), codeptr);
2599 KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
2600 KMP_FSYNC_ACQUIRED(taskdata);
2603 KMP_DEBUG_ASSERT(taskgroup->count == 0);
2605 if (taskgroup->reduce_data != NULL &&
2606 !taskgroup->gomp_data) {
2609 kmp_team_t *t = thread->th.th_team;
2613 if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[0])) != NULL &&
2616 cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[0]);
2617 if (cnt == thread->th.th_team_nproc - 1) {
2620 __kmp_task_reduction_fini(thread, taskgroup);
2623 __kmp_thread_free(thread, reduce_data);
2624 KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[0], NULL);
2625 KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[0], 0);
2629 __kmp_task_reduction_clean(thread, taskgroup);
2631 }
else if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[1])) !=
2635 cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[1]);
2636 if (cnt == thread->th.th_team_nproc - 1) {
2638 __kmp_task_reduction_fini(thread, taskgroup);
2641 __kmp_thread_free(thread, reduce_data);
2642 KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[1], NULL);
2643 KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[1], 0);
2647 __kmp_task_reduction_clean(thread, taskgroup);
2651 __kmp_task_reduction_fini(thread, taskgroup);
2655 taskdata->td_taskgroup = taskgroup->parent;
2656 __kmp_thread_free(thread, taskgroup);
2658 KA_TRACE(10, (
"__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
2661 #if OMPT_SUPPORT && OMPT_OPTIONAL
2662 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
2663 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
2664 ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
2665 &(my_task_data), codeptr);
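// Illustrative sketch (not part of the runtime): roughly what a compiler
// emits for `#pragma omp taskgroup`. Exact codegen varies; the point is the
// bracketing pair of calls into the entry points above.
#if 0
void outlined_region(ident_t *loc, int gtid) {
  __kmpc_taskgroup(loc, gtid);     // push a fresh kmp_taskgroup_t
  // ... user code creates child tasks; each one bumps taskgroup->count ...
  __kmpc_end_taskgroup(loc, gtid); // execute/steal tasks until count == 0,
                                   // finish reductions, pop the group
}
#endif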
// __kmp_remove_my_task: remove a task from my own deque
static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
                                        kmp_task_team_t *task_team,
                                        kmp_int32 is_constrained) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_thread_data_t *thread_data;
  kmp_uint32 tail;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
                   NULL); // caller should check this condition

  thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];

  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
                gtid, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2689 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
2691 (
"__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
2692 "ntasks=%d head=%u tail=%u\n",
2693 gtid, thread_data->td.td_deque_ntasks,
2694 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2698 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
2700 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
2701 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2703 (
"__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
2704 "ntasks=%d head=%u tail=%u\n",
2705 gtid, thread_data->td.td_deque_ntasks,
2706 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
  tail = (thread_data->td.td_deque_tail - 1) &
         TASK_DEQUE_MASK(thread_data->td); // wrap index
  taskdata = thread_data->td.td_deque[tail];

  if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata,
                             thread->th.th_current_task)) {
    // the task-scheduling constraint forbids removing the tail task
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  thread_data->td.td_deque_tail = tail;
  TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
                "ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  task = KMP_TASKDATA_TO_TASK(taskdata);
  return task;
}
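// Simplified model of the owner-side pop (hypothetical toy types, not the
// real kmp_base_thread_data_t): the owner removes from the *tail* of its
// deque, LIFO, while thieves take from the head (see __kmp_steal_task below).
#if 0
struct toy_deque {
  void *slots[64];              // size is always a power of two
  unsigned head, tail, ntasks;  // indices wrap with (size - 1)
};
static void *toy_pop_tail(toy_deque *d) {
  if (d->ntasks == 0)
    return nullptr;
  d->tail = (d->tail - 1) & 63u; // step back, mirrors the TASK_DEQUE_MASK use
  d->ntasks--;
  return d->slots[d->tail];
}
#endif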
2743 static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
2744 kmp_task_team_t *task_team,
2745 std::atomic<kmp_int32> *unfinished_threads,
2746 int *thread_finished,
2747 kmp_int32 is_constrained) {
2749 kmp_taskdata_t *taskdata;
2750 kmp_taskdata_t *current;
2751 kmp_thread_data_t *victim_td, *threads_data;
2753 kmp_int32 victim_tid;
2755 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
2757 threads_data = task_team->tt.tt_threads_data;
2758 KMP_DEBUG_ASSERT(threads_data != NULL);
2760 victim_tid = victim_thr->th.th_info.ds.ds_tid;
2761 victim_td = &threads_data[victim_tid];
2763 KA_TRACE(10, (
"__kmp_steal_task(enter): T#%d try to steal from T#%d: "
2764 "task_team=%p ntasks=%d head=%u tail=%u\n",
2765 gtid, __kmp_gtid_from_thread(victim_thr), task_team,
2766 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
2767 victim_td->td.td_deque_tail));
2769 if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
2770 KA_TRACE(10, (
"__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
2771 "task_team=%p ntasks=%d head=%u tail=%u\n",
2772 gtid, __kmp_gtid_from_thread(victim_thr), task_team,
2773 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
2774 victim_td->td.td_deque_tail));
2778 __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
2780 int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
2783 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2784 KA_TRACE(10, (
"__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
2785 "task_team=%p ntasks=%d head=%u tail=%u\n",
2786 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
2787 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2791 KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
2792 current = __kmp_threads[gtid]->th.th_current_task;
2793 taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
2794 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
2796 victim_td->td.td_deque_head =
2797 (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
2799 if (!task_team->tt.tt_untied_task_encountered) {
2801 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2802 KA_TRACE(10, (
"__kmp_steal_task(exit #3): T#%d could not steal from "
2803 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
2804 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
2805 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2810 target = victim_td->td.td_deque_head;
2812 for (i = 1; i < ntasks; ++i) {
2813 target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
2814 taskdata = victim_td->td.td_deque[target];
2815 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
2821 if (taskdata == NULL) {
2823 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2824 KA_TRACE(10, (
"__kmp_steal_task(exit #4): T#%d could not steal from "
2825 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
2826 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
2827 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2831 for (i = i + 1; i < ntasks; ++i) {
2833 target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
2834 victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
2838 victim_td->td.td_deque_tail ==
2839 (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
2840 victim_td->td.td_deque_tail = target;
2842 if (*thread_finished) {
2849 KMP_ATOMIC_INC(unfinished_threads);
2852 (
"__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
2853 gtid, count + 1, task_team));
2854 *thread_finished = FALSE;
2856 TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);
2858 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2862 (
"__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
2863 "task_team=%p ntasks=%d head=%u tail=%u\n",
2864 gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
2865 ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2867 task = KMP_TASKDATA_TO_TASK(taskdata);
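// Simplified model of the thief-side removal (hypothetical helper, not the
// actual runtime structures): steal from the *head* of the victim's deque,
// i.e. the oldest entry, advancing the head with the same power-of-two mask.
// When scheduling constraints block the head task, the code above scans
// forward for an allowed task and compacts the hole left by the stolen entry.
#if 0
static void *toy_steal_head(void *slots[], unsigned *head, unsigned *ntasks) {
  if (*ntasks == 0)
    return nullptr;
  void *t = slots[*head];
  *head = (*head + 1) & 63u; // same masking idea as TASK_DEQUE_MASK
  (*ntasks)--;
  return t;
}
#endif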
2881 static inline int __kmp_execute_tasks_template(
2882 kmp_info_t *thread, kmp_int32 gtid, C *flag,
int final_spin,
2883 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
2884 kmp_int32 is_constrained) {
2885 kmp_task_team_t *task_team = thread->th.th_task_team;
2886 kmp_thread_data_t *threads_data;
2888 kmp_info_t *other_thread;
2889 kmp_taskdata_t *current_task = thread->th.th_current_task;
2890 std::atomic<kmp_int32> *unfinished_threads;
2891 kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
2892 tid = thread->th.th_info.ds.ds_tid;
2894 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
2895 KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);
2897 if (task_team == NULL || current_task == NULL)
2900 KA_TRACE(15, (
"__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
2901 "*thread_finished=%d\n",
2902 gtid, final_spin, *thread_finished));
2904 thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
2905 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
2907 KMP_DEBUG_ASSERT(threads_data != NULL);
2909 nthreads = task_team->tt.tt_nproc;
2910 unfinished_threads = &(task_team->tt.tt_unfinished_threads);
2911 KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks ||
2912 task_team->tt.tt_hidden_helper_task_encountered);
2913 KMP_DEBUG_ASSERT(*unfinished_threads >= 0);
2919 if (use_own_tasks) {
2920 task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
2922 if ((task == NULL) && (nthreads > 1)) {
2926 if (victim_tid == -2) {
2927 victim_tid = threads_data[tid].td.td_deque_last_stolen;
2930 other_thread = threads_data[victim_tid].td.td_thr;
2932 if (victim_tid != -1) {
2934 }
else if (!new_victim) {
2940 victim_tid = __kmp_get_random(thread) % (nthreads - 1);
2941 if (victim_tid >= tid) {
2945 other_thread = threads_data[victim_tid].td.td_thr;
2955 if ((__kmp_tasking_mode == tskm_task_teams) &&
2956 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
2957 (TCR_PTR(CCAST(
void *, other_thread->th.th_sleep_loc)) !=
2960 __kmp_null_resume_wrapper(other_thread);
2973 task = __kmp_steal_task(other_thread, gtid, task_team,
2974 unfinished_threads, thread_finished,
2978 if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
2979 threads_data[tid].td.td_deque_last_stolen = victim_tid;
2986 KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
2995 #if USE_ITT_BUILD && USE_ITT_NOTIFY
2996 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
2997 if (itt_sync_obj == NULL) {
2999 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
3001 __kmp_itt_task_starting(itt_sync_obj);
3004 __kmp_invoke_task(gtid, task, current_task);
3006 if (itt_sync_obj != NULL)
3007 __kmp_itt_task_finished(itt_sync_obj);
3014 if (flag == NULL || (!final_spin && flag->done_check())) {
3017 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3021 if (thread->th.th_task_team == NULL) {
3024 KMP_YIELD(__kmp_library == library_throughput);
3027 if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
3028 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d stolen task spawned "
3029 "other tasks, restart\n",
3040 KMP_ATOMIC_LD_ACQ(¤t_task->td_incomplete_child_tasks) == 0) {
3044 if (!*thread_finished) {
3046 kmp_int32 count = -1 +
3048 KMP_ATOMIC_DEC(unfinished_threads);
3049 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d dec "
3050 "unfinished_threads to %d task_team=%p\n",
3051 gtid, count, task_team));
3052 *thread_finished = TRUE;
3060 if (flag != NULL && flag->done_check()) {
3063 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3071 if (thread->th.th_task_team == NULL) {
3073 (
"__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
3082 if (flag == NULL || (!final_spin && flag->done_check())) {
3084 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3091 if (nthreads == 1 &&
3092 KMP_ATOMIC_LD_ACQ(¤t_task->td_incomplete_child_tasks))
3096 (
"__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
3102 template <
bool C,
bool S>
3103 int __kmp_execute_tasks_32(
3104 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32<C, S> *flag,
int final_spin,
3105 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
3106 kmp_int32 is_constrained) {
3107 return __kmp_execute_tasks_template(
3108 thread, gtid, flag, final_spin,
3109 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3112 template <
bool C,
bool S>
3113 int __kmp_execute_tasks_64(
3114 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64<C, S> *flag,
int final_spin,
3115 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
3116 kmp_int32 is_constrained) {
3117 return __kmp_execute_tasks_template(
3118 thread, gtid, flag, final_spin,
3119 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3122 template <
bool C,
bool S>
3123 int __kmp_atomic_execute_tasks_64(
3124 kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
3125 int final_spin,
int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
3126 kmp_int32 is_constrained) {
3127 return __kmp_execute_tasks_template(
3128 thread, gtid, flag, final_spin,
3129 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3132 int __kmp_execute_tasks_oncore(
3133 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag,
int final_spin,
3134 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
3135 kmp_int32 is_constrained) {
3136 return __kmp_execute_tasks_template(
3137 thread, gtid, flag, final_spin,
3138 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3142 __kmp_execute_tasks_32<false, false>(kmp_info_t *, kmp_int32,
3143 kmp_flag_32<false, false> *,
int,
3144 int *USE_ITT_BUILD_ARG(
void *), kmp_int32);
3146 template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32,
3147 kmp_flag_64<false, true> *,
3149 int *USE_ITT_BUILD_ARG(
void *),
3152 template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
3153 kmp_flag_64<true, false> *,
3155 int *USE_ITT_BUILD_ARG(
void *),
3158 template int __kmp_atomic_execute_tasks_64<false, true>(
3159 kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *,
int,
3160 int *USE_ITT_BUILD_ARG(
void *), kmp_int32);
3162 template int __kmp_atomic_execute_tasks_64<true, false>(
3163 kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *,
int,
3164 int *USE_ITT_BUILD_ARG(
void *), kmp_int32);
3169 static void __kmp_enable_tasking(kmp_task_team_t *task_team,
3170 kmp_info_t *this_thr) {
3171 kmp_thread_data_t *threads_data;
3172 int nthreads, i, is_init_thread;
3174 KA_TRACE(10, (
"__kmp_enable_tasking(enter): T#%d\n",
3175 __kmp_gtid_from_thread(this_thr)));
3177 KMP_DEBUG_ASSERT(task_team != NULL);
3178 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
3180 nthreads = task_team->tt.tt_nproc;
3181 KMP_DEBUG_ASSERT(nthreads > 0);
3182 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
3185 is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
3187 if (!is_init_thread) {
3191 (
"__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
3192 __kmp_gtid_from_thread(this_thr)));
3195 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
3196 KMP_DEBUG_ASSERT(threads_data != NULL);
3198 if (__kmp_tasking_mode == tskm_task_teams &&
3199 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
3203 for (i = 0; i < nthreads; i++) {
3205 kmp_info_t *thread = threads_data[i].td.td_thr;
3207 if (i == this_thr->th.th_info.ds.ds_tid) {
3216 if ((sleep_loc = TCR_PTR(CCAST(
void *, thread->th.th_sleep_loc))) !=
3218 KF_TRACE(50, (
"__kmp_enable_tasking: T#%d waking up thread T#%d\n",
3219 __kmp_gtid_from_thread(this_thr),
3220 __kmp_gtid_from_thread(thread)));
3221 __kmp_null_resume_wrapper(thread);
3223 KF_TRACE(50, (
"__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
3224 __kmp_gtid_from_thread(this_thr),
3225 __kmp_gtid_from_thread(thread)));
3230 KA_TRACE(10, (
"__kmp_enable_tasking(exit): T#%d\n",
3231 __kmp_gtid_from_thread(this_thr)));
3264 static kmp_task_team_t *__kmp_free_task_teams =
3267 kmp_bootstrap_lock_t __kmp_task_team_lock =
3268 KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
3275 static void __kmp_alloc_task_deque(kmp_info_t *thread,
3276 kmp_thread_data_t *thread_data) {
3277 __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
3278 KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);
3281 thread_data->td.td_deque_last_stolen = -1;
3283 KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
3284 KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
3285 KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);
3289 (
"__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
3290 __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
3294 thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
3295 INITIAL_TASK_DEQUE_SIZE *
sizeof(kmp_taskdata_t *));
3296 thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
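// The deque is kept at a power-of-two size so head/tail can wrap with a mask
// instead of a modulo; that is all TASK_DEQUE_MASK provides. Minimal
// illustration (values hypothetical, independent of INITIAL_TASK_DEQUE_SIZE):
#if 0
constexpr unsigned kSize = 256;           // must stay a power of two
constexpr unsigned kMask = kSize - 1;     // role of TASK_DEQUE_MASK(td)
inline unsigned toy_next(unsigned idx) { return (idx + 1) & kMask; } // 255 -> 0
#endif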
3302 static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
3303 if (thread_data->td.td_deque != NULL) {
3304 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3305 TCW_4(thread_data->td.td_deque_ntasks, 0);
3306 __kmp_free(thread_data->td.td_deque);
3307 thread_data->td.td_deque = NULL;
3308 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3311 #ifdef BUILD_TIED_TASK_STACK
3313 if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
3314 __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
3326 static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
3327 kmp_task_team_t *task_team) {
3328 kmp_thread_data_t **threads_data_p;
3329 kmp_int32 nthreads, maxthreads;
3330 int is_init_thread = FALSE;
3332 if (TCR_4(task_team->tt.tt_found_tasks)) {
3337 threads_data_p = &task_team->tt.tt_threads_data;
3338 nthreads = task_team->tt.tt_nproc;
3339 maxthreads = task_team->tt.tt_max_threads;
3344 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3346 if (!TCR_4(task_team->tt.tt_found_tasks)) {
3348 kmp_team_t *team = thread->th.th_team;
3351 is_init_thread = TRUE;
3352 if (maxthreads < nthreads) {
3354 if (*threads_data_p != NULL) {
3355 kmp_thread_data_t *old_data = *threads_data_p;
3356 kmp_thread_data_t *new_data = NULL;
3360 (
"__kmp_realloc_task_threads_data: T#%d reallocating "
3361 "threads data for task_team %p, new_size = %d, old_size = %d\n",
3362 __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
3367 new_data = (kmp_thread_data_t *)__kmp_allocate(
3368 nthreads *
sizeof(kmp_thread_data_t));
3370 KMP_MEMCPY_S((
void *)new_data, nthreads *
sizeof(kmp_thread_data_t),
3371 (
void *)old_data, maxthreads *
sizeof(kmp_thread_data_t));
3373 #ifdef BUILD_TIED_TASK_STACK
3375 for (i = maxthreads; i < nthreads; i++) {
3376 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3377 __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
3381 (*threads_data_p) = new_data;
3382 __kmp_free(old_data);
3384 KE_TRACE(10, (
"__kmp_realloc_task_threads_data: T#%d allocating "
3385 "threads data for task_team %p, size = %d\n",
3386 __kmp_gtid_from_thread(thread), task_team, nthreads));
3390 *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
3391 nthreads *
sizeof(kmp_thread_data_t));
3392 #ifdef BUILD_TIED_TASK_STACK
3394 for (i = 0; i < nthreads; i++) {
3395 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3396 __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
3400 task_team->tt.tt_max_threads = nthreads;
3403 KMP_DEBUG_ASSERT(*threads_data_p != NULL);
3407 for (i = 0; i < nthreads; i++) {
3408 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3409 thread_data->td.td_thr = team->t.t_threads[i];
3411 if (thread_data->td.td_deque_last_stolen >= nthreads) {
3415 thread_data->td.td_deque_last_stolen = -1;
3420 TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
3423 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3424 return is_init_thread;
3430 static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
3431 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3432 if (task_team->tt.tt_threads_data != NULL) {
3434 for (i = 0; i < task_team->tt.tt_max_threads; i++) {
3435 __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
3437 __kmp_free(task_team->tt.tt_threads_data);
3438 task_team->tt.tt_threads_data = NULL;
3440 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3447 static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
3449 kmp_task_team_t *task_team = NULL;
3452 KA_TRACE(20, (
"__kmp_allocate_task_team: T#%d entering; team = %p\n",
3453 (thread ? __kmp_gtid_from_thread(thread) : -1), team));
3455 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
3457 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3458 if (__kmp_free_task_teams != NULL) {
3459 task_team = __kmp_free_task_teams;
3460 TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
3461 task_team->tt.tt_next = NULL;
3463 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3466 if (task_team == NULL) {
3467 KE_TRACE(10, (
"__kmp_allocate_task_team: T#%d allocating "
3468 "task team for team %p\n",
3469 __kmp_gtid_from_thread(thread), team));
3472 task_team = (kmp_task_team_t *)__kmp_allocate(
sizeof(kmp_task_team_t));
3473 __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
3474 #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
3477 __itt_suppress_mark_range(
3478 __itt_suppress_range, __itt_suppress_threading_errors,
3479 &task_team->tt.tt_found_tasks,
sizeof(task_team->tt.tt_found_tasks));
3480 __itt_suppress_mark_range(__itt_suppress_range,
3481 __itt_suppress_threading_errors,
3482 CCAST(kmp_uint32 *, &task_team->tt.tt_active),
3483 sizeof(task_team->tt.tt_active));
3491 TCW_4(task_team->tt.tt_found_tasks, FALSE);
3492 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3493 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3494 task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
3496 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads);
3497 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3498 TCW_4(task_team->tt.tt_active, TRUE);
3500 KA_TRACE(20, (
"__kmp_allocate_task_team: T#%d exiting; task_team = %p "
3501 "unfinished_threads init'd to %d\n",
3502 (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
3503 KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
3510 void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
3511 KA_TRACE(20, (
"__kmp_free_task_team: T#%d task_team = %p\n",
3512 thread ? __kmp_gtid_from_thread(thread) : -1, task_team));
3515 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3517 KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
3518 task_team->tt.tt_next = __kmp_free_task_teams;
3519 TCW_PTR(__kmp_free_task_teams, task_team);
3521 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3529 void __kmp_reap_task_teams(
void) {
3530 kmp_task_team_t *task_team;
3532 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
3534 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3535 while ((task_team = __kmp_free_task_teams) != NULL) {
3536 __kmp_free_task_teams = task_team->tt.tt_next;
3537 task_team->tt.tt_next = NULL;
3540 if (task_team->tt.tt_threads_data != NULL) {
3541 __kmp_free_task_threads_data(task_team);
3543 __kmp_free(task_team);
3545 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3552 void __kmp_wait_to_unref_task_teams(
void) {
3558 KMP_INIT_YIELD(spins);
3559 KMP_INIT_BACKOFF(time);
3567 for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
3568 thread = thread->th.th_next_pool) {
3572 if (TCR_PTR(thread->th.th_task_team) == NULL) {
3573 KA_TRACE(10, (
"__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
3574 __kmp_gtid_from_thread(thread)));
3579 if (!__kmp_is_thread_alive(thread, &exit_val)) {
3580 thread->th.th_task_team = NULL;
3587 KA_TRACE(10, (
"__kmp_wait_to_unref_task_team: Waiting for T#%d to "
3588 "unreference task_team\n",
3589 __kmp_gtid_from_thread(thread)));
3591 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
3594 if ((sleep_loc = TCR_PTR(CCAST(
void *, thread->th.th_sleep_loc))) !=
3598 (
"__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
3599 __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
3600 __kmp_null_resume_wrapper(thread);
3609 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
3615 void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team,
int always) {
3616 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3622 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
3623 (always || team->t.t_nproc > 1)) {
3624 team->t.t_task_team[this_thr->th.th_task_state] =
3625 __kmp_allocate_task_team(this_thr, team);
3626 KA_TRACE(20, (
"__kmp_task_team_setup: Primary T#%d created new task_team %p"
3627 " for team %d at parity=%d\n",
3628 __kmp_gtid_from_thread(this_thr),
3629 team->t.t_task_team[this_thr->th.th_task_state], team->t.t_id,
3630 this_thr->th.th_task_state));
3640 if (team->t.t_nproc > 1) {
3641 int other_team = 1 - this_thr->th.th_task_state;
3642 KMP_DEBUG_ASSERT(other_team >= 0 && other_team < 2);
3643 if (team->t.t_task_team[other_team] == NULL) {
3644 team->t.t_task_team[other_team] =
3645 __kmp_allocate_task_team(this_thr, team);
3646 KA_TRACE(20, (
"__kmp_task_team_setup: Primary T#%d created second new "
3647 "task_team %p for team %d at parity=%d\n",
3648 __kmp_gtid_from_thread(this_thr),
3649 team->t.t_task_team[other_team], team->t.t_id, other_team));
3652 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
3653 if (!task_team->tt.tt_active ||
3654 team->t.t_nproc != task_team->tt.tt_nproc) {
3655 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
3656 TCW_4(task_team->tt.tt_found_tasks, FALSE);
3657 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3658 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3659 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads,
3661 TCW_4(task_team->tt.tt_active, TRUE);
3665 KA_TRACE(20, (
"__kmp_task_team_setup: Primary T#%d reset next task_team "
3666 "%p for team %d at parity=%d\n",
3667 __kmp_gtid_from_thread(this_thr),
3668 team->t.t_task_team[other_team], team->t.t_id, other_team));
3676 if (this_thr == __kmp_hidden_helper_main_thread) {
3677 for (
int i = 0; i < 2; ++i) {
3678 kmp_task_team_t *task_team = team->t.t_task_team[i];
3679 if (KMP_TASKING_ENABLED(task_team)) {
3682 __kmp_enable_tasking(task_team, this_thr);
3683 for (
int j = 0; j < task_team->tt.tt_nproc; ++j) {
3684 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[j];
3685 if (thread_data->td.td_deque == NULL) {
3686 __kmp_alloc_task_deque(__kmp_hidden_helper_threads[j], thread_data);
3696 void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
3697 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3701 this_thr->th.th_task_state = (kmp_uint8)(1 - this_thr->th.th_task_state);
3705 TCW_PTR(this_thr->th.th_task_team,
3706 team->t.t_task_team[this_thr->th.th_task_state]);
3708 (
"__kmp_task_team_sync: Thread T#%d task team switched to task_team "
3709 "%p from Team #%d (parity=%d)\n",
3710 __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
3711 team->t.t_id, this_thr->th.th_task_state));
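// Hedged sketch of the two-slot ("double buffered") task-team scheme that the
// setup/sync/wait routines here rely on: each team keeps t_task_team[0..1],
// threads carry a 0/1 parity in th_task_state, and flipping the parity at a
// barrier attaches the thread to the task team prepared for the next region.
// Simplified toy model (hypothetical types):
#if 0
struct toy_team { void *task_team[2]; };
struct toy_thread { unsigned task_state; void *task_team; };
inline void toy_task_team_sync(toy_thread *thr, toy_team *team) {
  thr->task_state = 1u - thr->task_state;            // flip parity
  thr->task_team = team->task_team[thr->task_state]; // attach to "next" slot
}
#endif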
3721 void __kmp_task_team_wait(
3722 kmp_info_t *this_thr,
3723 kmp_team_t *team USE_ITT_BUILD_ARG(
void *itt_sync_obj),
int wait) {
3724 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
3726 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3727 KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);
3729 if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
3731 KA_TRACE(20, (
"__kmp_task_team_wait: Primary T#%d waiting for all tasks "
3732 "(for unfinished_threads to reach 0) on task_team = %p\n",
3733 __kmp_gtid_from_thread(this_thr), task_team));
3737 kmp_flag_32<false, false> flag(
3738 RCAST(std::atomic<kmp_uint32> *,
3739 &task_team->tt.tt_unfinished_threads),
3741 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
3747 (
"__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "
3748 "setting active to false, setting local and team's pointer to NULL\n",
3749 __kmp_gtid_from_thread(this_thr), task_team));
3750 KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
3751 task_team->tt.tt_found_proxy_tasks == TRUE ||
3752 task_team->tt.tt_hidden_helper_task_encountered == TRUE);
3753 TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3754 TCW_SYNC_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3755 KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
3756 TCW_SYNC_4(task_team->tt.tt_active, FALSE);
3759 TCW_PTR(this_thr->th.th_task_team, NULL);
3768 void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread,
int gtid) {
3769 std::atomic<kmp_uint32> *spin = RCAST(
3770 std::atomic<kmp_uint32> *,
3771 &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
3773 KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);
3776 KMP_FSYNC_SPIN_INIT(spin, NULL);
3778 kmp_flag_32<false, false> spin_flag(spin, 0U);
3779 while (!spin_flag.execute_tasks(thread, gtid, TRUE,
3780 &flag USE_ITT_BUILD_ARG(NULL), 0)) {
3783 KMP_FSYNC_SPIN_PREPARE(RCAST(
void *, spin));
3786 if (TCR_4(__kmp_global.g.g_done)) {
3787 if (__kmp_global.g.g_abort)
3788 __kmp_abort_thread();
3794 KMP_FSYNC_SPIN_ACQUIRED(RCAST(
void *, spin));
3803 static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
3805 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
3806 kmp_task_team_t *task_team = taskdata->td_task_team;
3808 KA_TRACE(20, (
"__kmp_give_task: trying to give task %p to thread %d.\n",
3812 KMP_DEBUG_ASSERT(task_team != NULL);
3814 bool result =
false;
3815 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
3817 if (thread_data->td.td_deque == NULL) {
3821 (
"__kmp_give_task: thread %d has no queue while giving task %p.\n",
3826 if (TCR_4(thread_data->td.td_deque_ntasks) >=
3827 TASK_DEQUE_SIZE(thread_data->td)) {
3830 (
"__kmp_give_task: queue is full while giving task %p to thread %d.\n",
3835 if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
3838 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3839 if (TCR_4(thread_data->td.td_deque_ntasks) >=
3840 TASK_DEQUE_SIZE(thread_data->td)) {
3842 __kmp_realloc_task_deque(thread, thread_data);
3847 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3849 if (TCR_4(thread_data->td.td_deque_ntasks) >=
3850 TASK_DEQUE_SIZE(thread_data->td)) {
3851 KA_TRACE(30, (
"__kmp_give_task: queue is full while giving task %p to "
3857 if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
3858 goto release_and_exit;
3860 __kmp_realloc_task_deque(thread, thread_data);
3866 thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
3868 thread_data->td.td_deque_tail =
3869 (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
3870 TCW_4(thread_data->td.td_deque_ntasks,
3871 TCR_4(thread_data->td.td_deque_ntasks) + 1);
3874 KA_TRACE(30, (
"__kmp_give_task: successfully gave task %p to thread %d.\n",
3878 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
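// The `pass` parameter caps how far a full deque may be grown before the
// caller gives up on this thread and tries another one: reallocation only
// happens while TASK_DEQUE_SIZE(td) / INITIAL_TASK_DEQUE_SIZE < pass.
// Minimal restatement of that predicate (hypothetical helper):
#if 0
inline bool toy_may_grow(unsigned deque_size, unsigned initial_size,
                         unsigned pass) {
  return deque_size / initial_size < pass; // otherwise: release lock and exit
}
#endif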
3883 #define PROXY_TASK_FLAG 0x40000000
3900 static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
3901 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
3902 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
3903 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
3904 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
3906 taskdata->td_flags.complete = 1;
3908 if (taskdata->td_taskgroup)
3909 KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
3913 KMP_ATOMIC_OR(&taskdata->td_incomplete_child_tasks, PROXY_TASK_FLAG);
3916 static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
3918 kmp_int32 children = 0;
3922 KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
3923 KMP_DEBUG_ASSERT(children >= 0);
3926 KMP_ATOMIC_AND(&taskdata->td_incomplete_child_tasks, ~PROXY_TASK_FLAG);
3929 static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
3930 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
3931 kmp_info_t *thread = __kmp_threads[gtid];
3933 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
3934 KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
3939 while ((KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) &
3940 PROXY_TASK_FLAG) > 0)
3943 __kmp_release_deps(gtid, taskdata);
3944 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
3956 KMP_DEBUG_ASSERT(ptask != NULL);
3957 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
3959 10, (
"__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
3961 __kmp_assert_valid_gtid(gtid);
3962 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
3964 __kmp_first_top_half_finish_proxy(taskdata);
3965 __kmp_second_top_half_finish_proxy(taskdata);
3966 __kmp_bottom_half_finish_proxy(gtid, ptask);
3969 (
"__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
3973 void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
3974 KMP_DEBUG_ASSERT(ptask != NULL);
3975 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
3979 kmp_team_t *team = taskdata->td_team;
3980 kmp_int32 nthreads = team->t.t_nproc;
3985 kmp_int32 start_k = start % nthreads;
3987 kmp_int32 k = start_k;
3991 thread = team->t.t_threads[k];
3992 k = (k + 1) % nthreads;
3998 }
while (!__kmp_give_task(thread, k, ptask, pass));
4009 KMP_DEBUG_ASSERT(ptask != NULL);
4010 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4014 (
"__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
4017 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
4019 __kmp_first_top_half_finish_proxy(taskdata);
4021 __kmpc_give_task(ptask);
4023 __kmp_second_top_half_finish_proxy(taskdata);
4027 (
"__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
4031 kmp_event_t *__kmpc_task_allow_completion_event(
ident_t *loc_ref,
int gtid,
4033 kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
4034 if (td->td_allow_completion_event.type == KMP_EVENT_UNINITIALIZED) {
4035 td->td_allow_completion_event.type = KMP_EVENT_ALLOW_COMPLETION;
4036 td->td_allow_completion_event.ed.task = task;
4037 __kmp_init_tas_lock(&td->td_allow_completion_event.lock);
4039 return &td->td_allow_completion_event;
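// User-level view (hedged): the OpenMP 5.0 `detach(event)` clause maps onto
// this entry point, and omp_fulfill_event() reaches __kmp_fulfill_event just
// below. Sketch of the intended usage; start_async_io() is a hypothetical
// helper that stores the event handle for a later completion callback.
#if 0
#include <omp.h>
void detached_task_example() {
  omp_event_handle_t ev;
#pragma omp task detach(ev) // task is not considered complete until fulfilled
  { start_async_io(&ev); }
  // elsewhere, when the asynchronous work finishes:
  //   omp_fulfill_event(ev);
#pragma omp taskwait // returns only after the event has been fulfilled
}
#endif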
4042 void __kmp_fulfill_event(kmp_event_t *event) {
4043 if (event->type == KMP_EVENT_ALLOW_COMPLETION) {
4044 kmp_task_t *ptask = event->ed.task;
4045 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4046 bool detached =
false;
4047 int gtid = __kmp_get_gtid();
4052 __kmp_acquire_tas_lock(&event->lock, gtid);
4053 if (taskdata->td_flags.proxy == TASK_PROXY) {
4059 if (UNLIKELY(ompt_enabled.enabled))
4060 __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
4063 event->type = KMP_EVENT_UNINITIALIZED;
4064 __kmp_release_tas_lock(&event->lock, gtid);
4070 if (UNLIKELY(ompt_enabled.enabled))
4071 __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
4075 kmp_team_t *team = taskdata->td_team;
4076 kmp_info_t *thread = __kmp_get_thread();
4077 if (thread->th.th_team == team) {
4095 kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
4097 kmp_taskdata_t *taskdata;
4098 kmp_taskdata_t *taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
4099 kmp_taskdata_t *parent_task = taskdata_src->td_parent;
4100 size_t shareds_offset;
4103 KA_TRACE(10, (
"__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
4105 KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
4107 KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
4108 task_size = taskdata_src->td_size_alloc;
4111 KA_TRACE(30, (
"__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
4114 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
4116 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
4118 KMP_MEMCPY(taskdata, taskdata_src, task_size);
4120 task = KMP_TASKDATA_TO_TASK(taskdata);
4123 taskdata->td_task_id = KMP_GEN_TASK_ID();
  if (task->shareds != NULL) { // need to set up shareds pointer
    shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
    task->shareds = &((char *)taskdata)[shareds_offset];
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  }
4130 taskdata->td_alloc_thread = thread;
4131 taskdata->td_parent = parent_task;
4133 taskdata->td_taskgroup = parent_task->td_taskgroup;
4136 if (taskdata->td_flags.tiedness == TASK_TIED)
4137 taskdata->td_last_tied = taskdata;
4141 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
4142 KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
4143 if (parent_task->td_taskgroup)
4144 KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
4147 if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
4148 KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
4152 (
"__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
4153 thread, taskdata, taskdata->td_parent));
4155 if (UNLIKELY(ompt_enabled.enabled))
4156 __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);

KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8);
4172 class kmp_taskloop_bounds_t {
4174 const kmp_taskdata_t *taskdata;
4175 size_t lower_offset;
4176 size_t upper_offset;
4179 kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
4180 : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
4181 lower_offset((char *)lb - (char *)task),
4182 upper_offset((char *)ub - (char *)task) {
    KMP_DEBUG_ASSERT((char *)lb > (char *)_task);
    KMP_DEBUG_ASSERT((char *)ub > (char *)_task);
  }
4186 kmp_taskloop_bounds_t(kmp_task_t *_task,
const kmp_taskloop_bounds_t &bounds)
4187 : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)),
4188 lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}
  size_t get_lower_offset() const { return lower_offset; }
  size_t get_upper_offset() const { return upper_offset; }
  kmp_uint64 get_lb() const {
    kmp_int64 retval;
4193 #if defined(KMP_GOMP_COMPAT)
4195 if (!taskdata->td_flags.native) {
4196 retval = *(kmp_int64 *)((
char *)task + lower_offset);
4199 if (taskdata->td_size_loop_bounds == 4) {
4200 kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds);
4201 retval = (kmp_int64)*lb;
4203 kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds);
4204 retval = (kmp_int64)*lb;
4209 retval = *(kmp_int64 *)((
char *)task + lower_offset);
4213 kmp_uint64 get_ub()
const {
4215 #if defined(KMP_GOMP_COMPAT)
4217 if (!taskdata->td_flags.native) {
4218 retval = *(kmp_int64 *)((
char *)task + upper_offset);
4221 if (taskdata->td_size_loop_bounds == 4) {
4222 kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1;
4223 retval = (kmp_int64)*ub;
4225 kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1;
4226 retval = (kmp_int64)*ub;
4230 retval = *(kmp_int64 *)((
char *)task + upper_offset);
4234 void set_lb(kmp_uint64 lb) {
4235 #if defined(KMP_GOMP_COMPAT)
4237 if (!taskdata->td_flags.native) {
4238 *(kmp_uint64 *)((
char *)task + lower_offset) = lb;
4241 if (taskdata->td_size_loop_bounds == 4) {
4242 kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds);
4243 *lower = (kmp_uint32)lb;
4245 kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds);
4246 *lower = (kmp_uint64)lb;
4250 *(kmp_uint64 *)((
char *)task + lower_offset) = lb;
4253 void set_ub(kmp_uint64 ub) {
4254 #if defined(KMP_GOMP_COMPAT)
4256 if (!taskdata->td_flags.native) {
4257 *(kmp_uint64 *)((
char *)task + upper_offset) = ub;
4260 if (taskdata->td_size_loop_bounds == 4) {
4261 kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1;
4262 *upper = (kmp_uint32)ub;
4264 kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1;
4265 *upper = (kmp_uint64)ub;
4269 *(kmp_uint64 *)((
char *)task + upper_offset) = ub;
4290 void __kmp_taskloop_linear(
ident_t *loc,
int gtid, kmp_task_t *task,
4291 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4292 kmp_uint64 ub_glob, kmp_uint64 num_tasks,
4293 kmp_uint64 grainsize, kmp_uint64 extras,
4294 kmp_int64 last_chunk, kmp_uint64 tc,
4300 KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
4301 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4303 kmp_taskloop_bounds_t task_bounds(task, lb, ub);
4304 kmp_uint64 lower = task_bounds.get_lb();
4305 kmp_uint64 upper = task_bounds.get_ub();
4307 kmp_info_t *thread = __kmp_threads[gtid];
4308 kmp_taskdata_t *current_task = thread->th.th_current_task;
4309 kmp_task_t *next_task;
4310 kmp_int32 lastpriv = 0;
4312 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
4313 (last_chunk < 0 ? last_chunk : extras));
4314 KMP_DEBUG_ASSERT(num_tasks > extras);
4315 KMP_DEBUG_ASSERT(num_tasks > 0);
4316 KA_TRACE(20, (
"__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
4317 "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n",
4318 gtid, num_tasks, grainsize, extras, last_chunk, lower, upper,
4319 ub_glob, st, task_dup));
4322 for (i = 0; i < num_tasks; ++i) {
4323 kmp_uint64 chunk_minus_1;
4325 chunk_minus_1 = grainsize - 1;
4327 chunk_minus_1 = grainsize;
4330 upper = lower + st * chunk_minus_1;
4334 if (i == num_tasks - 1) {
4337 KMP_DEBUG_ASSERT(upper == *ub);
4338 if (upper == ub_glob)
4340 }
else if (st > 0) {
4341 KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
4342 if ((kmp_uint64)st > ub_glob - upper)
4345 KMP_DEBUG_ASSERT(upper + st < *ub);
4346 if (upper - ub_glob < (kmp_uint64)(-st))
4350 next_task = __kmp_task_dup_alloc(thread, task);
4351 kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
4352 kmp_taskloop_bounds_t next_task_bounds =
4353 kmp_taskloop_bounds_t(next_task, task_bounds);
4356 next_task_bounds.set_lb(lower);
4357 if (next_taskdata->td_flags.native) {
4358 next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
4360 next_task_bounds.set_ub(upper);
4362 if (ptask_dup != NULL)
4364 ptask_dup(next_task, task, lastpriv);
4366 (
"__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
4367 "upper %lld stride %lld, (offsets %p %p)\n",
4368 gtid, i, next_task, lower, upper, st,
4369 next_task_bounds.get_lower_offset(),
4370 next_task_bounds.get_upper_offset()));
4372 __kmp_omp_taskloop_task(NULL, gtid, next_task,
4375 __kmp_omp_task(gtid, next_task,
true);
4380 __kmp_task_start(gtid, task, current_task);
4382 __kmp_task_finish<false>(gtid, task, current_task);
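// Iteration split used above, as a hedged standalone helper (hypothetical
// name): each generated task covers `grainsize` iterations, except that the
// first `extras` tasks take one more, or the final task is shortened by
// |last_chunk| when the num_tasks modifier forces a short trailing chunk.
#if 0
unsigned long long toy_chunk_size(unsigned long long i,
                                  unsigned long long grainsize,
                                  unsigned long long extras) {
  return grainsize + (i < extras ? 1ull : 0ull);
}
#endif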
4387 typedef struct __taskloop_params {
4394 kmp_uint64 num_tasks;
4395 kmp_uint64 grainsize;
4397 kmp_int64 last_chunk;
4399 kmp_uint64 num_t_min;
4403 } __taskloop_params_t;
4405 void __kmp_taskloop_recur(
ident_t *,
int, kmp_task_t *, kmp_uint64 *,
4406 kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
4407 kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64,
4415 int __kmp_taskloop_task(
int gtid,
void *ptask) {
4416 __taskloop_params_t *p =
4417 (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
4418 kmp_task_t *task = p->task;
4419 kmp_uint64 *lb = p->lb;
4420 kmp_uint64 *ub = p->ub;
4421 void *task_dup = p->task_dup;
4423 kmp_int64 st = p->st;
4424 kmp_uint64 ub_glob = p->ub_glob;
4425 kmp_uint64 num_tasks = p->num_tasks;
4426 kmp_uint64 grainsize = p->grainsize;
4427 kmp_uint64 extras = p->extras;
4428 kmp_int64 last_chunk = p->last_chunk;
4429 kmp_uint64 tc = p->tc;
4430 kmp_uint64 num_t_min = p->num_t_min;
4432 void *codeptr_ra = p->codeptr_ra;
4435 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4436 KMP_DEBUG_ASSERT(task != NULL);
4438 (
"__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
4439 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
4440 gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
4443 KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
4444 if (num_tasks > num_t_min)
4445 __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
4446 grainsize, extras, last_chunk, tc, num_t_min,
4452 __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
4453 grainsize, extras, last_chunk, tc,
4459 KA_TRACE(40, (
"__kmp_taskloop_task(exit): T#%d\n", gtid));
4481 void __kmp_taskloop_recur(
ident_t *loc,
int gtid, kmp_task_t *task,
4482 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4483 kmp_uint64 ub_glob, kmp_uint64 num_tasks,
4484 kmp_uint64 grainsize, kmp_uint64 extras,
4485 kmp_int64 last_chunk, kmp_uint64 tc,
4486 kmp_uint64 num_t_min,
4491 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4492 KMP_DEBUG_ASSERT(task != NULL);
4493 KMP_DEBUG_ASSERT(num_tasks > num_t_min);
4495 (
"__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
4496 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
4497 gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
4499 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4500 kmp_uint64 lower = *lb;
4501 kmp_info_t *thread = __kmp_threads[gtid];
4503 kmp_task_t *next_task;
4504 size_t lower_offset =
4505 (
char *)lb - (
char *)task;
4506 size_t upper_offset =
4507 (
char *)ub - (
char *)task;
4509 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
4510 (last_chunk < 0 ? last_chunk : extras));
4511 KMP_DEBUG_ASSERT(num_tasks > extras);
4512 KMP_DEBUG_ASSERT(num_tasks > 0);
4515 kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
4516 kmp_int64 last_chunk0 = 0, last_chunk1 = 0;
4517 kmp_uint64 gr_size0 = grainsize;
4518 kmp_uint64 n_tsk0 = num_tasks >> 1;
4519 kmp_uint64 n_tsk1 = num_tasks - n_tsk0;
4520 if (last_chunk < 0) {
4522 last_chunk1 = last_chunk;
4523 tc0 = grainsize * n_tsk0;
4525 }
else if (n_tsk0 <= extras) {
4528 ext1 = extras - n_tsk0;
4529 tc0 = gr_size0 * n_tsk0;
4534 tc1 = grainsize * n_tsk1;
4537 ub0 = lower + st * (tc0 - 1);
4541 next_task = __kmp_task_dup_alloc(thread, task);
4543 *(kmp_uint64 *)((
char *)next_task + lower_offset) = lb1;
4544 if (ptask_dup != NULL)
4545 ptask_dup(next_task, task, 0);
4550 kmp_taskdata_t *current_task = thread->th.th_current_task;
4551 thread->th.th_current_task = taskdata->td_parent;
4552 kmp_task_t *new_task =
4553 __kmpc_omp_task_alloc(loc, gtid, 1, 3 *
sizeof(
void *),
4554 sizeof(__taskloop_params_t), &__kmp_taskloop_task);
4556 thread->th.th_current_task = current_task;
4557 __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
4558 p->task = next_task;
4559 p->lb = (kmp_uint64 *)((
char *)next_task + lower_offset);
4560 p->ub = (kmp_uint64 *)((
char *)next_task + upper_offset);
4561 p->task_dup = task_dup;
4563 p->ub_glob = ub_glob;
4564 p->num_tasks = n_tsk1;
4565 p->grainsize = grainsize;
4567 p->last_chunk = last_chunk1;
4569 p->num_t_min = num_t_min;
4571 p->codeptr_ra = codeptr_ra;
4576 __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
4578 __kmp_omp_task(gtid, new_task,
true);
4582 if (n_tsk0 > num_t_min)
4583 __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
4584 ext0, last_chunk0, tc0, num_t_min,
4590 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
4591 gr_size0, ext0, last_chunk0, tc0,
4597 KA_TRACE(40, (
"__kmp_taskloop_recur(exit): T#%d\n", gtid));
4600 static void __kmp_taskloop(
ident_t *loc,
int gtid, kmp_task_t *task,
int if_val,
4601 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4602 int nogroup,
int sched, kmp_uint64 grainsize,
4603 int modifier,
void *task_dup) {
4604 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4605 KMP_DEBUG_ASSERT(task != NULL);
4607 #if OMPT_SUPPORT && OMPT_OPTIONAL
4608 OMPT_STORE_RETURN_ADDRESS(gtid);
4610 __kmpc_taskgroup(loc, gtid);
4615 kmp_taskloop_bounds_t task_bounds(task, lb, ub);
4618 kmp_uint64 lower = task_bounds.get_lb();
4619 kmp_uint64 upper = task_bounds.get_ub();
4620 kmp_uint64 ub_glob = upper;
4621 kmp_uint64 num_tasks = 0, extras = 0;
4622 kmp_int64 last_chunk =
4624 kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
4625 kmp_info_t *thread = __kmp_threads[gtid];
4626 kmp_taskdata_t *current_task = thread->th.th_current_task;
4628 KA_TRACE(20, (
"__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
4629 "grain %llu(%d, %d), dup %p\n",
4630 gtid, taskdata, lower, upper, st, grainsize, sched, modifier,
4635 tc = upper - lower + 1;
4636 }
else if (st < 0) {
4637 tc = (lower - upper) / (-st) + 1;
4639 tc = (upper - lower) / st + 1;
4642 KA_TRACE(20, (
"__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid));
4644 __kmp_task_start(gtid, task, current_task);
4646 __kmp_task_finish<false>(gtid, task, current_task);
4650 #if OMPT_SUPPORT && OMPT_OPTIONAL
4651 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
4652 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
4653 if (ompt_enabled.ompt_callback_work) {
4654 ompt_callbacks.ompt_callback(ompt_callback_work)(
4655 ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
4656 &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
4660 if (num_tasks_min == 0)
4663 KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);
4669 grainsize = thread->th.th_team_nproc * 10;
4672 if (grainsize > tc) {
4677 num_tasks = grainsize;
4678 grainsize = tc / num_tasks;
4679 extras = tc % num_tasks;
4683 if (grainsize > tc) {
4689 num_tasks = (tc + grainsize - 1) / grainsize;
4690 last_chunk = tc - (num_tasks * grainsize);
4693 num_tasks = tc / grainsize;
4695 grainsize = tc / num_tasks;
4696 extras = tc % num_tasks;
4701 KMP_ASSERT2(0,
"unknown scheduling of taskloop");
4704 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
4705 (last_chunk < 0 ? last_chunk : extras));
4706 KMP_DEBUG_ASSERT(num_tasks > extras);
4707 KMP_DEBUG_ASSERT(num_tasks > 0);
4713 taskdata->td_flags.task_serial = 1;
4714 taskdata->td_flags.tiedness = TASK_TIED;
4716 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
4717 grainsize, extras, last_chunk, tc,
4719 OMPT_GET_RETURN_ADDRESS(0),
4724 }
else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
4725 KA_TRACE(20, (
"__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
4726 "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
4727 gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
4729 __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
4730 grainsize, extras, last_chunk, tc, num_tasks_min,
4732 OMPT_GET_RETURN_ADDRESS(0),
4736 KA_TRACE(20, (
"__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
4737 "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
4738 gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
4740 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
4741 grainsize, extras, last_chunk, tc,
4743 OMPT_GET_RETURN_ADDRESS(0),
4748 #if OMPT_SUPPORT && OMPT_OPTIONAL
4749 if (ompt_enabled.ompt_callback_work) {
4750 ompt_callbacks.ompt_callback(ompt_callback_work)(
4751 ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
4752 &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
4757 #if OMPT_SUPPORT && OMPT_OPTIONAL
4758 OMPT_STORE_RETURN_ADDRESS(gtid);
4760 __kmpc_end_taskgroup(loc, gtid);
4762 KA_TRACE(20, (
"__kmp_taskloop(exit): T#%d\n", gtid));
4782 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
int nogroup,
4783 int sched, kmp_uint64 grainsize,
void *task_dup) {
4784 __kmp_assert_valid_gtid(gtid);
4785 KA_TRACE(20, (
"__kmpc_taskloop(enter): T#%d\n", gtid));
4786 __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
4788 KA_TRACE(20, (
"__kmpc_taskloop(exit): T#%d\n", gtid));
4809 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4810 int nogroup,
int sched, kmp_uint64 grainsize,
4811 int modifier,
void *task_dup) {
4812 __kmp_assert_valid_gtid(gtid);
4813 KA_TRACE(20, (
"__kmpc_taskloop_5(enter): T#%d\n", gtid));
4814 __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
4815 modifier, task_dup);
4816 KA_TRACE(20, (
"__kmpc_taskloop_5(exit): T#%d\n", gtid));