#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#include "ompt-specific.h"
#include "ompd-specific.h"

#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#endif

#define KMP_USE_PRCTL 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
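
/* Determine the global thread id (gtid) of the calling thread without
   registering a new root. The fast paths use thread-local data
   (gtid_mode >= 3) or the keyed TLS slot (gtid_mode >= 2); otherwise the
   address of a local variable is compared against the recorded stack
   base/size of every known thread to identify which stack we are running on.
   Returns KMP_GTID_DNE when the gtid machinery is not yet initialized. */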
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;
      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated stack size is if
           we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
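
/* Sanity-check the recorded stack bounds of the given thread. With
   KMP_STORAGE_MAP the bounds are printed; with environment checks enabled
   (and for non-uber threads) the stack is compared against every other
   registered thread and a fatal StackOverlap error is raised if the regions
   intersect. */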
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
     cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512
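
/* Print one storage-map line ("OMP storage map: <begin> <end> <size> <name>")
   under the stdio bootstrap lock. When KMP_PRINT_DATA_PLACEMENT is enabled it
   additionally reports which memory node each page of the range resides on. */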
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          /* Collate adjacent pages that live on the same host node. */
          do {
            last = p1;
            lastNode = node;
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
void __kmp_abort_process() {
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown.
    __kmp_global.g.g_abort = SIGABRT;
  }

  __kmp_unregister_library();

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // In case of abort just spin here; abort() elsewhere kills all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread
/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }

#if ENABLE_LIBOMPTARGET
static void __kmp_init_omptarget() {
  __kmp_init_target_task();
}
#endif
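
/* Windows DLL entry point: on process detach (FreeLibrary) and on thread
   detach the runtime is torn down for the corresponding scope; on thread
   attach only a trace is emitted, since a gtid is assigned lazily on first
   use. */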
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
    // lpReserved == NULL when the DLL is unloaded via FreeLibrary(); only
    // then is a full library shutdown performed here.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
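
/* __kmp_parallel_deo / __kmp_parallel_dxo implement entry to and exit from an
   "ordered" construct inside a parallel region: on entry a thread waits until
   t_ordered.dt.t_value reaches its tid, and on exit it hands the token to
   (tid + 1) % nproc. When __kmp_env_consistency_check is enabled, the ordered
   construct is also pushed/popped on the consistency stack. */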
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
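
/* Claim the single construct for the calling thread. The first thread of the
   team to advance t_construct past its previous value (via an atomic
   compare-and-store) wins and returns a non-zero status; everyone else gets
   zero. A serialized team trivially succeeds. */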
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }

    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      /* Only report metadata by primary thread of active team at level 1 */
      __kmp_itt_metadata_single(id_ref);
    }
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
  if (status) {
    __kmp_itt_single_start(gtid);
  }
  return status;
}

void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
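
/* Decide how many threads the forking thread may actually obtain for a new
   team. The request is trimmed by the dynamic adjustment mode (load balance,
   thread limit, random), by KMP_DEVICE_THREAD_LIMIT and OMP_THREAD_LIMIT, and
   by the capacity of the __kmp_threads array; a one-time warning is issued
   when a non-dynamic request cannot be satisfied. Returns the number of
   threads to use (possibly 1, i.e. serialize). */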
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  // Respect KMP_DEVICE_THREAD_LIMIT (KMP_ALL_THREADS).
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT.
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  // If it is not for initializing the hidden helper team, we need to take
  // __kmp_hidden_helper_threads_num out of the capacity because it is
  // included in __kmp_threads_capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
  return new_nthreads;
}
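
/* Populate a freshly allocated (or reused hot) team: seat the forking thread
   as tid 0, allocate or recycle worker kmp_info_t entries, align the workers'
   barrier arrival counters and teams-construct fields with the team, and
   (when affinity is supported) partition places across the new team. */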
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the primary thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team of masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team of workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the primary thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    // Do not partition the places list for teams construct workers who
    // haven't actually been forked to do real work yet. This partitioning
    // will take place in the parallel region nested within the teams.
    if (!fork_teams_workers) {
      __kmp_partition_places(team);
    }
#endif

    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);
    }
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }

  KMP_MB();
}
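
/* On x86/x86_64, propagateFPControl() captures the primary thread's x87
   control word and MXCSR into the team when __kmp_inherit_fp_control is set,
   and updateHWFPControl() restores a worker's FP state from the team if it
   drifted. On other architectures both become no-ops. */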
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the
// team. Avoid unnecessary writes to the relevant cache line in the team
// structure, so changes are made only when they are needed.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get primary thread's values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Don't write to this cache line in the team structure unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite: set the hardware registers to the updated values from the
// team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration
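
/* Execute a parallel region that is being serialized: the calling thread
   keeps running, but a (possibly nested) serial team is pushed so that ICVs,
   the dispatch buffer, and OMPT frames behave as if a one-thread team had
   been forked. */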
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value of
    // proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

  // Reset num_threads for next parallel region
  this_thr->th.th_set_nproc = 0;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

  } else {
    /* this serialized team is already being used,
       just add another nested level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display affinity functionality for serialized parallel
  // regions
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);

  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking; content was swapped

    /* OMPT implicit task begin */
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
}
// Test if this fork is for a team closely nested in a teams construct
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {
  return (master_th->th.th_teams_microtask && ap &&
          microtask != (microtask_t)__kmp_teams_master && level == teams_level);
}

// Test if this fork is for the teams construct itself, i.e. to form the outer
// league of teams
static inline bool __kmp_is_entering_teams(int active_level, int level,
                                           int teams_level, kmp_va_list ap) {
  return ((ap == NULL && active_level == 0) ||
          (ap && teams_level > 0 && teams_level == level));
}
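
/* Handle a parallel region forked from inside a teams construct: the parent
   (teams) team is reused, its size may be trimmed to the requested number of
   threads, places are re-partitioned, and the microtask is invoked on the
   parent team rather than allocating a new one. */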
static int
__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
                    kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
                    enum fork_context_e call_context, microtask_t microtask,
                    launch_t invoker, int master_set_numthreads, int level,
                    ompt_data_t ompt_parallel_data, void *return_address,
                    kmp_va_list ap) {
  void **argv;
  int i;

  parent_team->t.t_ident = loc;
  __kmp_alloc_argv_entries(argc, parent_team, TRUE);
  parent_team->t.t_argc = argc;
  argv = (void **)parent_team->t.t_argv;
  for (i = argc - 1; i >= 0; --i) {
    *argv++ = va_arg(kmp_va_deref(ap), void *);
  }
  // Increment our nested depth levels, but not increase the serialization
  if (parent_team == master_th->th.th_serial_team) {
    // we are in serialized parallel
    KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

    if (call_context == fork_context_gnu) {
      // need to decrement t_serialized for enquiry functions to work
      // correctly; will restore at join time
      parent_team->t.t_serialized--;
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;

    void *dummy;
    void **exit_frame_p;
    ompt_data_t *implicit_task_data;
    ompt_lw_taskteam_t lw_taskteam;

    if (ompt_enabled.enabled) {
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
      // Don't use lw_taskteam after linking. Content was swapped.

      /* OMPT implicit task begin */
      implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
      if (ompt_enabled.ompt_callback_implicit_task) {
        OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
            1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }

      /* OMPT state */
      master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
    } else {
      exit_frame_p = &dummy;
    }

    // need to decrement t_serialized for enquiry functions to work correctly;
    // will restore at join time
    parent_team->t.t_serialized--;

    {
      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv);
    }

    if (ompt_enabled.enabled) {
      *exit_frame_p = NULL;
      OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, implicit_task_data, 1,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
      if (ompt_enabled.ompt_callback_parallel_end) {
        ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
            &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
            OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
      }
      master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }
    return TRUE;
  }

  parent_team->t.t_pkfn = microtask;
  parent_team->t.t_invoke = invoker;
  KMP_ATOMIC_INC(&root->r.r_in_parallel);
  parent_team->t.t_active_level++;
  parent_team->t.t_level++;
  parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save

  master_th->th.th_teams_size.nth = parent_team->t.t_nproc;

  if (ompt_enabled.enabled) {
    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
                            return_address);
    __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
  }

  /* Change number of threads in the team if requested */
  if (master_set_numthreads) { // The parallel has num_threads clause
    if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
      // only can reduce number of threads dynamically, can't increase
      kmp_info_t **other_threads = parent_team->t.t_threads;
      // NOTE: if using distributed barrier, we need to run this code block
      // even when the team size appears not to have changed from the max.
      int old_proc = master_th->th.th_teams_size.nth;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
        __kmp_add_threads_to_team(parent_team, master_set_numthreads);
      }
      parent_team->t.t_nproc = master_set_numthreads;
      for (i = 0; i < master_set_numthreads; ++i) {
        other_threads[i]->th.th_team_nproc = master_set_numthreads;
      }
    }
    // Keep extra threads hot in the team for possible next parallels
    master_th->th.th_set_nproc = 0;
  }

  if (__kmp_debugging) { // Let debugger override number of threads.
    int nth = __kmp_omp_num_threads(loc);
    if (nth > 0) { // 0 means debugger doesn't want to change num threads
      master_set_numthreads = nth;
    }
  }

  // Figure out the proc_bind policy for the nested parallel within teams
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    // No proc_bind clause specified; use current proc-bind-var
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    /* else: The proc_bind policy was specified explicitly on the parallel
       clause; this overrides proc-bind-var for this parallel region, but does
       not change proc-bind-var itself. */
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }
  KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
  // Need to change the bind-var ICV to correct value for each implicit task
  if (proc_bind_icv != proc_bind_default &&
      master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
    kmp_info_t **other_threads = parent_team->t.t_threads;
    for (i = 0; i < master_th->th.th_team_nproc; ++i) {
      other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
    }
  }
  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
       KMP_ITT_DEBUG) &&
      __kmp_forkjoin_frames_mode == 3 &&
      parent_team->t.t_active_level == 1 // only report frames at level 1
      && master_th->th.th_teams_size.nteams == 1) {
    kmp_uint64 tmp_time = __itt_get_timestamp();
    master_th->th.th_frame_time = tmp_time;
    parent_team->t.t_region_time = tmp_time;
  }
  if (__itt_stack_caller_create_ptr) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
    // create new stack stitching id before entering fork barrier
    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
#if KMP_AFFINITY_SUPPORTED
  __kmp_partition_places(parent_team);
#endif

  KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));
  __kmp_internal_fork(loc, gtid, parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));

  if (call_context == fork_context_gnu)
    return TRUE;

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  if (!parent_team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));

  return TRUE;
}
static int
__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
                       kmp_int32 argc, microtask_t microtask, launch_t invoker,
                       kmp_info_t *master_th, kmp_team_t *parent_team,
                       ompt_data_t *ompt_parallel_data, void **return_address,
                       ompt_data_t **parent_task_data, kmp_va_list ap) {
  kmp_team_t *team;
  int i;
  void **argv;

#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
  void *args[argc];
#else
  void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

  KA_TRACE(
      20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));

  __kmpc_serialized_parallel(loc, gtid);

  master_th->th.th_serial_team->t.t_pkfn = microtask;

  if (call_context == fork_context_intel) {
    /* TODO this sucks, use the compiler itself to pass args! :) */
    master_th->th.th_serial_team->t.t_ident = loc;
    if (!ap) {
      // revert change made in __kmpc_serialized_parallel()
      master_th->th.th_serial_team->t.t_level--;
      // Get args from parent team for teams construct

      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; content was swapped

        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv);
      }

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    } else if (microtask == (microtask_t)__kmp_teams_master) {
      KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
      team = master_th->th.th_team;
      team->t.t_invoke = invoker;
      __kmp_alloc_argv_entries(argc, team, TRUE);
      team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;
      if (ap) {
        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);
      } else {
        for (i = 0; i < argc; ++i)
          // Get args from parent team for teams construct
          argv[i] = parent_team->t.t_argv[i];
      }
      // revert change made in __kmpc_serialized_parallel() because initial
      // code in teams should have level = 0
      team->t.t_level--;
      // call special invoker for outer "parallel" of teams construct
      invoker(gtid);

      if (ompt_enabled.enabled) {
        ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 0,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
        }
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_league,
              *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    } else {
      argv = args;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);

      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;
      ompt_data_t *implicit_task_data;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);
        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; content was swapped

        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, args);
      }

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    }
  } else if (call_context == fork_context_gnu) {
    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
                              *return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
    }
    // don't use lw_taskteam after linking; content was swapped

    // we were called from GNU native code
    KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
    return FALSE;
  } else {
    KMP_ASSERT2(call_context < fork_context_last,
                "__kmp_serial_fork_call: unknown fork_context parameter");
  }

  KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
  KMP_MB();
  return FALSE;
}
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int level;
  int active_level;
  int teams_level;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif

  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with some
       gap from the parent stack to prevent false sharing. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These two lines are so this does not get optimized out */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  /* setup current data */
  master_th = __kmp_threads[gtid];

  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }

  // Assign affinity to root thread if it hasn't happened yet
  __kmp_assign_root_init_mask();

  // Nested level will be an index in the nested nthreads array
  level = parent_team->t.t_level;
  // used to launch non-serial teams even if nested is not allowed
  active_level = parent_team->t.t_active_level;
  // needed to check nesting inside the teams
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    // it is either actual or not needed (when active_level > 0)
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
          return_address);
    }
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  master_th->th.th_ident = loc;

  // Parallel closely nested in a teams construct:
  if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
    return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
                               call_context, microtask, invoker,
                               master_set_numthreads, level,
                               ompt_parallel_data, return_address, ap);
  } // End parallel closely nested in teams construct

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }

  // Determine the number of threads
  int enter_teams =
      __kmp_is_entering_teams(active_level, level, teams_level, ap);
  if ((!enter_teams &&
       (parent_team->t.t_active_level >=
        master_th->th.th_current_task->td_icvs.max_active_levels)) ||
      (__kmp_library == library_serial)) {
    KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));
    nthreads = 1;
  } else {
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   // TODO: get nproc directly from current task
                   : get__nproc_2(parent_team, master_tid);
    // Check if we need to take the forkjoin lock (no need for serialized
    // parallel out of teams construct).
    if (nthreads > 1) {
      /* determine how many new threads we can use */
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      /* If we execute teams from a parallel region (on the host), then teams
         should be created but each can only have 1 thread if nesting is
         disabled. If teams is called from a serial region, then teams and
         their threads should be created regardless of the nesting setting. */
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                       nthreads, enter_teams);
      if (nthreads == 1) {
        // Free lock for single thread execution here; for multi-thread
        // execution it will be freed later after team of threads is created
        // and initialized.
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads then restore it now
  master_th->th.th_set_nproc = 0;

  if (nthreads == 1) {
    return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
                                  invoker, master_th, parent_team,
                                  &ompt_parallel_data, &return_address,
                                  &parent_task_data, ap);
  } // if (nthreads == 1)

  // GEH: only modify the executing flag in the case when not serialized
  //      (the serialized case is handled in kmpc_serialized_parallel)
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    /* Increment our nested depth level */
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  } else {
    nthreads_icv = 0; // don't update
  }

  // Figure out the proc_bind policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    // No proc_bind clause specified; use current proc-bind-var for this
    // parallel region
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
    if (master_th->th.th_teams_microtask &&
        microtask == (microtask_t)__kmp_teams_master) {
      proc_bind = __kmp_teams_proc_bind;
    }
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      // Do not modify the proc bind icv for the two teams construct forks
      // (while changing the proc bind icv for the inner parallel).
      if (!master_th->th.th_teams_microtask ||
          !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }
  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  /* setup the new team */
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);
  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    // Do not increase the parallel level at the start of the teams construct
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  // set primary thread's schedule as new run-time schedule
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  // Update the floating point rounding in the team if required.
  propagateFPControl(team);
2170 if (ompd_state & OMPD_ENABLE_BP)
2171 ompd_bp_parallel_begin();
2174 if (__kmp_tasking_mode != tskm_immediate_exec) {
2177 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2178 parent_team->t.t_task_team[master_th->th.th_task_state]);
2179 KA_TRACE(20, (
"__kmp_fork_call: Primary T#%d pushing task_team %p / team "
2180 "%p, new task_team %p / team %p\n",
2181 __kmp_gtid_from_thread(master_th),
2182 master_th->th.th_task_team, parent_team,
2183 team->t.t_task_team[master_th->th.th_task_state], team));
2185 if (active_level || master_th->th.th_task_team) {
2187 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2188 if (master_th->th.th_task_state_top >=
2189 master_th->th.th_task_state_stack_sz) {
2190 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2191 kmp_uint8 *old_stack, *new_stack;
2193 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2194 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2195 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2197 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2201 old_stack = master_th->th.th_task_state_memo_stack;
2202 master_th->th.th_task_state_memo_stack = new_stack;
2203 master_th->th.th_task_state_stack_sz = new_size;
2204 __kmp_free(old_stack);
2208 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2209 master_th->th.th_task_state;
2210 master_th->th.th_task_state_top++;
2211 #if KMP_NESTED_HOT_TEAMS
2212 if (master_th->th.th_hot_teams &&
2213 active_level < __kmp_hot_teams_max_level &&
2214 team == master_th->th.th_hot_teams[active_level].hot_team) {
2216 master_th->th.th_task_state =
2218 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2221 master_th->th.th_task_state = 0;
2222 #if KMP_NESTED_HOT_TEAMS
2226 #if !KMP_NESTED_HOT_TEAMS
2227 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2228 (team == root->r.r_hot_team));
2234 (
"__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2235 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2237 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2238 (team->t.t_master_tid == 0 &&
2239 (team->t.t_parent == root->r.r_root_team ||
2240 team->t.t_parent->t.t_serialized)));
2244 argv = (
void **)team->t.t_argv;
2246 for (i = argc - 1; i >= 0; --i) {
2247 void *new_argv = va_arg(kmp_va_deref(ap),
void *);
2248 KMP_CHECK_UPDATE(*argv, new_argv);
2252 for (i = 0; i < argc; ++i) {
2254 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2259 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2260 if (!root->r.r_active)
2261 root->r.r_active = TRUE;
2263 __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
2264 __kmp_setup_icv_copy(team, nthreads,
2265 &master_th->th.th_current_task->td_icvs, loc);
2268 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2271 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2274 if (team->t.t_active_level == 1
2275 && !master_th->th.th_teams_microtask) {
2277 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2278 (__kmp_forkjoin_frames_mode == 3 ||
2279 __kmp_forkjoin_frames_mode == 1)) {
2280 kmp_uint64 tmp_time = 0;
2281 if (__itt_get_timestamp_ptr)
2282 tmp_time = __itt_get_timestamp();
2284 master_th->th.th_frame_time = tmp_time;
2285 if (__kmp_forkjoin_frames_mode == 3)
2286 team->t.t_region_time = tmp_time;
2290 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2291 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2293 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2299 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
2302 (
"__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2303 root, team, master_th, gtid));
2306 if (__itt_stack_caller_create_ptr) {
2309 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
2310 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2311 }
else if (parent_team->t.t_serialized) {
2316 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
2317 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
2325 __kmp_internal_fork(loc, gtid, team);
2326 KF_TRACE(10, (
"__kmp_internal_fork : after : root=%p, team=%p, "
2327 "master_th=%p, gtid=%d\n",
2328 root, team, master_th, gtid));
2331 if (call_context == fork_context_gnu) {
2332 KA_TRACE(20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid));
2337 KA_TRACE(20, (
"__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2338 team->t.t_id, team->t.t_pkfn));
2341 #if KMP_STATS_ENABLED
2345 KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
2349 if (!team->t.t_invoke(gtid)) {
2350 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
2353 #if KMP_STATS_ENABLED
2356 KMP_SET_THREAD_STATE(previous_state);
2360 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2361 team->t.t_id, team->t.t_pkfn));
2364 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2366 if (ompt_enabled.enabled) {
2367 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2375 static inline void __kmp_join_restore_state(kmp_info_t *thread,
2378 thread->th.ompt_thread_info.state =
2379 ((team->t.t_serialized) ? ompt_state_work_serial
2380 : ompt_state_work_parallel);
2383 static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
2384 kmp_team_t *team, ompt_data_t *parallel_data,
2385 int flags, void *codeptr) {
2386 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2387 if (ompt_enabled.ompt_callback_parallel_end) {
2388 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2389 parallel_data, &(task_info->task_data), flags, codeptr);
2392 task_info->frame.enter_frame = ompt_data_none;
2393 __kmp_join_restore_state(thread, team);
2397 void __kmp_join_call(ident_t *loc, int gtid
2400 enum fork_context_e fork_context
2404 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2406 kmp_team_t *parent_team;
2407 kmp_info_t *master_th;
2411 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
2414 master_th = __kmp_threads[gtid];
2415 root = master_th->th.th_root;
2416 team = master_th->th.th_team;
2417 parent_team = team->t.t_parent;
2419 master_th->th.th_ident = loc;
2422 void *team_microtask = (void *)team->t.t_pkfn;
2426 if (ompt_enabled.enabled &&
2427 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2428 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2433 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2434 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2435 "th_task_team = %p\n",
2436 __kmp_gtid_from_thread(master_th), team,
2437 team->t.t_task_team[master_th->th.th_task_state],
2438 master_th->th.th_task_team));
2439 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2440 team->t.t_task_team[master_th->th.th_task_state]);
2444 if (team->t.t_serialized) {
2445 if (master_th->th.th_teams_microtask) {
2447 int level = team->t.t_level;
2448 int tlevel = master_th->th.th_teams_level;
2449 if (level == tlevel) {
2453 } else if (level == tlevel + 1) {
2457 team->t.t_serialized++;
2463 if (ompt_enabled.enabled) {
2464 if (fork_context == fork_context_gnu) {
2465 __ompt_lw_taskteam_unlink(master_th);
2467 __kmp_join_restore_state(master_th, parent_team);
2474 master_active = team->t.t_master_active;
2479 __kmp_internal_join(loc, gtid, team);
2481 if (__itt_stack_caller_create_ptr) {
2482 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2484 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2485 team->t.t_stack_id = NULL;
2489 master_th->th.th_task_state =
2492 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2493 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2497 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2498 parent_team->t.t_stack_id = NULL;
2506 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2507 void *codeptr = team->t.ompt_team_info.master_return_address;
2512 if (team->t.t_active_level == 1 &&
2513 (!master_th->th.th_teams_microtask ||
2514 master_th->th.th_teams_size.nteams == 1)) {
2515 master_th->th.th_ident = loc;
2518 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2519 __kmp_forkjoin_frames_mode == 3)
2520 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2521 master_th->th.th_frame_time, 0, loc,
2522 master_th->th.th_team_nproc, 1);
2523 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2524 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2525 __kmp_itt_region_joined(gtid);
2529 #if KMP_AFFINITY_SUPPORTED
2532 master_th->th.th_first_place = team->t.t_first_place;
2533 master_th->th.th_last_place = team->t.t_last_place;
2535 #endif // KMP_AFFINITY_SUPPORTED
2537 if (master_th->th.th_teams_microtask && !exit_teams &&
2538 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2539 team->t.t_level == master_th->th.th_teams_level + 1) {
2544 ompt_data_t ompt_parallel_data = ompt_data_none;
2545 if (ompt_enabled.enabled) {
2546 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2547 if (ompt_enabled.ompt_callback_implicit_task) {
2548 int ompt_team_size = team->t.t_nproc;
2549 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2550 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2551 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2553 task_info->frame.exit_frame = ompt_data_none;
2554 task_info->task_data = ompt_data_none;
2555 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2556 __ompt_lw_taskteam_unlink(master_th);
2561 team->t.t_active_level--;
2562 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2568 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2569 int old_num = master_th->th.th_team_nproc;
2570 int new_num = master_th->th.th_teams_size.nth;
2571 kmp_info_t **other_threads = team->t.t_threads;
2572 team->t.t_nproc = new_num;
2573 for (int i = 0; i < old_num; ++i) {
2574 other_threads[i]->th.th_team_nproc = new_num;
2577 for (int i = old_num; i < new_num; ++i) {
2579 KMP_DEBUG_ASSERT(other_threads[i]);
2580 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2581 for (int b = 0; b < bs_last_barrier; ++b) {
2582 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2583 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2585 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2588 if (__kmp_tasking_mode != tskm_immediate_exec) {
2590 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2596 if (ompt_enabled.enabled) {
2597 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2598 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2606 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2607 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2609 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2614 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2616 if (!master_th->th.th_teams_microtask ||
2617 team->t.t_level > master_th->th.th_teams_level) {
2619 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2621 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2624 if (ompt_enabled.enabled) {
2625 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2626 if (ompt_enabled.ompt_callback_implicit_task) {
2627 int flags = (team_microtask == (void *)__kmp_teams_master)
2629 : ompt_task_implicit;
2630 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2631 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2632 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2633 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2635 task_info->frame.exit_frame = ompt_data_none;
2636 task_info->task_data = ompt_data_none;
2640 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2642 __kmp_pop_current_task_from_thread(master_th);
2644 master_th->th.th_def_allocator = team->t.t_def_allocator;
2647 if (ompd_state & OMPD_ENABLE_BP)
2648 ompd_bp_parallel_end();
2650 updateHWFPControl(team);
2652 if (root->r.r_active != master_active)
2653 root->r.r_active = master_active;
2655 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2663 master_th->th.th_team = parent_team;
2664 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2665 master_th->th.th_team_master = parent_team->t.t_threads[0];
2666 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2669 if (parent_team->t.t_serialized &&
2670 parent_team != master_th->th.th_serial_team &&
2671 parent_team != root->r.r_root_team) {
2672 __kmp_free_team(root,
2673 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2674 master_th->th.th_serial_team = parent_team;
2677 if (__kmp_tasking_mode != tskm_immediate_exec) {
2678 if (master_th->th.th_task_state_top >
2680 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2682 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2683 master_th->th.th_task_state;
2684 --master_th->th.th_task_state_top;
2686 master_th->th.th_task_state =
2688 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2689 } else if (team != root->r.r_hot_team) {
2694 master_th->th.th_task_state = 0;
2697 master_th->th.th_task_team =
2698 parent_team->t.t_task_team[master_th->th.th_task_state];
2700 ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2701 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2708 master_th->th.th_current_task->td_flags.executing = 1;
2710 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2712 #if KMP_AFFINITY_SUPPORTED
2713 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2714 __kmp_reset_root_init_mask(gtid);
2719 OMPT_INVOKER(fork_context) |
2720 ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
2721 : ompt_parallel_team);
2722 if (ompt_enabled.enabled) {
2723 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2729 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2734 void __kmp_save_internal_controls(kmp_info_t *thread) {
2736 if (thread->th.th_team != thread->th.th_serial_team) {
2739 if (thread->th.th_team->t.t_serialized > 1) {
2742 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2745 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2746 thread->th.th_team->t.t_serialized) {
2751 kmp_internal_control_t *control =
2752 (kmp_internal_control_t *)__kmp_allocate(
2753 sizeof(kmp_internal_control_t));
2755 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2757 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2759 control->next = thread->th.th_team->t.t_control_stack_top;
2760 thread->th.th_team->t.t_control_stack_top = control;
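// Illustrative sketch: __kmp_save_internal_controls snapshots the current
// task's ICVs onto the team's t_control_stack_top list, at most once per
// serialization level, so they can be restored in order later. The same
// push-once-per-level idea, with hypothetical ControlNode/push_controls names
// instead of the runtime's types:
#include <cstdlib>

struct ICVs { int nproc; int max_active_levels; };

struct ControlNode {
  int serial_nesting_level;   // level this snapshot belongs to
  ICVs icvs;                  // saved copy of the controls
  ControlNode *next;          // stack link, newest entry first
};

static void push_controls(ControlNode **stack_top, const ICVs *current, int level) {
  if (*stack_top != nullptr && (*stack_top)->serial_nesting_level == level)
    return;                   // already saved for this level, as in the check above
  ControlNode *node = (ControlNode *)std::malloc(sizeof(ControlNode));
  node->serial_nesting_level = level;
  node->icvs = *current;      // the copy_icvs() step
  node->next = *stack_top;    // link on top of the stack
  *stack_top = node;
}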
2766 void __kmp_set_num_threads(int new_nth, int gtid) {
2770 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2771 KMP_DEBUG_ASSERT(__kmp_init_serial);
2775 else if (new_nth > __kmp_max_nth)
2776 new_nth = __kmp_max_nth;
2779 thread = __kmp_threads[gtid];
2780 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2783 __kmp_save_internal_controls(thread);
2785 set__nproc(thread, new_nth);
2790 root = thread->th.th_root;
2791 if (__kmp_init_parallel && (!root->r.r_active) &&
2792 (root->r.r_hot_team->t.t_nproc > new_nth)
2793 #if KMP_NESTED_HOT_TEAMS
2794 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2797 kmp_team_t *hot_team = root->r.r_hot_team;
2800 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2802 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2803 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2806 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2807 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2808 if (__kmp_tasking_mode != tskm_immediate_exec) {
2811 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2813 __kmp_free_thread(hot_team->t.t_threads[f]);
2814 hot_team->t.t_threads[f] = NULL;
2816 hot_team->t.t_nproc = new_nth;
2817 #if KMP_NESTED_HOT_TEAMS
2818 if (thread->th.th_hot_teams) {
2819 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2820 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2824 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2825 hot_team->t.b->update_num_threads(new_nth);
2826 __kmp_add_threads_to_team(hot_team, new_nth);
2829 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2832 for (f = 0; f < new_nth; f++) {
2833 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2834 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2837 hot_team->t.t_size_changed = -1;
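// Illustrative sketch: __kmp_set_num_threads is the entry point behind
// omp_set_num_threads(). The request is clamped to the legal range, stored in
// the calling task's ICVs via set__nproc(), and, when the root is idle,
// surplus hot-team workers are released right away. The observable effect at
// user level (standard OpenMP API only, nothing runtime-internal assumed):
#include <omp.h>
#include <cstdio>

int main() {
  omp_set_num_threads(3);   // updates the nthreads-var ICV handled above
  #pragma omp parallel
  {
    #pragma omp single
    std::printf("team size = %d\n", omp_get_num_threads()); // typically 3
  }
  return 0;
}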
2842 void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2845 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
2847 gtid, max_active_levels));
2848 KMP_DEBUG_ASSERT(__kmp_init_serial);
2851 if (max_active_levels < 0) {
2852 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2857 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
2858 "max_active_levels for thread %d = (%d)\n",
2859 gtid, max_active_levels));
2862 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2867 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2868 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2869 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2875 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
2876 "max_active_levels for thread %d = (%d)\n",
2877 gtid, max_active_levels));
2879 thread = __kmp_threads[gtid];
2881 __kmp_save_internal_controls(thread);
2883 set__max_active_levels(thread, max_active_levels);
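// Illustrative sketch: the setter above warns about and ignores negative
// values, clamps anything beyond KMP_MAX_ACTIVE_LEVELS_LIMIT, and only then
// stores the ICV. A condensed validate-then-store helper with hypothetical
// names and a hypothetical limit (the real code reports through KMP_WARNING):
#include <cstdio>

static int validate_max_active_levels(int requested, int current) {
  const int limit = 127;          // hypothetical stand-in for KMP_MAX_ACTIVE_LEVELS_LIMIT
  if (requested < 0) {
    std::fprintf(stderr, "negative max-active-levels ignored\n");
    return current;               // the call is ignored, keep the old value
  }
  if (requested > limit) {
    std::fprintf(stderr, "max-active-levels clamped to limit\n");
    requested = limit;            // clamp, as in the runtime
  }
  return requested;               // value that set__max_active_levels() would store
}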
2887 int __kmp_get_max_active_levels(int gtid) {
2890 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2891 KMP_DEBUG_ASSERT(__kmp_init_serial);
2893 thread = __kmp_threads[gtid];
2894 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2895 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2896 "curtask_maxaclevel=%d\n",
2897 gtid, thread->th.th_current_task,
2898 thread->th.th_current_task->td_icvs.max_active_levels));
2899 return thread->th.th_current_task->td_icvs.max_active_levels;
2903 void __kmp_set_num_teams(int num_teams) {
2905 __kmp_nteams = num_teams;
2907 int __kmp_get_max_teams(void) { return __kmp_nteams; }
2909 void __kmp_set_teams_thread_limit(int limit) {
2911 __kmp_teams_thread_limit = limit;
2913 int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }
2915 KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
2916 KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
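// Illustrative sketch: these KMP_BUILD_ASSERTs guarantee that kmp_sched_t and
// enum sched_type can travel through plain int slots (e.g. the __kmp_sch_map
// table used below). The same guarantee in standard C++ is a static_assert;
// my_sched_kind here is a hypothetical enum, not a runtime type:
enum class my_sched_kind : int { kind_static = 1, kind_dynamic = 2 };

static_assert(sizeof(my_sched_kind) == sizeof(int),
              "schedule kinds must fit in an int so they can share int-based tables");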
2919 void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2921 kmp_sched_t orig_kind;
2924 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2925 gtid, (int)kind, chunk));
2926 KMP_DEBUG_ASSERT(__kmp_init_serial);
2933 kind = __kmp_sched_without_mods(kind);
2935 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2936 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2938 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2939 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2941 kind = kmp_sched_default;
2945 thread = __kmp_threads[gtid];
2947 __kmp_save_internal_controls(thread);
2949 if (kind < kmp_sched_upper_std) {
2950 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2953 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
2955 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2956 __kmp_sch_map[kind - kmp_sched_lower - 1];
2961 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2962 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2963 kmp_sched_lower - 2];
2965 __kmp_sched_apply_mods_intkind(
2966 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2967 if (kind == kmp_sched_auto || chunk < 1) {
2969 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2971 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2976 void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2980 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2981 KMP_DEBUG_ASSERT(__kmp_init_serial);
2983 thread = __kmp_threads[gtid];
2985 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2986 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
2988 case kmp_sch_static_greedy:
2989 case kmp_sch_static_balanced:
2990 *kind = kmp_sched_static;
2991 __kmp_sched_apply_mods_stdkind(kind, th_type);
2994 case kmp_sch_static_chunked:
2995 *kind = kmp_sched_static;
2997 case kmp_sch_dynamic_chunked:
2998 *kind = kmp_sched_dynamic;
3001 case kmp_sch_guided_iterative_chunked:
3002 case kmp_sch_guided_analytical_chunked:
3003 *kind = kmp_sched_guided;
3006 *kind = kmp_sched_auto;
3008 case kmp_sch_trapezoidal:
3009 *kind = kmp_sched_trapezoidal;
3011 #if KMP_STATIC_STEAL_ENABLED
3012 case kmp_sch_static_steal:
3013 *kind = kmp_sched_static_steal;
3017 KMP_FATAL(UnknownSchedulingType, th_type);
3020 __kmp_sched_apply_mods_stdkind(kind, th_type);
3021 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
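// Illustrative sketch: __kmp_set_schedule maps the user-facing kmp_sched_t to
// an internal sched_type through __kmp_sch_map, and __kmp_get_schedule maps it
// back with the switch above, so the pair round-trips. A toy two-way mapping
// with hypothetical enums and table (not the runtime's values):
enum user_kind { u_static = 1, u_dynamic, u_guided, u_auto };
enum internal_kind { i_static_chunked = 10, i_dynamic_chunked, i_guided_chunked, i_auto };

static const internal_kind to_internal[] = {i_static_chunked, i_dynamic_chunked,
                                            i_guided_chunked, i_auto};

static internal_kind set_kind(user_kind k) { return to_internal[k - u_static]; }

static user_kind get_kind(internal_kind k) {
  switch (k) {                    // reverse mapping, as in __kmp_get_schedule
  case i_static_chunked:  return u_static;
  case i_dynamic_chunked: return u_dynamic;
  case i_guided_chunked:  return u_guided;
  default:                return u_auto;
  }
}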
3024 int __kmp_get_ancestor_thread_num(int gtid, int level) {
3030 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
3031 KMP_DEBUG_ASSERT(__kmp_init_serial);
3038 thr = __kmp_threads[gtid];
3039 team = thr->th.th_team;
3040 ii = team->t.t_level;
3044 if (thr->th.th_teams_microtask) {
3046 int tlevel = thr->th.th_teams_level;
3049 KMP_DEBUG_ASSERT(ii >= tlevel);
3061 return __kmp_tid_from_gtid(gtid);
3063 dd = team->t.t_serialized;
3065 while (ii > level) {
3066 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3068 if ((team->t.t_serialized) && (!dd)) {
3069 team = team->t.t_parent;
3073 team = team->t.t_parent;
3074 dd = team->t.t_serialized;
3079 return (dd > 1) ? (0) : (team->t.t_master_tid);
3082 int __kmp_get_team_size(int gtid, int level) {
3088 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
3089 KMP_DEBUG_ASSERT(__kmp_init_serial);
3096 thr = __kmp_threads[gtid];
3097 team = thr->th.th_team;
3098 ii = team->t.t_level;
3102 if (thr->th.th_teams_microtask) {
3104 int tlevel = thr->th.th_teams_level;
3107 KMP_DEBUG_ASSERT(ii >= tlevel);
3118 while (ii > level) {
3119 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3121 if (team->t.t_serialized && (!dd)) {
3122 team = team->t.t_parent;
3126 team = team->t.t_parent;
3131 return team->t.t_nproc;
3134 kmp_r_sched_t __kmp_get_schedule_global() {
3139 kmp_r_sched_t r_sched;
3145 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3146 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3149 r_sched.r_sched_type = __kmp_static;
3152 r_sched.r_sched_type = __kmp_guided;
3154 r_sched.r_sched_type = __kmp_sched;
3156 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3158 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3160 r_sched.chunk = KMP_DEFAULT_CHUNK;
3162 r_sched.chunk = __kmp_chunk;
3170 static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
3172 KMP_DEBUG_ASSERT(team);
3173 if (!realloc || argc > team->t.t_max_argc) {
3175 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3176 "current entries=%d\n",
3177 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3179 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3180 __kmp_free((void *)team->t.t_argv);
3182 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3184 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3185 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
3187 team->t.t_id, team->t.t_max_argc));
3188 team->t.t_argv = &team->t.t_inline_argv[0];
3189 if (__kmp_storage_map) {
3190 __kmp_print_storage_map_gtid(
3191 -1, &team->t.t_inline_argv[0],
3192 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3193 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
3198 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3199 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3201 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3203 team->t.t_id, team->t.t_max_argc));
3205 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3206 if (__kmp_storage_map) {
3207 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3208 &team->t.t_argv[team->t.t_max_argc],
3209 sizeof(void *) * team->t.t_max_argc,
3210 "team_%d.t_argv", team->t.t_id);
3216 static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
3218 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3220 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3221 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3222 sizeof(dispatch_shared_info_t) * num_disp_buff);
3223 team->t.t_dispatch =
3224 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3225 team->t.t_implicit_task_taskdata =
3226 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3227 team->t.t_max_nproc = max_nth;
3230 for (i = 0; i < num_disp_buff; ++i) {
3231 team->t.t_disp_buffer[i].buffer_index = i;
3232 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3236 static void __kmp_free_team_arrays(kmp_team_t *team) {
3239 for (i = 0; i < team->t.t_max_nproc; ++i) {
3240 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3241 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3242 team->t.t_dispatch[i].th_disp_buffer = NULL;
3245 #if KMP_USE_HIER_SCHED
3246 __kmp_dispatch_free_hierarchies(team);
3248 __kmp_free(team->t.t_threads);
3249 __kmp_free(team->t.t_disp_buffer);
3250 __kmp_free(team->t.t_dispatch);
3251 __kmp_free(team->t.t_implicit_task_taskdata);
3252 team->t.t_threads = NULL;
3253 team->t.t_disp_buffer = NULL;
3254 team->t.t_dispatch = NULL;
3255 team->t.t_implicit_task_taskdata = 0;
3258 static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3259 kmp_info_t **oldThreads = team->t.t_threads;
3261 __kmp_free(team->t.t_disp_buffer);
3262 __kmp_free(team->t.t_dispatch);
3263 __kmp_free(team->t.t_implicit_task_taskdata);
3264 __kmp_allocate_team_arrays(team, max_nth);
3266 KMP_MEMCPY(team->t.t_threads, oldThreads,
3267 team->t.t_nproc * sizeof(kmp_info_t *));
3269 __kmp_free(oldThreads);
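// Illustrative sketch: __kmp_reallocate_team_arrays rebuilds the per-team
// buffers at the new size but copies only the thread-pointer array, so the
// existing kmp_info_t bindings survive the resize. A minimal grow-and-copy
// helper (Worker/grow_workers are hypothetical):
#include <cstdlib>
#include <cstring>

struct Worker;  // opaque, playing the role of kmp_info_t

static Worker **grow_workers(Worker **old_arr, int live_count, int new_capacity) {
  Worker **grown = (Worker **)std::calloc((size_t)new_capacity, sizeof(Worker *));
  std::memcpy(grown, old_arr, (size_t)live_count * sizeof(Worker *)); // keep members
  std::free(old_arr);                                                 // old storage gone
  return grown;
}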
3272 static kmp_internal_control_t __kmp_get_global_icvs(void) {
3274 kmp_r_sched_t r_sched =
3275 __kmp_get_schedule_global();
3277 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3279 kmp_internal_control_t g_icvs = {
3281 (kmp_int8)__kmp_global.g.g_dynamic,
3283 (kmp_int8)__kmp_env_blocktime,
3285 __kmp_dflt_blocktime,
3290 __kmp_dflt_team_nth,
3294 __kmp_dflt_max_active_levels,
3298 __kmp_nested_proc_bind.bind_types[0],
3299 __kmp_default_device,
3306 static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
3308 kmp_internal_control_t gx_icvs;
3309 gx_icvs.serial_nesting_level =
3311 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3312 gx_icvs.next = NULL;
3317 static void __kmp_initialize_root(kmp_root_t *root) {
3319 kmp_team_t *root_team;
3320 kmp_team_t *hot_team;
3321 int hot_team_max_nth;
3322 kmp_r_sched_t r_sched =
3323 __kmp_get_schedule_global();
3324 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3325 KMP_DEBUG_ASSERT(root);
3326 KMP_ASSERT(!root->r.r_begin);
3329 __kmp_init_lock(&root->r.r_begin_lock);
3330 root->r.r_begin = FALSE;
3331 root->r.r_active = FALSE;
3332 root->r.r_in_parallel = 0;
3333 root->r.r_blocktime = __kmp_dflt_blocktime;
3334 #if KMP_AFFINITY_SUPPORTED
3335 root->r.r_affinity_assigned = FALSE;
3340 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
3343 __kmp_allocate_team(root,
3349 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3351 USE_NESTED_HOT_ARG(NULL)
3356 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3359 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
3361 root->r.r_root_team = root_team;
3362 root_team->t.t_control_stack_top = NULL;
3365 root_team->t.t_threads[0] = NULL;
3366 root_team->t.t_nproc = 1;
3367 root_team->t.t_serialized = 1;
3369 root_team->t.t_sched.sched = r_sched.sched;
3372 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3373 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3377 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
3380 __kmp_allocate_team(root,
3382 __kmp_dflt_team_nth_ub * 2,
3386 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3388 USE_NESTED_HOT_ARG(NULL)
3390 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
3392 root->r.r_hot_team = hot_team;
3393 root_team->t.t_control_stack_top = NULL;
3396 hot_team->t.t_parent = root_team;
3399 hot_team_max_nth = hot_team->t.t_max_nproc;
3400 for (f = 0; f < hot_team_max_nth; ++f) {
3401 hot_team->t.t_threads[f] = NULL;
3403 hot_team->t.t_nproc = 1;
3405 hot_team->t.t_sched.sched = r_sched.sched;
3406 hot_team->t.t_size_changed = 0;
3411 typedef struct kmp_team_list_item {
3412 kmp_team_p const *entry;
3413 struct kmp_team_list_item *next;
3414 } kmp_team_list_item_t;
3415 typedef kmp_team_list_item_t *kmp_team_list_t;
3417 static void __kmp_print_structure_team_accum(
3418 kmp_team_list_t list,
3419 kmp_team_p const *team
3429 KMP_DEBUG_ASSERT(list != NULL);
3434 __kmp_print_structure_team_accum(list, team->t.t_parent);
3435 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3439 while (l->next != NULL && l->entry != team) {
3442 if (l->next != NULL) {
3448 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3454 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3455 sizeof(kmp_team_list_item_t));
3462 static void __kmp_print_structure_team(char const *title, kmp_team_p const *team
3465 __kmp_printf("%s", title);
3467 __kmp_printf("%2x %p\n", team->t.t_id, team);
3469 __kmp_printf(" - (nil)\n");
3473 static void __kmp_print_structure_thread(char const *title,
3474 kmp_info_p const *thread) {
3475 __kmp_printf("%s", title);
3476 if (thread != NULL) {
3477 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3479 __kmp_printf(" - (nil)\n");
3483 void __kmp_print_structure(void) {
3485 kmp_team_list_t list;
3489 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3493 __kmp_printf("\n------------------------------\nGlobal Thread "
3494 "Table\n------------------------------\n");
3497 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3498 __kmp_printf("%2d", gtid);
3499 if (__kmp_threads != NULL) {
3500 __kmp_printf(" %p", __kmp_threads[gtid]);
3502 if (__kmp_root != NULL) {
3503 __kmp_printf(" %p", __kmp_root[gtid]);
3510 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3512 if (__kmp_threads != NULL) {
3514 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3515 kmp_info_t const *thread = __kmp_threads[gtid];
3516 if (thread != NULL) {
3517 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3518 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3519 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3520 __kmp_print_structure_team(" Serial Team: ",
3521 thread->th.th_serial_team);
3522 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3523 __kmp_print_structure_thread(" Primary: ",
3524 thread->th.th_team_master);
3525 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3526 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
3527 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3528 __kmp_print_structure_thread(" Next in pool: ",
3529 thread->th.th_next_pool);
3531 __kmp_print_structure_team_accum(list, thread->th.th_team);
3532 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3536 __kmp_printf("Threads array is not allocated.\n");
3540 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3542 if (__kmp_root != NULL) {
3544 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3545 kmp_root_t const *root = __kmp_root[gtid];
3547 __kmp_printf("GTID %2d %p:\n", gtid, root);
3548 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3549 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3550 __kmp_print_structure_thread(" Uber Thread: ",
3551 root->r.r_uber_thread);
3552 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3553 __kmp_printf(" In Parallel: %2d\n",
3554 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3556 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3557 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3561 __kmp_printf("Ubers array is not allocated.\n");
3564 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3566 while (list->next != NULL) {
3567 kmp_team_p const *team = list->entry;
3569 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3570 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3571 __kmp_printf(" Primary TID: %2d\n", team->t.t_master_tid);
3572 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3573 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3574 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3575 for (i = 0; i < team->t.t_nproc; ++i) {
3576 __kmp_printf(" Thread %2d: ", i);
3577 __kmp_print_structure_thread("", team->t.t_threads[i]);
3579 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3585 __kmp_printf("\n------------------------------\nPools\n----------------------"
3587 __kmp_print_structure_thread("Thread pool: ",
3588 CCAST(kmp_info_t *, __kmp_thread_pool));
3589 __kmp_print_structure_team("Team pool: ",
3590 CCAST(kmp_team_t *, __kmp_team_pool));
3594 while (list != NULL) {
3595 kmp_team_list_item_t *item = list;
3597 KMP_INTERNAL_FREE(item);
3606 static const unsigned __kmp_primes[] = {
3607 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3608 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3609 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3610 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3611 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3612 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3613 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3614 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3615 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3616 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3617 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
3621 unsigned short __kmp_get_random(kmp_info_t *thread) {
3622 unsigned x = thread->th.th_x;
3623 unsigned short r = (unsigned short)(x >> 16);
3625 thread->th.th_x = x * thread->th.th_a + 1;
3627 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3628 thread->th.th_info.ds.ds_tid, r));
3634 void __kmp_init_random(kmp_info_t *thread) {
3635 unsigned seed = thread->th.th_info.ds.ds_tid;
3638 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3639 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3641 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
3647 static int __kmp_reclaim_dead_roots(void) {
3650 for (i = 0; i < __kmp_threads_capacity; ++i) {
3651 if (KMP_UBER_GTID(i) &&
3652 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3655 r += __kmp_unregister_root_other_thread(i);
3680 static int __kmp_expand_threads(int nNeed) {
3682 int minimumRequiredCapacity;
3684 kmp_info_t **newThreads;
3685 kmp_root_t **newRoot;
3691 #if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3694 added = __kmp_reclaim_dead_roots();
3723 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3726 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3730 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3732 newCapacity = __kmp_threads_capacity;
3734 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3735 : __kmp_sys_max_nth;
3736 } while (newCapacity < minimumRequiredCapacity);
3737 newThreads = (kmp_info_t **)__kmp_allocate(
3738 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3740 (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3741 KMP_MEMCPY(newThreads, __kmp_threads,
3742 __kmp_threads_capacity *
sizeof(kmp_info_t *));
3743 KMP_MEMCPY(newRoot, __kmp_root,
3744 __kmp_threads_capacity *
sizeof(kmp_root_t *));
3747 kmp_old_threads_list_t *node =
3748 (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
3749 node->threads = __kmp_threads;
3750 node->next = __kmp_old_threads_list;
3751 __kmp_old_threads_list = node;
3753 *(kmp_info_t **volatile *)&__kmp_threads = newThreads;
3754 *(kmp_root_t **volatile *)&__kmp_root = newRoot;
3755 added += newCapacity - __kmp_threads_capacity;
3756 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3758 if (newCapacity > __kmp_tp_capacity) {
3759 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3760 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3761 __kmp_threadprivate_resize_cache(newCapacity);
3763 *(volatile int *)&__kmp_tp_capacity = newCapacity;
3765 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
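// Illustrative sketch: __kmp_expand_threads doubles the capacity until it
// covers the request (capped at __kmp_sys_max_nth), allocates the new
// __kmp_threads/__kmp_root tables as one block, copies the old entries, and
// parks the old array on a list because other threads may still read it. A
// reduced doubling-and-copy step only (hypothetical names, no liveness list):
#include <cstdlib>
#include <cstring>

static int grow_registry(void ***table, int *capacity, int needed, int hard_max) {
  if (hard_max - *capacity < needed)
    needed = hard_max - *capacity;      // never exceed the hard system maximum
  if (needed <= 0)
    return 0;                           // nothing more can be added
  int required = *capacity + needed;
  int new_cap = (*capacity > 0) ? *capacity : 1;
  do {
    new_cap = (new_cap <= hard_max / 2) ? 2 * new_cap : hard_max;
  } while (new_cap < required);
  void **grown = (void **)std::calloc((size_t)new_cap, sizeof(void *));
  if (*capacity > 0)
    std::memcpy(grown, *table, (size_t)*capacity * sizeof(void *));
  std::free(*table);                    // (the runtime keeps the old array alive instead)
  int added = new_cap - *capacity;
  *table = grown;
  *capacity = new_cap;
  return added;                         // number of newly available slots
}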
3774 int __kmp_register_root(int initial_thread) {
3775 kmp_info_t *root_thread;
3779 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3780 KA_TRACE(20, ("__kmp_register_root: entered\n"));
3797 capacity = __kmp_threads_capacity;
3798 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3805 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3806 capacity -= __kmp_hidden_helper_threads_num;
3810 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3811 if (__kmp_tp_cached) {
3812 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3813 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3814 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3816 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3826 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3829 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3830 gtid <= __kmp_hidden_helper_threads_num;
3833 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3834 KA_TRACE(1, ("__kmp_register_root: found slot in threads array for "
3835 "hidden helper thread: T#%d\n",
3841 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3844 for (gtid = __kmp_hidden_helper_threads_num + 1;
3845 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3849 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3850 KMP_ASSERT(gtid < __kmp_threads_capacity);
3855 TCW_4(__kmp_nth, __kmp_nth + 1);
3859 if (__kmp_adjust_gtid_mode) {
3860 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3861 if (TCR_4(__kmp_gtid_mode) != 2) {
3862 TCW_4(__kmp_gtid_mode, 2);
3865 if (TCR_4(__kmp_gtid_mode) != 1) {
3866 TCW_4(__kmp_gtid_mode, 1);
3871 #ifdef KMP_ADJUST_BLOCKTIME
3874 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3875 if (__kmp_nth > __kmp_avail_proc) {
3876 __kmp_zero_bt = TRUE;
3882 if (!(root = __kmp_root[gtid])) {
3883 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3884 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3887 #if KMP_STATS_ENABLED
3889 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3890 __kmp_stats_thread_ptr->startLife();
3891 KMP_SET_THREAD_STATE(SERIAL_REGION);
3894 __kmp_initialize_root(root);
3897 if (root->r.r_uber_thread) {
3898 root_thread = root->r.r_uber_thread;
3900 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3901 if (__kmp_storage_map) {
3902 __kmp_print_thread_storage_map(root_thread, gtid);
3904 root_thread->th.th_info.ds.ds_gtid = gtid;
3906 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3908 root_thread->th.th_root = root;
3909 if (__kmp_env_consistency_check) {
3910 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3913 __kmp_initialize_fast_memory(root_thread);
3917 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3918 __kmp_initialize_bget(root_thread);
3920 __kmp_init_random(root_thread);
3924 if (!root_thread->th.th_serial_team) {
3925 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3926 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
3927 root_thread->th.th_serial_team = __kmp_allocate_team(
3932 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3934 KMP_ASSERT(root_thread->th.th_serial_team);
3935 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
3936 root_thread->th.th_serial_team));
3939 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3941 root->r.r_root_team->t.t_threads[0] = root_thread;
3942 root->r.r_hot_team->t.t_threads[0] = root_thread;
3943 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3945 root_thread->th.th_serial_team->t.t_serialized = 0;
3946 root->r.r_uber_thread = root_thread;
3949 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3950 TCW_4(__kmp_init_gtid, TRUE);
3953 __kmp_gtid_set_specific(gtid);
3956 __kmp_itt_thread_name(gtid);
3959 #ifdef KMP_TDATA_GTID
3962 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3963 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3965 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3967 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3968 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3969 KMP_INIT_BARRIER_STATE));
3972 for (b = 0; b < bs_last_barrier; ++b) {
3973 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3975 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3979 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3980 KMP_INIT_BARRIER_STATE);
3982 #if KMP_AFFINITY_SUPPORTED
3983 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3984 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3985 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3986 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3988 root_thread->th.th_def_allocator = __kmp_def_allocator;
3989 root_thread->th.th_prev_level = 0;
3990 root_thread->th.th_prev_num_threads = 1;
3992 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
3993 tmp->cg_root = root_thread;
3994 tmp->cg_thread_limit = __kmp_cg_max_nth;
3995 tmp->cg_nthreads = 1;
3996 KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
3997 " cg_nthreads init to 1\n",
4000 root_thread->th.th_cg_roots = tmp;
4002 __kmp_root_counter++;
4005 if (!initial_thread && ompt_enabled.enabled) {
4007 kmp_info_t *root_thread = ompt_get_thread();
4009 ompt_set_thread_state(root_thread, ompt_state_overhead);
4011 if (ompt_enabled.ompt_callback_thread_begin) {
4012 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
4013 ompt_thread_initial, __ompt_get_thread_data_internal());
4015 ompt_data_t *task_data;
4016 ompt_data_t *parallel_data;
4017 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4019 if (ompt_enabled.ompt_callback_implicit_task) {
4020 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4021 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4024 ompt_set_thread_state(root_thread, ompt_state_work_serial);
4028 if (ompd_state & OMPD_ENABLE_BP)
4029 ompd_bp_thread_begin();
4033 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4038 #if KMP_NESTED_HOT_TEAMS
4039 static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
4040 const int max_level) {
4042 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4043 if (!hot_teams || !hot_teams[level].hot_team) {
4046 KMP_DEBUG_ASSERT(level < max_level);
4047 kmp_team_t *team = hot_teams[level].hot_team;
4048 nth = hot_teams[level].hot_team_nth;
4050 if (level < max_level - 1) {
4051 for (i = 0; i < nth; ++i) {
4052 kmp_info_t *th = team->t.t_threads[i];
4053 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4054 if (i > 0 && th->th.th_hot_teams) {
4055 __kmp_free(th->th.th_hot_teams);
4056 th->th.th_hot_teams = NULL;
4060 __kmp_free_team(root, team, NULL);
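// Illustrative sketch: __kmp_free_hot_teams walks the nested hot-team
// hierarchy depth-first: for each worker of a hot team it first frees that
// worker's own nested hot teams one level down, then releases the team itself
// and reports how many workers were covered. The same recursion shape with a
// hypothetical HotNode type (counting only, no real deallocation):
#include <cstddef>

struct HotNode {
  HotNode **children;  // per-worker nested hot team, entries may be null
  int nworkers;
};

static int free_hot_nodes(HotNode *node, int level, int max_level) {
  if (node == nullptr)
    return 0;
  int covered = 0;
  if (level < max_level - 1) {           // recurse into the level below first, as above
    for (int i = 0; i < node->nworkers; ++i)
      covered += free_hot_nodes(node->children ? node->children[i] : nullptr,
                                level + 1, max_level);
  }
  covered += node->nworkers;             // then account for this team's workers
  // (the runtime calls __kmp_free_team(root, team, NULL) at this point)
  return covered;
}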
4067 static int __kmp_reset_root(int gtid, kmp_root_t *root) {
4068 kmp_team_t *root_team = root->r.r_root_team;
4069 kmp_team_t *hot_team = root->r.r_hot_team;
4070 int n = hot_team->t.t_nproc;
4073 KMP_DEBUG_ASSERT(!root->r.r_active);
4075 root->r.r_root_team = NULL;
4076 root->r.r_hot_team = NULL;
4079 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4080 #if KMP_NESTED_HOT_TEAMS
4081 if (__kmp_hot_teams_max_level >
4083 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4084 kmp_info_t *th = hot_team->t.t_threads[i];
4085 if (__kmp_hot_teams_max_level > 1) {
4086 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4088 if (th->th.th_hot_teams) {
4089 __kmp_free(th->th.th_hot_teams);
4090 th->th.th_hot_teams = NULL;
4095 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4100 if (__kmp_tasking_mode != tskm_immediate_exec) {
4101 __kmp_wait_to_unref_task_teams();
4107 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4109 (LPVOID) & (root->r.r_uber_thread->th),
4110 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4111 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4115 if (ompd_state & OMPD_ENABLE_BP)
4116 ompd_bp_thread_end();
4120 ompt_data_t *task_data;
4121 ompt_data_t *parallel_data;
4122 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4124 if (ompt_enabled.ompt_callback_implicit_task) {
4125 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4126 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4128 if (ompt_enabled.ompt_callback_thread_end) {
4129 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4130 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4136 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4137 KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4139 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4140 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4143 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4144 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4145 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4146 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4147 root->r.r_uber_thread->th.th_cg_roots = NULL;
4149 __kmp_reap_thread(root->r.r_uber_thread, 1);
4153 root->r.r_uber_thread = NULL;
4155 root->r.r_begin = FALSE;
4160 void __kmp_unregister_root_current_thread(int gtid) {
4161 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4165 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4166 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4167 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
4170 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4173 kmp_root_t *root = __kmp_root[gtid];
4175 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4176 KMP_ASSERT(KMP_UBER_GTID(gtid));
4177 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4178 KMP_ASSERT(root->r.r_active == FALSE);
4182 kmp_info_t *thread = __kmp_threads[gtid];
4183 kmp_team_t *team = thread->th.th_team;
4184 kmp_task_team_t *task_team = thread->th.th_task_team;
4187 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4188 task_team->tt.tt_hidden_helper_task_encountered)) {
4191 thread->th.ompt_thread_info.state = ompt_state_undefined;
4193 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4196 __kmp_reset_root(gtid, root);
4200 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4202 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4209 static int __kmp_unregister_root_other_thread(int gtid) {
4210 kmp_root_t *root = __kmp_root[gtid];
4213 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4214 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4215 KMP_ASSERT(KMP_UBER_GTID(gtid));
4216 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4217 KMP_ASSERT(root->r.r_active == FALSE);
4219 r = __kmp_reset_root(gtid, root);
4221 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4227 void __kmp_task_info() {
4229 kmp_int32 gtid = __kmp_entry_gtid();
4230 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4231 kmp_info_t *this_thr = __kmp_threads[gtid];
4232 kmp_team_t *steam = this_thr->th.th_serial_team;
4233 kmp_team_t *team = this_thr->th.th_team;
4236 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4238 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4239 team->t.t_implicit_task_taskdata[tid].td_parent);
4246 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4247 int tid, int gtid) {
4251 KMP_DEBUG_ASSERT(this_thr != NULL);
4252 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4253 KMP_DEBUG_ASSERT(team);
4254 KMP_DEBUG_ASSERT(team->t.t_threads);
4255 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4256 kmp_info_t *master = team->t.t_threads[0];
4257 KMP_DEBUG_ASSERT(master);
4258 KMP_DEBUG_ASSERT(master->th.th_root);
4262 TCW_SYNC_PTR(this_thr->th.th_team, team);
4264 this_thr->th.th_info.ds.ds_tid = tid;
4265 this_thr->th.th_set_nproc = 0;
4266 if (__kmp_tasking_mode != tskm_immediate_exec)
4269 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4271 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4272 this_thr->th.th_set_proc_bind = proc_bind_default;
4273 #if KMP_AFFINITY_SUPPORTED
4274 this_thr->th.th_new_place = this_thr->th.th_current_place;
4276 this_thr->th.th_root = master->th.th_root;
4279 this_thr->th.th_team_nproc = team->t.t_nproc;
4280 this_thr->th.th_team_master = master;
4281 this_thr->th.th_team_serialized = team->t.t_serialized;
4283 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4285 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4286 tid, gtid, this_thr, this_thr->th.th_current_task));
4288 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4291 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4292 tid, gtid, this_thr, this_thr->th.th_current_task));
4297 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4299 this_thr->th.th_local.this_construct = 0;
4301 if (!this_thr->th.th_pri_common) {
4302 this_thr->th.th_pri_common =
4303 (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4304 if (__kmp_storage_map) {
4305 __kmp_print_storage_map_gtid(
4306 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4307 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
4309 this_thr->th.th_pri_head = NULL;
4312 if (this_thr != master &&
4313 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4315 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4316 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4319 int i = tmp->cg_nthreads--;
4320 KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
4321 " on node %p of thread %p to %d\n",
4322 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4327 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4329 this_thr->th.th_cg_roots->cg_nthreads++;
4330 KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
4331 " node %p of thread %p to %d\n",
4332 this_thr, this_thr->th.th_cg_roots,
4333 this_thr->th.th_cg_roots->cg_root,
4334 this_thr->th.th_cg_roots->cg_nthreads));
4335 this_thr->th.th_current_task->td_icvs.thread_limit =
4336 this_thr->th.th_cg_roots->cg_thread_limit;
4341 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4344 sizeof(dispatch_private_info_t) *
4345 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4346 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4347 team->t.t_max_nproc));
4348 KMP_ASSERT(dispatch);
4349 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4350 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4352 dispatch->th_disp_index = 0;
4353 dispatch->th_doacross_buf_idx = 0;
4354 if (!dispatch->th_disp_buffer) {
4355 dispatch->th_disp_buffer =
4356 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4358 if (__kmp_storage_map) {
4359 __kmp_print_storage_map_gtid(
4360 gtid, &dispatch->th_disp_buffer[0],
4361 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4363 : __kmp_dispatch_num_buffers],
4365 "th_%d.th_dispatch.th_disp_buffer "
4366 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4367 gtid, team->t.t_id, gtid);
4370 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
4373 dispatch->th_dispatch_pr_current = 0;
4374 dispatch->th_dispatch_sh_current = 0;
4376 dispatch->th_deo_fcn = 0;
4377 dispatch->th_dxo_fcn = 0;
4380 this_thr->th.th_next_pool = NULL;
4382 if (!this_thr->th.th_task_state_memo_stack) {
4384 this_thr->th.th_task_state_memo_stack =
4385 (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
4386 this_thr->th.th_task_state_top = 0;
4387 this_thr->th.th_task_state_stack_sz = 4;
4388 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4390 this_thr->th.th_task_state_memo_stack[i] = 0;
4393 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4394 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
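// Illustrative sketch: several per-thread resources above (th_pri_common, the
// dispatch buffer, the task-state memo stack) are created lazily: the pointer
// is checked and the buffer allocated and zeroed only on first use, so a
// recycled pool thread keeps them across teams. The same allocate-on-first-use
// pattern with hypothetical names:
#include <cstdlib>
#include <cstring>

struct PerThreadScratch {
  unsigned char *buf = nullptr;  // created on first demand
  size_t size = 0;
};

static unsigned char *get_scratch(PerThreadScratch *t, size_t size) {
  if (t->buf == nullptr) {                     // first use on this thread
    t->buf = (unsigned char *)std::malloc(size);
    std::memset(t->buf, 0, size);              // start from a clean state
    t->size = size;
  }
  return t->buf;                               // reused by later re-initializations
}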
4404 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4406 kmp_team_t *serial_team;
4407 kmp_info_t *new_thr;
4410 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4411 KMP_DEBUG_ASSERT(root && team);
4412 #if !KMP_NESTED_HOT_TEAMS
4413 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4418 if (__kmp_thread_pool) {
4419 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4420 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4421 if (new_thr == __kmp_thread_pool_insert_pt) {
4422 __kmp_thread_pool_insert_pt = NULL;
4424 TCW_4(new_thr->th.th_in_pool, FALSE);
4425 __kmp_suspend_initialize_thread(new_thr);
4426 __kmp_lock_suspend_mx(new_thr);
4427 if (new_thr->th.th_active_in_pool == TRUE) {
4428 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4429 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4430 new_thr->th.th_active_in_pool = FALSE;
4432 __kmp_unlock_suspend_mx(new_thr);
4434 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4435 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4436 KMP_ASSERT(!new_thr->th.th_team);
4437 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4440 __kmp_initialize_info(new_thr, team, new_tid,
4441 new_thr->th.th_info.ds.ds_gtid);
4442 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4444 TCW_4(__kmp_nth, __kmp_nth + 1);
4446 new_thr->th.th_task_state = 0;
4447 new_thr->th.th_task_state_top = 0;
4448 new_thr->th.th_task_state_stack_sz = 4;
4450 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4452 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4456 #ifdef KMP_ADJUST_BLOCKTIME
4459 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4460 if (__kmp_nth > __kmp_avail_proc) {
4461 __kmp_zero_bt = TRUE;
4470 kmp_balign_t *balign = new_thr->th.th_bar;
4471 for (b = 0; b < bs_last_barrier; ++b)
4472 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4475 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4476 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4483 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4484 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4489 if (!TCR_4(__kmp_init_monitor)) {
4490 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4491 if (!TCR_4(__kmp_init_monitor)) {
4492 KF_TRACE(10, ("before __kmp_create_monitor\n"));
4493 TCW_4(__kmp_init_monitor, 1);
4494 __kmp_create_monitor(&__kmp_monitor);
4495 KF_TRACE(10, ("after __kmp_create_monitor\n"));
4506 while (TCR_4(__kmp_init_monitor) < 2) {
4509 KF_TRACE(10, ("after monitor thread has started\n"));
4512 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4519 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4521 : __kmp_hidden_helper_threads_num + 1;
4523 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4525 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4528 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4529 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4534 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4536 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4538 #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4541 __itt_suppress_mark_range(
4542 __itt_suppress_range, __itt_suppress_threading_errors,
4543 &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
4544 __itt_suppress_mark_range(
4545 __itt_suppress_range, __itt_suppress_threading_errors,
4546 &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
4548 __itt_suppress_mark_range(
4549 __itt_suppress_range, __itt_suppress_threading_errors,
4550 &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
4552 __itt_suppress_mark_range(__itt_suppress_range,
4553 __itt_suppress_threading_errors,
4554 &new_thr->th.th_suspend_init_count,
4555 sizeof(new_thr->th.th_suspend_init_count));
4558 __itt_suppress_mark_range(__itt_suppress_range,
4559 __itt_suppress_threading_errors,
4560 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4561 sizeof(new_thr->th.th_bar[0].bb.b_go));
4562 __itt_suppress_mark_range(__itt_suppress_range,
4563 __itt_suppress_threading_errors,
4564 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4565 sizeof(new_thr->th.th_bar[1].bb.b_go));
4566 __itt_suppress_mark_range(__itt_suppress_range,
4567 __itt_suppress_threading_errors,
4568 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4569 sizeof(new_thr->th.th_bar[2].bb.b_go));
4571 if (__kmp_storage_map) {
4572 __kmp_print_thread_storage_map(new_thr, new_gtid);
4577 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4578 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4579 new_thr->th.th_serial_team = serial_team =
4580 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4584 proc_bind_default, &r_icvs,
4585 0 USE_NESTED_HOT_ARG(NULL));
4587 KMP_ASSERT(serial_team);
4588 serial_team->t.t_serialized = 0;
4590 serial_team->t.t_threads[0] = new_thr;
4592 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4596 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4599 __kmp_initialize_fast_memory(new_thr);
4603 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4604 __kmp_initialize_bget(new_thr);
4607 __kmp_init_random(new_thr);
4611 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4612 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4615 kmp_balign_t *balign = new_thr->th.th_bar;
4616 for (b = 0; b < bs_last_barrier; ++b) {
4617 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4618 balign[b].bb.team = NULL;
4619 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4620 balign[b].bb.use_oncore_barrier = 0;
4623 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4624 new_thr->th.th_sleep_loc_type = flag_unset;
4626 new_thr->th.th_spin_here = FALSE;
4627 new_thr->th.th_next_waiting = 0;
4629 new_thr->th.th_blocking = false;
4632 #if KMP_AFFINITY_SUPPORTED
4633 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4634 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4635 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4636 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4638 new_thr->th.th_def_allocator = __kmp_def_allocator;
4639 new_thr->th.th_prev_level = 0;
4640 new_thr->th.th_prev_num_threads = 1;
4642 TCW_4(new_thr->th.th_in_pool, FALSE);
4643 new_thr->th.th_active_in_pool = FALSE;
4644 TCW_4(new_thr->th.th_active, TRUE);
4652 if (__kmp_adjust_gtid_mode) {
4653 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4654 if (TCR_4(__kmp_gtid_mode) != 2) {
4655 TCW_4(__kmp_gtid_mode, 2);
4658 if (TCR_4(__kmp_gtid_mode) != 1) {
4659 TCW_4(__kmp_gtid_mode, 1);
4664 #ifdef KMP_ADJUST_BLOCKTIME
4667 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4668 if (__kmp_nth > __kmp_avail_proc) {
4669 __kmp_zero_bt = TRUE;
4676 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4677 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4679 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4681 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4692 static void __kmp_reinitialize_team(kmp_team_t *team,
4693 kmp_internal_control_t *new_icvs,
4695 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4696 team->t.t_threads[0], team));
4697 KMP_DEBUG_ASSERT(team && new_icvs);
4698 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4699 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4701 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4703 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4704 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4706 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4707 team->t.t_threads[0], team));
4713 static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4714 kmp_internal_control_t *new_icvs,
4716 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
4719 KMP_DEBUG_ASSERT(team);
4720 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4721 KMP_DEBUG_ASSERT(team->t.t_threads);
4724 team->t.t_master_tid = 0;
4726 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4727 team->t.t_nproc = new_nproc;
4730 team->t.t_next_pool = NULL;
4734 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4735 team->t.t_invoke = NULL;
4738 team->t.t_sched.sched = new_icvs->sched.sched;
4740 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4741 team->t.t_fp_control_saved = FALSE;
4742 team->t.t_x87_fpu_control_word = 0;
4743 team->t.t_mxcsr = 0;
4746 team->t.t_construct = 0;
4748 team->t.t_ordered.dt.t_value = 0;
4749 team->t.t_master_active = FALSE;
4752 team->t.t_copypriv_data = NULL;
4755 team->t.t_copyin_counter = 0;
4758 team->t.t_control_stack_top = NULL;
4760 __kmp_reinitialize_team(team, new_icvs, loc);
4763 KF_TRACE(10, (
"__kmp_initialize_team: exit: team=%p\n", team));
4766 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
static void
__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
  if (KMP_AFFINITY_CAPABLE()) {
    int status;
    if (old_mask != NULL) {
      status = __kmp_get_system_affinity(old_mask, TRUE);
      int error = errno;
      if (status != 0) {
        __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
                    __kmp_msg_null);
      }
    }
    __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
  }
}
#endif
4785 #if KMP_AFFINITY_SUPPORTED
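// Distribute the threads of a team over the primary thread's place partition
// according to the team's proc_bind policy: "primary" packs all workers onto
// the primary thread's place, "close" fills places in order starting at the
// primary's place, and "spread" spaces threads out so each one gets its own
// sub-partition. If update_master_only is set, only the primary thread's
// placement is recomputed.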
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
  // Do not partition the places of the hidden helper team
  if (KMP_HIDDEN_HELPER_TEAM(team))
    return;
  // Copy the primary thread's place partition into the team
4796 kmp_info_t *master_th = team->t.t_threads[0];
4797 KMP_DEBUG_ASSERT(master_th != NULL);
4798 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4799 int first_place = master_th->th.th_first_place;
4800 int last_place = master_th->th.th_last_place;
4801 int masters_place = master_th->th.th_current_place;
4802 int num_masks = __kmp_affinity.num_masks;
4803 team->t.t_first_place = first_place;
4804 team->t.t_last_place = last_place;
  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));
4811 switch (proc_bind) {
  case proc_bind_default:
    // Serial teams might have the proc_bind policy set to proc_bind_default;
    // the primary thread is never rebound, so there is nothing to do here.
    KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
    break;
4819 case proc_bind_primary: {
4821 int n_th = team->t.t_nproc;
4822 for (f = 1; f < n_th; f++) {
4823 kmp_info_t *th = team->t.t_threads[f];
4824 KMP_DEBUG_ASSERT(th != NULL);
4825 th->th.th_first_place = first_place;
4826 th->th.th_last_place = last_place;
4827 th->th.th_new_place = masters_place;
      if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
          team->t.t_display_affinity != 1) {
        team->t.t_display_affinity = 1;
      }

      KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
    }
  } break;
4840 case proc_bind_close: {
4842 int n_th = team->t.t_nproc;
4844 if (first_place <= last_place) {
4845 n_places = last_place - first_place + 1;
4847 n_places = num_masks - first_place + last_place + 1;
4849 if (n_th <= n_places) {
4850 int place = masters_place;
4851 for (f = 1; f < n_th; f++) {
4852 kmp_info_t *th = team->t.t_threads[f];
4853 KMP_DEBUG_ASSERT(th != NULL);
        if (place == last_place) {
          place = first_place;
        } else if (place == (num_masks - 1)) {
4862 th->th.th_first_place = first_place;
4863 th->th.th_last_place = last_place;
4864 th->th.th_new_place = place;
4865 if (__kmp_display_affinity && place != th->th.th_current_place &&
4866 team->t.t_display_affinity != 1) {
4867 team->t.t_display_affinity = 1;
        KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, place, first_place, last_place));
4876 int S, rem, gap, s_count;
4877 S = n_th / n_places;
4879 rem = n_th - (S * n_places);
4880 gap = rem > 0 ? n_places / rem : n_places;
4881 int place = masters_place;
4883 for (f = 0; f < n_th; f++) {
4884 kmp_info_t *th = team->t.t_threads[f];
4885 KMP_DEBUG_ASSERT(th != NULL);
4887 th->th.th_first_place = first_place;
4888 th->th.th_last_place = last_place;
4889 th->th.th_new_place = place;
4890 if (__kmp_display_affinity && place != th->th.th_current_place &&
4891 team->t.t_display_affinity != 1) {
4892 team->t.t_display_affinity = 1;
4896 if ((s_count == S) && rem && (gap_ct == gap)) {
4898 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4900 if (place == last_place) {
4901 place = first_place;
4902 }
else if (place == (num_masks - 1)) {
4910 }
else if (s_count == S) {
4911 if (place == last_place) {
4912 place = first_place;
4913 }
else if (place == (num_masks - 1)) {
4923 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4924 "partition = [%d,%d]\n",
4925 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4926 th->th.th_new_place, first_place, last_place));
4928 KMP_DEBUG_ASSERT(place == masters_place);
4932 case proc_bind_spread: {
4934 int n_th = team->t.t_nproc;
4937 if (first_place <= last_place) {
4938 n_places = last_place - first_place + 1;
4940 n_places = num_masks - first_place + last_place + 1;
4942 if (n_th <= n_places) {
4945 if (n_places != num_masks) {
4946 int S = n_places / n_th;
4947 int s_count, rem, gap, gap_ct;
4949 place = masters_place;
4950 rem = n_places - n_th * S;
4951 gap = rem ? n_th / rem : 1;
4954 if (update_master_only == 1)
4956 for (f = 0; f < thidx; f++) {
4957 kmp_info_t *th = team->t.t_threads[f];
4958 KMP_DEBUG_ASSERT(th != NULL);
4960 th->th.th_first_place = place;
4961 th->th.th_new_place = place;
4962 if (__kmp_display_affinity && place != th->th.th_current_place &&
4963 team->t.t_display_affinity != 1) {
4964 team->t.t_display_affinity = 1;
4967 while (s_count < S) {
4968 if (place == last_place) {
4969 place = first_place;
4970 }
else if (place == (num_masks - 1)) {
4977 if (rem && (gap_ct == gap)) {
4978 if (place == last_place) {
4979 place = first_place;
4980 }
else if (place == (num_masks - 1)) {
4988 th->th.th_last_place = place;
4991 if (place == last_place) {
4992 place = first_place;
4993 }
else if (place == (num_masks - 1)) {
5000 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5001 "partition = [%d,%d], num_masks: %u\n",
5002 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
5003 f, th->th.th_new_place, th->th.th_first_place,
5004 th->th.th_last_place, num_masks));
5010 double current =
static_cast<double>(masters_place);
5012 (
static_cast<double>(n_places + 1) /
static_cast<double>(n_th));
5017 if (update_master_only == 1)
5019 for (f = 0; f < thidx; f++) {
5020 first =
static_cast<int>(current);
5021 last =
static_cast<int>(current + spacing) - 1;
5022 KMP_DEBUG_ASSERT(last >= first);
5023 if (first >= n_places) {
5024 if (masters_place) {
5027 if (first == (masters_place + 1)) {
5028 KMP_DEBUG_ASSERT(f == n_th);
5031 if (last == masters_place) {
5032 KMP_DEBUG_ASSERT(f == (n_th - 1));
5036 KMP_DEBUG_ASSERT(f == n_th);
5041 if (last >= n_places) {
5042 last = (n_places - 1);
5047 KMP_DEBUG_ASSERT(0 <= first);
5048 KMP_DEBUG_ASSERT(n_places > first);
5049 KMP_DEBUG_ASSERT(0 <= last);
5050 KMP_DEBUG_ASSERT(n_places > last);
5051 KMP_DEBUG_ASSERT(last_place >= first_place);
5052 th = team->t.t_threads[f];
5053 KMP_DEBUG_ASSERT(th);
5054 th->th.th_first_place = first;
5055 th->th.th_new_place = place;
5056 th->th.th_last_place = last;
5057 if (__kmp_display_affinity && place != th->th.th_current_place &&
5058 team->t.t_display_affinity != 1) {
5059 team->t.t_display_affinity = 1;
5062 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5063 "partition = [%d,%d], spacing = %.4f\n",
5064 __kmp_gtid_from_thread(team->t.t_threads[f]),
5065 team->t.t_id, f, th->th.th_new_place,
5066 th->th.th_first_place, th->th.th_last_place, spacing));
5070 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5072 int S, rem, gap, s_count;
5073 S = n_th / n_places;
5075 rem = n_th - (S * n_places);
5076 gap = rem > 0 ? n_places / rem : n_places;
5077 int place = masters_place;
5080 if (update_master_only == 1)
5082 for (f = 0; f < thidx; f++) {
5083 kmp_info_t *th = team->t.t_threads[f];
5084 KMP_DEBUG_ASSERT(th != NULL);
5086 th->th.th_first_place = place;
5087 th->th.th_last_place = place;
5088 th->th.th_new_place = place;
5089 if (__kmp_display_affinity && place != th->th.th_current_place &&
5090 team->t.t_display_affinity != 1) {
5091 team->t.t_display_affinity = 1;
5095 if ((s_count == S) && rem && (gap_ct == gap)) {
5097 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5099 if (place == last_place) {
5100 place = first_place;
5101 }
else if (place == (num_masks - 1)) {
5109 }
else if (s_count == S) {
5110 if (place == last_place) {
5111 place = first_place;
5112 }
else if (place == (num_masks - 1)) {
5121 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5122 "partition = [%d,%d]\n",
5123 __kmp_gtid_from_thread(team->t.t_threads[f]),
5124 team->t.t_id, f, th->th.th_new_place,
5125 th->th.th_first_place, th->th.th_last_place));
5127 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}
5138 #endif // KMP_AFFINITY_SUPPORTED
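// Allocate a team: reuse the hot team when forking from a root that is not
// already active, otherwise try to recycle a team object from the team pool,
// and only allocate a fresh kmp_team_t (plus its arrays) as a last resort.
// The caller passes the desired and maximum thread counts together with the
// ICVs and proc-bind policy the team should start with.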
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data,
#endif
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5150 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5153 int use_hot_team = !root->r.r_active;
5155 int do_place_partition = 1;
  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
5158 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
5159 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5162 #if KMP_NESTED_HOT_TEAMS
5163 kmp_hot_team_ptr_t *hot_teams;
5165 team = master->th.th_team;
5166 level = team->t.t_active_level;
5167 if (master->th.th_teams_microtask) {
5168 if (master->th.th_teams_size.nteams > 1 &&
5171 (microtask_t)__kmp_teams_master ||
5172 master->th.th_teams_level <
5179 if ((master->th.th_teams_size.nteams == 1 &&
5180 master->th.th_teams_level >= team->t.t_level) ||
5181 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5182 do_place_partition = 0;
5184 hot_teams = master->th.th_hot_teams;
5185 if (level < __kmp_hot_teams_max_level && hot_teams &&
5186 hot_teams[level].hot_team) {
5194 KMP_DEBUG_ASSERT(new_nproc == 1);
5198 if (use_hot_team && new_nproc > 1) {
5199 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5200 #if KMP_NESTED_HOT_TEAMS
5201 team = hot_teams[level].hot_team;
5203 team = root->r.r_hot_team;
5206 if (__kmp_tasking_mode != tskm_immediate_exec) {
5207 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5208 "task_team[1] = %p before reinit\n",
5209 team->t.t_task_team[0], team->t.t_task_team[1]));
5213 if (team->t.t_nproc != new_nproc &&
5214 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5216 int old_nthr = team->t.t_nproc;
5217 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5222 if (do_place_partition == 0)
5223 team->t.t_proc_bind = proc_bind_default;
5227 if (team->t.t_nproc == new_nproc) {
5228 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
5231 if (team->t.t_size_changed == -1) {
5232 team->t.t_size_changed = 1;
5234 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5238 kmp_r_sched_t new_sched = new_icvs->sched;
5240 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5242 __kmp_reinitialize_team(team, new_icvs,
5243 root->r.r_uber_thread->th.th_ident);
5245 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5246 team->t.t_threads[0], team));
5247 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5249 #if KMP_AFFINITY_SUPPORTED
5250 if ((team->t.t_size_changed == 0) &&
5251 (team->t.t_proc_bind == new_proc_bind)) {
5252 if (new_proc_bind == proc_bind_spread) {
5253 if (do_place_partition) {
5255 __kmp_partition_places(team, 1);
5258 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: "
5259 "proc_bind = %d, partition = [%d,%d]\n",
5260 team->t.t_id, new_proc_bind, team->t.t_first_place,
5261 team->t.t_last_place));
5263 if (do_place_partition) {
5264 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5265 __kmp_partition_places(team);
5269 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
    } else if (team->t.t_nproc > new_nproc) {
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));

      team->t.t_size_changed = 1;
5277 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5280 __kmp_add_threads_to_team(team, new_nproc);
5282 #if KMP_NESTED_HOT_TEAMS
5283 if (__kmp_hot_teams_mode == 0) {
5286 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5287 hot_teams[level].hot_team_nth = new_nproc;
5288 #endif // KMP_NESTED_HOT_TEAMS
5290 for (f = new_nproc; f < team->t.t_nproc; f++) {
5291 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5292 if (__kmp_tasking_mode != tskm_immediate_exec) {
5295 team->t.t_threads[f]->th.th_task_team = NULL;
5297 __kmp_free_thread(team->t.t_threads[f]);
5298 team->t.t_threads[f] = NULL;
5300 #if KMP_NESTED_HOT_TEAMS
5305 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5306 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5307 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5308 for (
int b = 0; b < bs_last_barrier; ++b) {
5309 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5310 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5312 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5316 #endif // KMP_NESTED_HOT_TEAMS
5317 team->t.t_nproc = new_nproc;
5319 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5320 __kmp_reinitialize_team(team, new_icvs,
5321 root->r.r_uber_thread->th.th_ident);
5324 for (f = 0; f < new_nproc; ++f) {
5325 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5330 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5331 team->t.t_threads[0], team));
5333 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5336 for (f = 0; f < team->t.t_nproc; f++) {
5337 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5338 team->t.t_threads[f]->th.th_team_nproc ==
5343 if (do_place_partition) {
5344 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5345 #if KMP_AFFINITY_SUPPORTED
5346 __kmp_partition_places(team);
5350 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5351 kmp_affin_mask_t *old_mask;
5352 if (KMP_AFFINITY_CAPABLE()) {
        KMP_CPU_ALLOC(old_mask);
      }
#endif
      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));
      int old_nproc = team->t.t_nproc; // save old value and use it below
5361 team->t.t_size_changed = 1;
5363 #if KMP_NESTED_HOT_TEAMS
5364 int avail_threads = hot_teams[level].hot_team_nth;
5365 if (new_nproc < avail_threads)
5366 avail_threads = new_nproc;
5367 kmp_info_t **other_threads = team->t.t_threads;
5368 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5372 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5373 for (b = 0; b < bs_last_barrier; ++b) {
5374 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5375 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5377 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5381 if (hot_teams[level].hot_team_nth >= new_nproc) {
5384 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5385 team->t.t_nproc = new_nproc;
5389 team->t.t_nproc = hot_teams[level].hot_team_nth;
5390 hot_teams[level].hot_team_nth = new_nproc;
5391 #endif // KMP_NESTED_HOT_TEAMS
5392 if (team->t.t_max_nproc < new_nproc) {
5394 __kmp_reallocate_team_arrays(team, new_nproc);
5395 __kmp_reinitialize_team(team, new_icvs, NULL);
5398 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5404 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5408 for (f = team->t.t_nproc; f < new_nproc; f++) {
5409 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5410 KMP_DEBUG_ASSERT(new_worker);
5411 team->t.t_threads[f] = new_worker;
        KA_TRACE(20, ("__kmp_allocate_team: team %d init T#%d arrived: "
                      "join=%llu, plain=%llu\n",
                      team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id,
                      f, team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                      team->t.t_bar[bs_plain_barrier].b_arrived));
5422 kmp_balign_t *balign = new_worker->th.th_bar;
5423 for (b = 0; b < bs_last_barrier; ++b) {
5424 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5425 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5426 KMP_BARRIER_PARENT_FLAG);
5428 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5434 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5435 if (KMP_AFFINITY_CAPABLE()) {
5437 __kmp_set_system_affinity(old_mask, TRUE);
5438 KMP_CPU_FREE(old_mask);
5441 #if KMP_NESTED_HOT_TEAMS
5443 #endif // KMP_NESTED_HOT_TEAMS
5444 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5447 __kmp_add_threads_to_team(team, new_nproc);
5451 __kmp_initialize_team(team, new_nproc, new_icvs,
5452 root->r.r_uber_thread->th.th_ident);
5455 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5456 for (f = 0; f < team->t.t_nproc; ++f)
5457 __kmp_initialize_info(team->t.t_threads[f], team, f,
5458 __kmp_gtid_from_tid(f, team));
5461 kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
5462 for (f = old_nproc; f < team->t.t_nproc; ++f)
5463 team->t.t_threads[f]->th.th_task_state = old_state;
5466 for (f = 0; f < team->t.t_nproc; ++f) {
5467 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5468 team->t.t_threads[f]->th.th_team_nproc ==
5473 if (do_place_partition) {
5474 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5475 #if KMP_AFFINITY_SUPPORTED
5476 __kmp_partition_places(team);
5481 kmp_info_t *master = team->t.t_threads[0];
5482 if (master->th.th_teams_microtask) {
5483 for (f = 1; f < new_nproc; ++f) {
5485 kmp_info_t *thr = team->t.t_threads[f];
5486 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5487 thr->th.th_teams_level = master->th.th_teams_level;
5488 thr->th.th_teams_size = master->th.th_teams_size;
5491 #if KMP_NESTED_HOT_TEAMS
5495 for (f = 1; f < new_nproc; ++f) {
5496 kmp_info_t *thr = team->t.t_threads[f];
5498 kmp_balign_t *balign = thr->th.th_bar;
5499 for (b = 0; b < bs_last_barrier; ++b) {
5500 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5501 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5503 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5508 #endif // KMP_NESTED_HOT_TEAMS
5511 __kmp_alloc_argv_entries(argc, team, TRUE);
5512 KMP_CHECK_UPDATE(team->t.t_argc, argc);
    KF_TRACE(10, (" hot_team = %p\n", team));

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p after reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
    }
5527 __ompt_team_assign_id(team, ompt_parallel_data);
5537 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5540 if (team->t.t_max_nproc >= max_nproc) {
5542 __kmp_team_pool = team->t.t_next_pool;
5544 if (max_nproc > 1 &&
5545 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5547 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5552 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
      KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
                    "task_team[1] %p to NULL\n",
                    &team->t.t_task_team[0], &team->t.t_task_team[1]));
5557 team->t.t_task_team[0] = NULL;
5558 team->t.t_task_team[1] = NULL;
5561 __kmp_alloc_argv_entries(argc, team, TRUE);
5562 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5565 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5566 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5569 for (b = 0; b < bs_last_barrier; ++b) {
5570 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5572 team->t.t_bar[b].b_master_arrived = 0;
5573 team->t.t_bar[b].b_team_arrived = 0;
5578 team->t.t_proc_bind = new_proc_bind;
      KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
                    team->t.t_id));

      __ompt_team_assign_id(team, ompt_parallel_data);

      KMP_MB();
      return team;
    }
5596 team = __kmp_reap_team(team);
5597 __kmp_team_pool = team;
5602 team = (kmp_team_t *)__kmp_allocate(
sizeof(kmp_team_t));
5605 team->t.t_max_nproc = max_nproc;
5606 if (max_nproc > 1 &&
5607 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5609 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5614 __kmp_allocate_team_arrays(team, max_nproc);
  KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
  __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

  KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                "%p to NULL\n",
                &team->t.t_task_team[0], &team->t.t_task_team[1]));
5622 team->t.t_task_team[0] = NULL;
5624 team->t.t_task_team[1] = NULL;
5627 if (__kmp_storage_map) {
5628 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5632 __kmp_alloc_argv_entries(argc, team, FALSE);
5633 team->t.t_argc = argc;
  KA_TRACE(20,
           ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
            team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5640 for (b = 0; b < bs_last_barrier; ++b) {
5641 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5643 team->t.t_bar[b].b_master_arrived = 0;
5644 team->t.t_bar[b].b_team_arrived = 0;
5649 team->t.t_proc_bind = new_proc_bind;
5652 __ompt_team_assign_id(team, ompt_parallel_data);
5653 team->t.ompt_serialized_team_info = NULL;
  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));

  return team;
}
5669 void __kmp_free_team(kmp_root_t *root,
5670 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));
5676 KMP_DEBUG_ASSERT(root);
5677 KMP_DEBUG_ASSERT(team);
5678 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5679 KMP_DEBUG_ASSERT(team->t.t_threads);
5681 int use_hot_team = team == root->r.r_hot_team;
5682 #if KMP_NESTED_HOT_TEAMS
5685 level = team->t.t_active_level - 1;
5686 if (master->th.th_teams_microtask) {
5687 if (master->th.th_teams_size.nteams > 1) {
5691 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5692 master->th.th_teams_level == team->t.t_level) {
5698 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5700 if (level < __kmp_hot_teams_max_level) {
5701 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5705 #endif // KMP_NESTED_HOT_TEAMS
  // team is done working
  TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library
  team->t.t_copyin_counter = 0; // init counter for possible reuse
5716 if (!use_hot_team) {
5717 if (__kmp_tasking_mode != tskm_immediate_exec) {
5719 for (f = 1; f < team->t.t_nproc; ++f) {
5720 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5721 kmp_info_t *th = team->t.t_threads[f];
5722 volatile kmp_uint32 *state = &th->th.th_reap_state;
5723 while (*state != KMP_SAFE_TO_REAP) {
5727 if (!__kmp_is_thread_alive(th, &ecode)) {
5728 *state = KMP_SAFE_TO_REAP;
5733 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5734 if (fl.is_sleeping())
5735 fl.resume(__kmp_gtid_from_thread(th));
5742 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5743 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5744 if (task_team != NULL) {
5745 for (f = 0; f < team->t.t_nproc; ++f) {
5746 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5747 team->t.t_threads[f]->th.th_task_team = NULL;
        KA_TRACE(20,
                 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
                  __kmp_get_gtid(), task_team, team->t.t_id));
5753 #if KMP_NESTED_HOT_TEAMS
5754 __kmp_free_task_team(master, task_team);
5756 team->t.t_task_team[tt_idx] = NULL;
5762 team->t.t_parent = NULL;
5763 team->t.t_level = 0;
5764 team->t.t_active_level = 0;
5767 for (f = 1; f < team->t.t_nproc; ++f) {
5768 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5769 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5770 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5773 __kmp_free_thread(team->t.t_threads[f]);
5776 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5779 team->t.b->go_release();
5780 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5781 for (f = 1; f < team->t.t_nproc; ++f) {
5782 if (team->t.b->sleep[f].sleep) {
5783 __kmp_atomic_resume_64(
5784 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5785 (kmp_atomic_flag_64<> *)NULL);
    for (int f = 1; f < team->t.t_nproc; ++f) {
5791 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5797 for (f = 1; f < team->t.t_nproc; ++f) {
5798 team->t.t_threads[f] = NULL;
5801 if (team->t.t_max_nproc > 1 &&
5802 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5803 distributedBarrier::deallocate(team->t.b);
5808 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5809 __kmp_team_pool = (
volatile kmp_team_t *)team;
5812 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5813 team->t.t_threads[1]->th.th_cg_roots);
5814 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5816 for (f = 1; f < team->t.t_nproc; ++f) {
5817 kmp_info_t *thr = team->t.t_threads[f];
5818 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5819 thr->th.th_cg_roots->cg_root == thr);
5821 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5822 thr->th.th_cg_roots = tmp->up;
        KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
                       " up to node %p. cg_nthreads was %d\n",
                       thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
        int i = tmp->cg_nthreads--;
        if (i == 1) {
          __kmp_free(tmp); // free CG if we are the last thread in it
        }
        // Restore current task's thread_limit from CG root
        if (thr->th.th_cg_roots)
          thr->th.th_current_task->td_icvs.thread_limit =
              thr->th.th_cg_roots->cg_thread_limit;
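// Free all memory owned by a pooled team object and return the next team in
// the pool so the caller can continue walking the list.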
5842 kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5843 kmp_team_t *next_pool = team->t.t_next_pool;
5845 KMP_DEBUG_ASSERT(team);
5846 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5847 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5848 KMP_DEBUG_ASSERT(team->t.t_threads);
5849 KMP_DEBUG_ASSERT(team->t.t_argv);
5854 __kmp_free_team_arrays(team);
5855 if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  KMP_MB();
  return next_pool;
}
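// Return a worker thread to the thread pool. The pool is kept sorted by
// ascending gtid so the lowest-numbered available thread is handed out first;
// __kmp_thread_pool_insert_pt caches the scan position to make repeated
// insertions cheap.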
5888 void __kmp_free_thread(kmp_info_t *this_th) {
  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5895 KMP_DEBUG_ASSERT(this_th);
5900 kmp_balign_t *balign = this_th->th.th_bar;
5901 for (b = 0; b < bs_last_barrier; ++b) {
5902 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5903 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5904 balign[b].bb.team = NULL;
5905 balign[b].bb.leaf_kids = 0;
5907 this_th->th.th_task_state = 0;
5908 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5911 TCW_PTR(this_th->th.th_team, NULL);
5912 TCW_PTR(this_th->th.th_root, NULL);
5913 TCW_PTR(this_th->th.th_dispatch, NULL);
5915 while (this_th->th.th_cg_roots) {
5916 this_th->th.th_cg_roots->cg_nthreads--;
5917 KA_TRACE(100, (
"__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5918 " %p of thread %p to %d\n",
5919 this_th, this_th->th.th_cg_roots,
5920 this_th->th.th_cg_roots->cg_root,
5921 this_th->th.th_cg_roots->cg_nthreads));
5922 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5923 if (tmp->cg_root == this_th) {
5924 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5926 5, (
"__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5927 this_th->th.th_cg_roots = tmp->up;
5930 if (tmp->cg_nthreads == 0) {
5933 this_th->th.th_cg_roots = NULL;
5943 __kmp_free_implicit_task(this_th);
5944 this_th->th.th_current_task = NULL;
5948 gtid = this_th->th.th_info.ds.ds_gtid;
5949 if (__kmp_thread_pool_insert_pt != NULL) {
5950 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5951 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5952 __kmp_thread_pool_insert_pt = NULL;
5961 if (__kmp_thread_pool_insert_pt != NULL) {
5962 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5964 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5966 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5967 scan = &((*scan)->th.th_next_pool))
5972 TCW_PTR(this_th->th.th_next_pool, *scan);
5973 __kmp_thread_pool_insert_pt = *scan = this_th;
5974 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5975 (this_th->th.th_info.ds.ds_gtid <
5976 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5977 TCW_4(this_th->th.th_in_pool, TRUE);
5978 __kmp_suspend_initialize_thread(this_th);
5979 __kmp_lock_suspend_mx(this_th);
5980 if (this_th->th.th_active == TRUE) {
5981 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5982 this_th->th.th_active_in_pool = TRUE;
5986 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5989 __kmp_unlock_suspend_mx(this_th);
5991 TCW_4(__kmp_nth, __kmp_nth - 1);
5993 #ifdef KMP_ADJUST_BLOCKTIME
5996 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5997 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5998 if (__kmp_nth <= __kmp_avail_proc) {
5999 __kmp_zero_bt = FALSE;
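/* ------------------------------------------------------------------------ */
// Main loop executed by every worker thread: wait at the fork barrier for a
// team to be assigned, invoke the team's microtask, then wait at the join
// barrier; repeat until the library shuts down.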
6009 void *__kmp_launch_thread(kmp_info_t *this_thr) {
6010 #if OMP_PROFILING_SUPPORT
6011 ProfileTraceFile = getenv(
"LIBOMPTARGET_PROFILE");
6013 if (ProfileTraceFile)
6014 llvm::timeTraceProfilerInitialize(500 ,
"libomptarget");
6017 int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
6024 if (__kmp_env_consistency_check) {
6025 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
6029 if (ompd_state & OMPD_ENABLE_BP)
6030 ompd_bp_thread_begin();
6034 ompt_data_t *thread_data =
nullptr;
6035 if (ompt_enabled.enabled) {
6036 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
6037 *thread_data = ompt_data_none;
6039 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6040 this_thr->th.ompt_thread_info.wait_id = 0;
6041 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
6042 this_thr->th.ompt_thread_info.parallel_flags = 0;
6043 if (ompt_enabled.ompt_callback_thread_begin) {
6044 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
6045 ompt_thread_worker, thread_data);
6047 this_thr->th.ompt_thread_info.state = ompt_state_idle;
6052 while (!TCR_4(__kmp_global.g.g_done)) {
6053 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6057 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
6060 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
6063 if (ompt_enabled.enabled) {
6064 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6068 pteam = &this_thr->th.th_team;
6071 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6073 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));
6080 updateHWFPControl(*pteam);
6083 if (ompt_enabled.enabled) {
6084 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6088 rc = (*pteam)->t.t_invoke(gtid);
6092 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6093 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6094 (*pteam)->t.t_pkfn));
6097 if (ompt_enabled.enabled) {
6099 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6101 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6105 __kmp_join_barrier(gtid);
6110 if (ompd_state & OMPD_ENABLE_BP)
6111 ompd_bp_thread_end();
6115 if (ompt_enabled.ompt_callback_thread_end) {
6116 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6120 this_thr->th.th_task_team = NULL;
6122 __kmp_common_destroy_gtid(gtid);
  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));

#if OMP_PROFILING_SUPPORT
  llvm::timeTraceProfilerFinishThread();
#endif
  return this_thr;
}
void __kmp_internal_end_dest(void *specific_gtid) {
  // Make sure no significant bits are lost
  int gtid;
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
6144 __kmp_internal_end_thread(gtid);
6147 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
6150 __kmp_internal_end_atexit();
6157 void __kmp_internal_end_atexit(
void) {
6158 KA_TRACE(30, (
"__kmp_internal_end_atexit\n"));
6182 __kmp_internal_end_library(-1);
6184 __kmp_close_console();
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
6193 KMP_DEBUG_ASSERT(thread != NULL);
6195 gtid = thread->th.th_info.ds.ds_gtid;
6198 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    KA_TRACE(
        20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
             gtid));
    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      while (
          !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
        KMP_CPU_PAUSE();
      __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
    } else {
      kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
      __kmp_release_64(&flag);
    }
  }
6218 __kmp_reap_worker(thread);
6230 if (thread->th.th_active_in_pool) {
6231 thread->th.th_active_in_pool = FALSE;
6232 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6233 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6237 __kmp_free_implicit_task(thread);
6241 __kmp_free_fast_memory(thread);
6244 __kmp_suspend_uninitialize_thread(thread);
6246 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6247 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6252 #ifdef KMP_ADJUST_BLOCKTIME
6255 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6256 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6257 if (__kmp_nth <= __kmp_avail_proc) {
6258 __kmp_zero_bt = FALSE;
6264 if (__kmp_env_consistency_check) {
6265 if (thread->th.th_cons) {
6266 __kmp_free_cons_stack(thread->th.th_cons);
6267 thread->th.th_cons = NULL;
6271 if (thread->th.th_pri_common != NULL) {
6272 __kmp_free(thread->th.th_pri_common);
6273 thread->th.th_pri_common = NULL;
6276 if (thread->th.th_task_state_memo_stack != NULL) {
6277 __kmp_free(thread->th.th_task_state_memo_stack);
6278 thread->th.th_task_state_memo_stack = NULL;
6282 if (thread->th.th_local.bget_data != NULL) {
6283 __kmp_finalize_bget(thread);
6287 #if KMP_AFFINITY_SUPPORTED
6288 if (thread->th.th_affin_mask != NULL) {
6289 KMP_CPU_FREE(thread->th.th_affin_mask);
6290 thread->th.th_affin_mask = NULL;
6294 #if KMP_USE_HIER_SCHED
6295 if (thread->th.th_hier_bar_data != NULL) {
6296 __kmp_free(thread->th.th_hier_bar_data);
6297 thread->th.th_hier_bar_data = NULL;
6301 __kmp_reap_team(thread->th.th_serial_team);
6302 thread->th.th_serial_team = NULL;
6309 static void __kmp_itthash_clean(kmp_info_t *th) {
6311 if (__kmp_itt_region_domains.count > 0) {
6312 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6313 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6315 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6316 __kmp_thread_free(th, bucket);
6321 if (__kmp_itt_barrier_domains.count > 0) {
6322 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6323 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6325 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6326 __kmp_thread_free(th, bucket);
static void __kmp_internal_end(void) {
6338 __kmp_unregister_library();
6345 __kmp_reclaim_dead_roots();
6349 for (i = 0; i < __kmp_threads_capacity; i++)
6351 if (__kmp_root[i]->r.r_active)
6354 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6356 if (i < __kmp_threads_capacity) {
6368 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6369 if (TCR_4(__kmp_init_monitor)) {
6370 __kmp_reap_monitor(&__kmp_monitor);
6371 TCW_4(__kmp_init_monitor, 0);
6373 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6374 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6375 #endif // KMP_USE_MONITOR
6380 for (i = 0; i < __kmp_threads_capacity; i++) {
6381 if (__kmp_root[i]) {
6384 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6393 while (__kmp_thread_pool != NULL) {
6395 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6396 __kmp_thread_pool = thread->th.th_next_pool;
6398 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6399 thread->th.th_next_pool = NULL;
6400 thread->th.th_in_pool = FALSE;
6401 __kmp_reap_thread(thread, 0);
6403 __kmp_thread_pool_insert_pt = NULL;
6406 while (__kmp_team_pool != NULL) {
6408 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6409 __kmp_team_pool = team->t.t_next_pool;
6411 team->t.t_next_pool = NULL;
6412 __kmp_reap_team(team);
6415 __kmp_reap_task_teams();
6422 for (i = 0; i < __kmp_threads_capacity; i++) {
6423 kmp_info_t *thr = __kmp_threads[i];
6424 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6429 for (i = 0; i < __kmp_threads_capacity; ++i) {
6436 TCW_SYNC_4(__kmp_init_common, FALSE);
6438 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6446 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6447 if (TCR_4(__kmp_init_monitor)) {
6448 __kmp_reap_monitor(&__kmp_monitor);
6449 TCW_4(__kmp_init_monitor, 0);
6451 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6452 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6455 TCW_4(__kmp_init_gtid, FALSE);
void __kmp_internal_end_library(int gtid_req) {
6471 if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }
6482 if (TCR_4(__kmp_init_hidden_helper) &&
6483 !TCR_4(__kmp_hidden_helper_team_done)) {
6484 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6486 __kmp_hidden_helper_main_thread_release();
6488 __kmp_hidden_helper_threads_deinitz_wait();
6494 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6496 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
  if (gtid == KMP_GTID_SHUTDOWN) {
    KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                  "already shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_MONITOR) {
    KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                  "registered, or system shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                  "shutdown\n"));
    // we don't know who we are, but we may still shut down the library
  } else if (KMP_UBER_GTID(gtid)) {
6511 if (__kmp_root[gtid]->r.r_active) {
6512 __kmp_global.g.g_abort = -1;
6513 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6514 __kmp_unregister_library();
      KA_TRACE(10,
               ("__kmp_internal_end_library: root still active, abort T#%d\n",
                gtid));
      return;
    } else {
      __kmp_itthash_clean(__kmp_threads[gtid]);
      KA_TRACE(
          10,
          ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6524 __kmp_unregister_root_current_thread(gtid);
6531 #ifdef DUMP_DEBUG_ON_EXIT
6532 if (__kmp_debug_buf)
6533 __kmp_dump_debug_buffer();
6538 __kmp_unregister_library();
6543 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6546 if (__kmp_global.g.g_abort) {
6547 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6549 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6552 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6553 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6562 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6565 __kmp_internal_end();
6567 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6568 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6570 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6572 #ifdef DUMP_DEBUG_ON_EXIT
6573 if (__kmp_debug_buf)
6574 __kmp_dump_debug_buffer();
6578 __kmp_close_console();
6581 __kmp_fini_allocator();
6585 void __kmp_internal_end_thread(
int gtid_req) {
6594 if (__kmp_global.g.g_abort) {
6595 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6599 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6600 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6605 if (TCR_4(__kmp_init_hidden_helper) &&
6606 !TCR_4(__kmp_hidden_helper_team_done)) {
6607 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6609 __kmp_hidden_helper_main_thread_release();
6611 __kmp_hidden_helper_threads_deinitz_wait();
6618 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6620 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
  if (gtid == KMP_GTID_SHUTDOWN) {
    KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
                  "already shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_MONITOR) {
    KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
                  "registered, or system shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                  "shutdown\n"));
    return;
  } else if (KMP_UBER_GTID(gtid)) {
6636 if (__kmp_root[gtid]->r.r_active) {
6637 __kmp_global.g.g_abort = -1;
6638 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
      KA_TRACE(10,
               ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                gtid));
      return;
    } else {
      KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                    gtid));
      __kmp_unregister_root_current_thread(gtid);
    }
  } else {
    // just a worker thread, let's leave
    KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6653 __kmp_threads[gtid]->th.th_task_team = NULL;
6657 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6663 if (__kmp_pause_status != kmp_hard_paused)
6667 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6672 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6675 if (__kmp_global.g.g_abort) {
6676 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6678 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6681 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6682 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6693 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6695 for (i = 0; i < __kmp_threads_capacity; ++i) {
6696 if (KMP_UBER_GTID(i)) {
6699 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6700 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6701 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6708 __kmp_internal_end();
6710 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6711 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6713 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6715 #ifdef DUMP_DEBUG_ON_EXIT
6716 if (__kmp_debug_buf)
6717 __kmp_dump_debug_buffer();
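/* ------------------------------------------------------------------------ */
// Library registration: on startup the runtime publishes a unique marker
// (via an environment variable, or a POSIX shared-memory segment when
// KMP_USE_SHM is defined) so that a second copy of the runtime loaded into
// the same process can be detected and either rejected or tolerated
// (KMP_DUPLICATE_LIB_OK). The marker has the form
// "<flag address>-<flag value>-<library file>",
// e.g. "0x7f0123456789-cafe0042-libomp.so" (values here are illustrative).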
6724 static long __kmp_registration_flag = 0;
6726 static char *__kmp_registration_str = NULL;
6729 static inline char *__kmp_reg_status_name() {
6735 #if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
                          (int)getuid());
#else
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
#endif
}
6743 #if defined(KMP_USE_SHM)
// If /dev/shm is not accessible, a temporary file under /tmp is used instead.
char *temp_reg_status_file_name = nullptr;
#endif

void __kmp_register_library_startup(void) {
6750 char *name = __kmp_reg_status_name();
6756 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6757 __kmp_initialize_system_tick();
6759 __kmp_read_system_time(&time.dtime);
6760 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6761 __kmp_registration_str =
6762 __kmp_str_format(
"%p-%lx-%s", &__kmp_registration_flag,
6763 __kmp_registration_flag, KMP_LIBRARY_FILE);
6765 KA_TRACE(50, (
"__kmp_register_library_startup: %s=\"%s\"\n", name,
6766 __kmp_registration_str));
6772 #if defined(KMP_USE_SHM)
6773 char *shm_name = __kmp_str_format(
"/%s", name);
6774 int shm_preexist = 0;
6776 int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6777 if ((fd1 == -1) && (errno == EEXIST)) {
6780 fd1 = shm_open(shm_name, O_RDWR, 0666);
      __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM"), KMP_ERR(0),
                  __kmp_msg_null);
    } else { // able to open existing file
      shm_preexist = 1;
    }
  } else if (fd1 == -1) {
    // SHM didn't open and the error was something other than "already
    // exists", so fall back to a temporary file under /tmp.
6794 char *temp_file_name = __kmp_str_format(
"/tmp/%sXXXXXX", name);
6795 fd1 = mkstemp(temp_file_name);
      __kmp_fatal(KMP_MSG(FunctionError, "Can't open TEMP"), KMP_ERR(errno),
                  __kmp_msg_null);
    }
    temp_reg_status_file_name = temp_file_name;
6803 if (shm_preexist == 0) {
    if (ftruncate(fd1, SHM_SIZE) == -1) {
      // error occurred setting size
      __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM"),
                  KMP_ERR(errno), __kmp_msg_null);
    }
  }
  char *data1 =
      (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
  if (data1 == MAP_FAILED) {
    // failed to map shared memory
    __kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM"), KMP_ERR(errno),
                __kmp_msg_null);
  }
6818 if (shm_preexist == 0) {
6819 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6822 value = __kmp_str_format(
"%s", data1);
6823 munmap(data1, SHM_SIZE);
6825 #else // Windows and unix with static library
6827 __kmp_env_set(name, __kmp_registration_str, 0);
6829 value = __kmp_env_get(name);
6832 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6839 char *flag_addr_str = NULL;
6840 char *flag_val_str = NULL;
6841 char const *file_name = NULL;
    __kmp_str_split(tail, '-', &flag_addr_str, &tail);
    __kmp_str_split(tail, '-', &flag_val_str, &tail);
    file_name = tail;
    unsigned long *flag_addr = 0;
    unsigned long flag_val = 0;
    KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
    KMP_SSCANF(flag_val_str, "%lx", &flag_val);
    if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6854 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6868 file_name =
"unknown library";
6873 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6874 if (!__kmp_str_match_true(duplicate_ok)) {
6876 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6877 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6879 KMP_INTERNAL_FREE(duplicate_ok);
6880 __kmp_duplicate_library_ok = 1;
6885 #if defined(KMP_USE_SHM)
6887 shm_unlink(shm_name);
6890 __kmp_env_unset(name);
6894 KMP_DEBUG_ASSERT(0);
  KMP_INTERNAL_FREE((void *)value);
#if defined(KMP_USE_SHM)
  KMP_INTERNAL_FREE((void *)shm_name);
#endif
  KMP_INTERNAL_FREE((void *)name);
}
void __kmp_unregister_library(void) {
6909 char *name = __kmp_reg_status_name();
6912 #if defined(KMP_USE_SHM)
  bool use_shm = true;
  char *shm_name = __kmp_str_format("/%s", name);
6915 int fd1 = shm_open(shm_name, O_RDONLY, 0666);
6919 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6920 fd1 = open(temp_reg_status_file_name, O_RDONLY);
6926 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6927 if (data1 != MAP_FAILED) {
6928 value = __kmp_str_format(
"%s", data1);
6929 munmap(data1, SHM_SIZE);
6933 value = __kmp_env_get(name);
6936 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6937 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6938 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6940 #if defined(KMP_USE_SHM)
6942 shm_unlink(shm_name);
6944 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6945 unlink(temp_reg_status_file_name);
6948 __kmp_env_unset(name);
6952 #if defined(KMP_USE_SHM)
6953 KMP_INTERNAL_FREE(shm_name);
6955 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6956 KMP_INTERNAL_FREE(temp_reg_status_file_name);
6960 KMP_INTERNAL_FREE(__kmp_registration_str);
6961 KMP_INTERNAL_FREE(value);
6962 KMP_INTERNAL_FREE(name);
6964 __kmp_registration_flag = 0;
6965 __kmp_registration_str = NULL;
6972 #if KMP_MIC_SUPPORTED
6974 static void __kmp_check_mic_type() {
6975 kmp_cpuid_t cpuid_state = {0};
6976 kmp_cpuid_t *cs_p = &cpuid_state;
6977 __kmp_x86_cpuid(1, 0, cs_p);
6979 if ((cs_p->eax & 0xff0) == 0xB10) {
6980 __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}
#endif /* KMP_MIC_SUPPORTED */

#if KMP_HAVE_UMWAIT
6991 static void __kmp_user_level_mwait_init() {
6992 struct kmp_cpuid buf;
6993 __kmp_x86_cpuid(7, 0, &buf);
6994 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
6995 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
6996 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
6997 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
6998 __kmp_umwait_enabled));
7000 #elif KMP_HAVE_MWAIT
7001 #ifndef AT_INTELPHIUSERMWAIT
7004 #define AT_INTELPHIUSERMWAIT 10000
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }
7012 static void __kmp_user_level_mwait_init() {
7017 if (__kmp_mic_type == mic3) {
7018 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
7019 if ((res & 0x1) || __kmp_user_level_mwait) {
7020 __kmp_mwait_enabled = TRUE;
7021 if (__kmp_user_level_mwait) {
7022 KMP_INFORM(EnvMwaitWarn);
7025 __kmp_mwait_enabled = FALSE;
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
                "__kmp_mwait_enabled = %d\n",
                __kmp_mic_type, __kmp_mwait_enabled));
}
#endif /* KMP_HAVE_UMWAIT */
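// One-time (per process) initialization of the runtime: locks, barrier
// defaults, thread/team capacity and the __kmp_threads/__kmp_root tables,
// environment processing, and registration of the initial root thread.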
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
7054 __kmp_validate_locks();
7056 #if ENABLE_LIBOMPTARGET
7058 __kmp_init_omptarget();
7062 __kmp_init_allocator();
7068 if (__kmp_need_register_serial)
7069 __kmp_register_library_startup();
7072 if (TCR_4(__kmp_global.g.g_done)) {
7073 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
7076 __kmp_global.g.g_abort = 0;
7077 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7080 #if KMP_USE_ADAPTIVE_LOCKS
7081 #if KMP_DEBUG_ADAPTIVE_LOCKS
7082 __kmp_init_speculative_stats();
7085 #if KMP_STATS_ENABLED
7088 __kmp_init_lock(&__kmp_global_lock);
7089 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7090 __kmp_init_lock(&__kmp_debug_lock);
7091 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7092 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7093 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7094 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7095 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7096 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7097 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7098 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7099 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7100 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7101 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7102 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7103 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7104 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7105 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7107 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7109 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7113 __kmp_runtime_initialize();
7115 #if KMP_MIC_SUPPORTED
7116 __kmp_check_mic_type();
7123 __kmp_abort_delay = 0;
7127 __kmp_dflt_team_nth_ub = __kmp_xproc;
7128 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7129 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7131 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7132 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7134 __kmp_max_nth = __kmp_sys_max_nth;
7135 __kmp_cg_max_nth = __kmp_sys_max_nth;
7136 __kmp_teams_max_nth = __kmp_xproc;
7137 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7138 __kmp_teams_max_nth = __kmp_sys_max_nth;
7143 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7145 __kmp_monitor_wakeups =
7146 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7147 __kmp_bt_intervals =
7148 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7151 __kmp_library = library_throughput;
7153 __kmp_static = kmp_sch_static_balanced;
7160 #if KMP_FAST_REDUCTION_BARRIER
7161 #define kmp_reduction_barrier_gather_bb ((int)1)
7162 #define kmp_reduction_barrier_release_bb ((int)1)
7163 #define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7164 #define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7165 #endif // KMP_FAST_REDUCTION_BARRIER
7166 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7167 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7168 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7169 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7170 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7171 #if KMP_FAST_REDUCTION_BARRIER
7172 if (i == bs_reduction_barrier) {
7174 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7175 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7176 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7177 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7179 #endif // KMP_FAST_REDUCTION_BARRIER
7181 #if KMP_FAST_REDUCTION_BARRIER
7182 #undef kmp_reduction_barrier_release_pat
7183 #undef kmp_reduction_barrier_gather_pat
7184 #undef kmp_reduction_barrier_release_bb
7185 #undef kmp_reduction_barrier_gather_bb
7186 #endif // KMP_FAST_REDUCTION_BARRIER
7187 #if KMP_MIC_SUPPORTED
7188 if (__kmp_mic_type == mic2) {
7190 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7191 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7193 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7194 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7196 #if KMP_FAST_REDUCTION_BARRIER
7197 if (__kmp_mic_type == mic2) {
7198 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7199 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7201 #endif // KMP_FAST_REDUCTION_BARRIER
7202 #endif // KMP_MIC_SUPPORTED
7206 __kmp_env_checks = TRUE;
7208 __kmp_env_checks = FALSE;
7212 __kmp_foreign_tp = TRUE;
7214 __kmp_global.g.g_dynamic = FALSE;
7215 __kmp_global.g.g_dynamic_mode = dynamic_default;
7217 __kmp_init_nesting_mode();
7219 __kmp_env_initialize(NULL);
7221 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7222 __kmp_user_level_mwait_init();
7226 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
7227 if (__kmp_str_match_true(val)) {
7228 kmp_str_buf_t buffer;
7229 __kmp_str_buf_init(&buffer);
7230 __kmp_i18n_dump_catalog(&buffer);
7231 __kmp_printf(
"%s", buffer.str);
7232 __kmp_str_buf_free(&buffer);
7234 __kmp_env_free(&val);
7237 __kmp_threads_capacity =
7238 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7240 __kmp_tp_capacity = __kmp_default_tp_capacity(
7241 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7246 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7247 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7248 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7249 __kmp_thread_pool = NULL;
7250 __kmp_thread_pool_insert_pt = NULL;
7251 __kmp_team_pool = NULL;
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);
7265 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7267 KMP_DEBUG_ASSERT(__kmp_nth == 0);
7272 gtid = __kmp_register_root(TRUE);
7273 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
7274 KMP_ASSERT(KMP_UBER_GTID(gtid));
7275 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7279 __kmp_common_initialize();
7283 __kmp_register_atfork();
7286 #if !KMP_DYNAMIC_LIB || \
7287 ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
7292 int rc = atexit(__kmp_internal_end_atexit);
7294 __kmp_fatal(KMP_MSG(FunctionError,
"atexit()"), KMP_ERR(rc),
7300 #if KMP_HANDLE_SIGNALS
7306 __kmp_install_signals(FALSE);
7309 __kmp_install_signals(TRUE);
7314 __kmp_init_counter++;
7316 __kmp_init_serial = TRUE;
7318 if (__kmp_settings) {
7322 if (__kmp_display_env || __kmp_display_env_verbose) {
7323 __kmp_env_print_2();
7332 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
static void __kmp_do_middle_initialize(void) {
  int i, j;
  int prev_dflt_team_nth;

  if (!__kmp_init_serial) {
    __kmp_do_serial_initialize();
  }

  KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));

  if (UNLIKELY(!__kmp_need_register_serial)) {
    // We are in a forked child process; the registration was skipped during
    // serial initialization, so do it here.
    __kmp_register_library_startup();
  }

  // Save the previous value of __kmp_dflt_team_nth so we can avoid some
  // reinitialization if it hasn't changed.
  prev_dflt_team_nth = __kmp_dflt_team_nth;

#if KMP_AFFINITY_SUPPORTED
  // __kmp_affinity_initialize() will try to set __kmp_ncores to the number of
  // cores on the machine.
  __kmp_affinity_initialize(__kmp_affinity);
#endif /* KMP_AFFINITY_SUPPORTED */

  KMP_ASSERT(__kmp_xproc > 0);
  if (__kmp_avail_proc == 0) {
    __kmp_avail_proc = __kmp_xproc;
  }

  // If there were empty places in the num_threads list (OMP_NUM_THREADS=,,2,3),
  // correct them now.
  j = 0;
  while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
    __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
        __kmp_avail_proc;
    j++;
  }

  if (__kmp_dflt_team_nth == 0) {
#ifdef KMP_DFLT_NTH_CORES
    // Default #threads = #cores
    __kmp_dflt_team_nth = __kmp_ncores;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_ncores (%d)\n",
                  __kmp_dflt_team_nth));
#else
    // Default #threads = #available OS procs
    __kmp_dflt_team_nth = __kmp_avail_proc;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_avail_proc(%d)\n",
                  __kmp_dflt_team_nth));
#endif /* KMP_DFLT_NTH_CORES */
  }

  if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
    __kmp_dflt_team_nth = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth = __kmp_sys_max_nth;
  }

  if (__kmp_nesting_mode > 0)
    __kmp_set_nesting_mode_threads();

  // No harm in continuing if the following check fails, but it indicates an
  // error in the preceding logic.
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);

  if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
    // Adjust the nthreads-var ICV for every registered root thread that has
    // not explicitly set it via omp_set_num_threads().
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thread = __kmp_threads[i];
      if (thread == NULL)
        continue;
      if (thread->th.th_current_task->td_icvs.nproc != 0)
        continue;

      set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
    }
  }
  KA_TRACE(
      20,
      ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
       __kmp_dflt_team_nth));

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* we have finished middle initialization */
  TCW_SYNC_4(__kmp_init_middle, TRUE);

  KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
}
void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_parallel_initialize(void) {
  int gtid = __kmp_entry_gtid(); // this might be a new root

  /* synchronize parallel initialization (for sibling) */
  if (TCR_4(__kmp_init_parallel))
    return;
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_parallel)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* TODO reinitialization after we have already shut down */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(
        10,
        ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
    __kmp_infinite_loop();
  }

  /* __kmp_initz_lock is already held, so calling __kmp_serial_initialize
     would deadlock; call __kmp_do_middle_initialize directly. */
  if (!__kmp_init_middle) {
    __kmp_do_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  __kmp_resume_if_hard_paused();

  /* begin initialization */
  KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
  KMP_ASSERT(KMP_UBER_GTID(gtid));

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // Save the FP control regs; worker threads will set theirs to these values
  // at thread startup.
  __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
  __kmp_store_mxcsr(&__kmp_init_mxcsr);
  __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_HANDLE_SIGNALS
  /* must be after __kmp_serial_initialize */
  __kmp_install_signals(TRUE);
#endif

  __kmp_suspend_initialize();

#if defined(USE_LOAD_BALANCE)
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
  }
#else
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
  }
#endif

  if (__kmp_version) {
    __kmp_print_version_2();
  }

  /* we have finished parallel initialization */
  TCW_SYNC_4(__kmp_init_parallel, TRUE);

  KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
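
// For reference, a minimal user program is enough to exercise all three
// stages: the first parallel region reached through __kmpc_fork_call() ends up
// triggering __kmp_parallel_initialize(), which performs serial and middle
// initialization on demand. Roughly:
//
//   #include <omp.h>
//   int main() {
//   #pragma omp parallel
//     { (void)omp_get_thread_num(); } // first region drives the lazy init
//     return 0;
//   }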
void __kmp_hidden_helper_initialize() {
  if (TCR_4(__kmp_init_hidden_helper))
    return;

  // __kmp_parallel_initialize is required before initializing hidden helpers
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  // Double check; this must not be placed before __kmp_parallel_initialize,
  // or it would deadlock on __kmp_initz_lock.
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_hidden_helper)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

#if KMP_AFFINITY_SUPPORTED
  // Initialize hidden helper affinity settings, if not already done.
  if (!__kmp_hh_affinity.flags.initialized)
    __kmp_affinity_initialize(__kmp_hh_affinity);
#endif

  // Set the count of hidden helper tasks to be executed to zero
  KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);

  // Indicate that we are initializing the hidden helper team/threads
  TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);

  // Platform-independent initialization
  __kmp_do_initialize_hidden_helper_threads();

  // Wait for the hidden helper teams to finish initializing
  __kmp_hidden_helper_threads_initz_wait();

  // We have finished hidden helper initialization
  TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  // th_info.ds.ds_gtid and th_serial_team are set up in __kmp_allocate_thread.
  kmp_disp_t *dispatch;

  KMP_MB();

  /* none of the threads have encountered any constructs, yet. */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
  dispatch->th_doacross_buf_idx = 0; /* reset the doacross buffer counter */
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates. */
}

void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}
int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about entering user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_enter(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_frame_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
                         .ompt_task_info.frame.exit_frame.ptr);
  } else {
    exit_frame_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }
#endif

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
  }
  KMP_SET_THREAD_STATE(IMPLICIT_TASK);
#endif

  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_frame_p
#endif
  );
#if OMPT_SUPPORT
  *exit_frame_p = NULL;
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_SET_THREAD_STATE(previous_state);
  }
  KMP_POP_PARTITIONED_TIMER();
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about leaving user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_leave(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}
void __kmp_teams_master(int gtid) {
  // This routine is called by all primary threads in the teams construct
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root. Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // make thr the CG root
  // Init to the thread limit stored when the league primary threads were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;

  // Launch the league of teams, but do not let the workers execute
  // (they hang on the fork barrier until the next parallel region).
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size.
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;
  // The last argument (1) eliminates the join barrier, which would not work
  // because the workers are waiting on the fork barrier for more parallel
  // regions.
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}
int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
#if OMPT_SUPPORT
  int tid = __kmp_tid_from_gtid(gtid);
  ompt_data_t *task_data =
      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
        ompt_task_initial);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  }
#endif
  __kmp_teams_master(gtid);
#if OMPT_SUPPORT
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
#endif
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
/* Set the number of threads to be used by the next parallel region
   encountered by this team. */
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
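
// A num_threads clause is lowered by the compiler to a call to
// __kmpc_push_num_threads() (which lands here) immediately before the
// corresponding __kmpc_fork_call(); th_set_nproc is then consumed and reset by
// the fork. Roughly, for "#pragma omp parallel num_threads(4)":
//
//   __kmpc_push_num_threads(&loc, gtid, 4);
//   __kmpc_fork_call(&loc, /*argc=*/0, outlined_fn);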
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
  KMP_DEBUG_ASSERT(thr);
  // Remember the number of threads for inner parallel regions
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  __kmp_assign_root_init_mask();
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);

  if (num_threads == 0) {
    if (__kmp_teams_thread_limit > 0) {
      num_threads = __kmp_teams_thread_limit;
    } else {
      num_threads = __kmp_avail_proc / num_teams;
    }
    // Adjust num_threads without warning since this is not a user setting;
    // num_threads = min(num_threads, nthreads-var, thread-limit-var).
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
    if (num_threads == 0) {
      num_threads = 1;
    }
  } else {
    if (num_threads < 0) {
      __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
                __kmp_msg_null);
      num_threads = 1;
    }
    // This thread will be the primary thread of the league primary threads.
    // Store the new thread limit; the old limit is saved in the th_cg_roots list.
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (new_threads == 0) {
        new_threads = 1;
      }
      if (new_threads != num_threads) {
        if (!__kmp_reserve_warn) { // user asked for too many threads
          __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
        }
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}
/* Set the number of teams for the teams construct and/or the number of
   threads for the next parallel region encountered. */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  if (num_teams < 0) {
    // The OpenMP specification requires requested values to be positive, but
    // the user may pass any value, so check it.
    __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
              __kmp_msg_null);
    num_teams = 1;
  }
  if (num_teams == 0) {
    if (__kmp_nteams > 0) {
      num_teams = __kmp_nteams;
    } else {
      num_teams = 1; // default number of teams is 1
    }
  }
  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set the number of teams (= number of threads in the outer "parallel" of
  // the teams construct).
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
/* OpenMP 5.1 variant: the num_teams clause may specify lower and upper bounds. */
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams_lb > num_teams_ub) {
    __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
                KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
  }

  int num_teams = 1; // default number of teams is 1

  if (num_teams_lb == 0 && num_teams_ub > 0)
    num_teams_lb = num_teams_ub;

  if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
    num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
    if (num_teams > __kmp_teams_max_nth) {
      if (!__kmp_reserve_warn) {
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_teams = __kmp_teams_max_nth;
    }
  } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
    num_teams = num_teams_ub;
  } else { // num_teams_lb <= num_teams <= num_teams_ub
    if (num_threads <= 0) {
      if (num_teams_ub > __kmp_teams_max_nth) {
        num_teams = num_teams_lb;
      } else {
        num_teams = num_teams_ub;
      }
    } else {
      num_teams = (num_threads > __kmp_teams_max_nth)
                      ? num_teams
                      : __kmp_teams_max_nth / num_threads;
      if (num_teams < num_teams_lb) {
        num_teams = num_teams_lb;
      } else if (num_teams > num_teams_ub) {
        num_teams = num_teams_ub;
      }
    }
  }
  // Set the number of teams (= number of threads in the outer "parallel" of
  // the teams construct).
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
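
// Worked example of the clamping above (illustrative numbers): with
// __kmp_teams_max_nth == 256 and num_threads == 8, an unbounded
// num_teams(lb:ub) request resolves to at most 256 / 8 == 32 teams before
// being pulled back into [num_teams_lb, num_teams_ub]; with no num_threads
// clause, the upper bound is used unless it alone exceeds __kmp_teams_max_nth,
// in which case the lower bound is taken instead.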
// Set the proc_bind variable to use in the subsequent parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
/* Launch the worker threads into the microtask. */
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffers */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

/* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
    }
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);
}
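
// __kmp_internal_fork() and __kmp_internal_join() bracket one parallel region
// for the primary thread: the fork side resets the team's dispatch buffers and
// releases the workers through the fork barrier, while the join side gathers
// everyone at the join barrier and, when OMPT is enabled, emits the
// implicit-barrier / implicit-task end callbacks for this thread.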
#ifdef USE_LOAD_BALANCE

// Return the worker threads actively spinning in the hot team, if we are at
// the outermost level of parallelism. Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // Don't count the primary thread
  }

  // Skip the primary thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of threads used by the next
// parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for
  // this root, and the currently executing thread are available to add to the
  // new team, but they currently contribute to the system load.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // The load balance algorithm is slightly delayed in detecting newly running
  // procs, so assume the system load is at least as large as the number of
  // active OpenMP threads available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
}
#endif /* USE_LOAD_BALANCE */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated as a single block, so there is
  // no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

  // Free old __kmp_threads arrays if they exist.
  kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
  while (ptr) {
    kmp_old_threads_list_t *next = ptr->next;
    __kmp_free(ptr->threads);
    __kmp_free(ptr);
    ptr = next;
  }

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif
#if OMPD_SUPPORT
  if (ompd_env_block) {
    __kmp_free(ompd_env_block);
    ompd_env_block = NULL;
    ompd_env_block_size = 0;
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  return TRUE; // By default __kmpc_begin() is no-op.
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  return TRUE; // By default __kmpc_end() is no-op.
}
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in the serial section of the
                                  top-level thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
/* set the behaviour of the runtime library */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
// Get team information common for all team API; returns NULL if not in a
// teams construct.
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 0; // teams region is serialized (1 team of 1 thread)
    } else {
      return team->t.t_master_tid;
    }
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 1;
    } else {
      return team->t.t_parent->t.t_nproc;
    }
  }
  return 1;
}
/* Affinity format display support */

typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec e.g., L -> nesting level
  const char *long_name; // from spec e.g., nesting_level
  char field_format; // how the field should be formatted when printed
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
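
// The table above drives OMP_DISPLAY_AFFINITY / omp_capture_affinity format
// expansion: each %<short_name> or %{<long_name>} field of an affinity format
// string is looked up here and printed with the listed printf conversion. For
// example, a format such as
//   "tid %n of %N at nesting level %{nesting_level}"
// expands the thread number, the team size and the nesting level per thread.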
// Return the number of characters consumed by the field specifier
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on the flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8-digit widths; this also prevents overflowing format
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // If the implementation has no info for the field type, "undefined" is
    // printed.
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
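
// Example of the field parsing above: for the specifier "%0.4n" the parser
// records pad_zeros and right_justify, collects the width "4", builds the
// printf format "%04d" and applies it to __kmp_tid_from_gtid(gtid);
// "%{thread_num}" selects the same field through its long name.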
/* Return the number of characters needed to hold the affinity string (not
   including the null byte); the resultant string is printed to `buffer`. */
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, use the affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put the field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put the literal character in the buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
// Display the affinity string to stdout
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
/* ------------------------------------------------------------------------ */

void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg;
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime has been set */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default reduction method: critical construct (lck != NULL).
  // If (reduce_data != NULL && reduce_func != NULL): the tree-reduction method
  // can be selected by the RTL.
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE: the atomic reduce method
  // can be selected by the RTL.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(lck);

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  (loc &&                                                                      \
   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic deference) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
       // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) {
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION
  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block).
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
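
// Example of the selection above: on a 64-bit Linux target where the compiler
// generated a tree reduction (reduce_data and reduce_func non-NULL), a team of
// 16 threads exceeds the teamsize_cutoff of 4 (8 with MIC support) and gets
// TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER, while a serialized team
// (team_size == 1) always gets empty_reduce_block. A forced method set via
// __kmp_force_reduction_method (the KMP_FORCE_REDUCTION setting) can still
// override the automatic choice afterwards.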
// This function is for testing set/get/determine reduce method
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume sets __kmp_pause_status and wakes up all threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // wake the thread if it is sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it was not sleeping
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}
// This function is called via __kmpc_pause_resource. Returns 0 if successful.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused)
      return 1; // the runtime is not paused, so it cannot be resumed
    KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                     __kmp_pause_status == kmp_hard_paused);
    __kmp_pause_status = kmp_not_paused;
    return 0;
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_soft_pause();
    return 0;
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_hard_pause();
    return 0;
  }
  return 1; // invalid level
}
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // We want all the workers to stop waiting on the barrier while we adjust
  // the size of the team.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are already inactive or not present in the team
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      continue;
    }
    // If the thread is still transitioning to the in-use state, wait for it
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should be in use now
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    // Transition to the unused state
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers
  team->t.b->go_release();

  KMP_MB();

  // Workers should see transition status 2 and move to 0, but they may need
  // to be woken up first.
  int count = old_nthreads - 1;
  while (count > 0) {
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up the workers
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}
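
// th_used_in_team acts as a small state machine while the distributed barrier
// is resized: 0 = not part of the team, 1 = in the team, 2 = asked to leave,
// 3 = asked to (re)join. __kmp_resize_dist_barrier() above moves workers
// 1 -> 2 -> 0 and __kmp_add_threads_to_team() below moves them 0 -> 3 -> 1,
// sending resumes when blocktime is finite so sleeping workers observe the
// transition.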
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
  // Add the threads back to the team
  KMP_DEBUG_ASSERT(team);
  // Threads were paused and pointed at th_used_in_team temporarily during the
  // resize of the team. Set th_used_in_team to 3 to tell each thread to
  // transition itself back into the team, and wake it if it may be sleeping.
  for (int f = 1; f < new_nthreads; ++f) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
    KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
                                3);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up sleeping threads
      __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                      (kmp_flag_32<false, false> *)NULL);
    }
  }

  // Threads that have transitioned into the team set th_used_in_team to 1;
  // wait here until all of them have done so.
  int count = new_nthreads - 1;
  while (count > 0) {
    count = new_nthreads - 1;
    for (int f = 1; f < new_nthreads; ++f) {
      if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
        count--;
      }
    }
  }
}
// Globals and state for the hidden helper team
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

// Number of hidden helper threads that have reached the wrapper function
std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // This is an explicit synchronization on all hidden helper threads, in case
  // a regular thread pushes a hidden helper task to a helper thread that has
  // not finished initializing yet.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // If this is the main hidden helper thread, wait for the signal
  if (__kmpc_master(nullptr, *gtid)) {
    // First, unset the initial state and release the initial thread
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    __kmp_hidden_helper_initz_release();
    __kmp_hidden_helper_main_thread_wait();
    // Now wake up the worker threads to execute tasks
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}
void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for the hidden helper team/threads
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  // Set the initialization flag to FALSE
  TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);

  __kmp_hidden_helper_threads_deinitz_release();
}
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}
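
// __kmp_init_nesting_mode() only sizes the per-level arrays; the per-level
// thread counts are filled in by __kmp_set_nesting_mode_threads() below once
// topology information (if any) is available. Illustratively, a two-socket
// machine with 16 cores per socket and 2 hardware threads per core would
// typically yield nesting levels of {2, 16, 2}, with single-entity levels
// skipped.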
// Set the #threads for the top levels of nesting; must be called after the
// topology has been detected.
void __kmp_set_nesting_mode_threads() {
  kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];

  if (__kmp_nesting_mode == 1)
    __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  else if (__kmp_nesting_mode > 1)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;

  if (__kmp_topology) { // use topology info
    int loc, hw_level;
    for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
                                loc < __kmp_nesting_mode_nlevels;
         loc++, hw_level++) {
      __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
      if (__kmp_nesting_nth_level[loc] == 1)
        loc--; // skip levels with only one entity
    }
    // Make sure all cores are used
    if (__kmp_nesting_mode > 1 && loc > 1) { // don't lose granularity
      int core_level = __kmp_topology->get_level(KMP_HW_CORE);
      int num_cores = __kmp_topology->get_count(core_level);
      int upper_levels = 1;
      for (int level = 0; level < loc - 1; ++level)
        upper_levels *= __kmp_nesting_nth_level[level];
      if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
        __kmp_nesting_nth_level[loc - 1] =
            num_cores / __kmp_nesting_nth_level[loc - 2];
    }
    __kmp_nesting_mode_nlevels = loc;
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  } else { // no topology info available; provide a reasonable estimate
    if (__kmp_avail_proc >= 4) {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
      __kmp_nesting_nth_level[1] = 2;
      __kmp_nesting_mode_nlevels = 2;
    } else {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc;
      __kmp_nesting_mode_nlevels = 1;
    }
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  }
  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
    __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
  }
  set__nproc(thread, __kmp_nesting_nth_level[0]);
  if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
  if (get__max_active_levels(thread) > 1) {
    // if max levels was set, set the nesting mode levels to the same value
    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
  }
  if (__kmp_nesting_mode == 1) // turn on nesting for this case only
    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}
// Empty symbols to export (see exports_so.txt) when a feature is disabled
extern "C" {
#if !KMP_STATS_ENABLED
void __kmp_reset_stats() {}
#endif
#if !USE_DEBUGGER
int __kmp_omp_debug_struct_info = FALSE;
int __kmp_debugging = FALSE;
#endif
#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
void __kmp_itt_fini_ittlib() {}
void __kmp_itt_init_ittlib() {}
#endif
} // extern "C"