LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 #include "ompt-specific.h"
22 
23 #define MAX_MESSAGE 512
24 
25 // flags will be used in the future, e.g. to implement openmp_strict library
26 // restrictions
27 
36 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37  // By default __kmpc_begin() is no-op.
38  char *env;
39  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40  __kmp_str_match_true(env)) {
41  __kmp_middle_initialize();
42  __kmp_assign_root_init_mask();
43  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
44  } else if (__kmp_ignore_mppbeg() == FALSE) {
45  // By default __kmp_ignore_mppbeg() returns TRUE.
46  __kmp_internal_begin();
47  KC_TRACE(10, ("__kmpc_begin: called\n"));
48  }
49 }
50 
59 void __kmpc_end(ident_t *loc) {
60  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
61  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
62  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
63  // returns FALSE and __kmpc_end() will unregister this root (it can cause
64  // library shut down).
65  if (__kmp_ignore_mppend() == FALSE) {
66  KC_TRACE(10, ("__kmpc_end: called\n"));
67  KA_TRACE(30, ("__kmpc_end\n"));
68 
69  __kmp_internal_end_thread(-1);
70  }
71 #if KMP_OS_WINDOWS && OMPT_SUPPORT
72  // Normal exit process on Windows does not allow worker threads of the final
73  // parallel region to finish reporting their events, so shutting down the
74  // library here fixes the issue at least for the cases where __kmpc_end() is
75  // placed properly.
76  if (ompt_enabled.enabled)
77  __kmp_internal_end_library(__kmp_gtid_get_specific());
78 #endif
79 }
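/* Illustrative sketch, not part of the upstream source: compilers that use
   these entry points bracket the program with __kmpc_begin/__kmpc_end, both of
   which are no-ops unless KMP_IGNORE_MPPBEG / KMP_IGNORE_MPPEND (or
   KMP_INITIAL_THREAD_BIND) request otherwise. Roughly (the ident_t contents
   below are made up for illustration):

       static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file;main;1;1;;"};
       int main(void) {
         __kmpc_begin(&loc, 0);
         // ... user code, possibly containing parallel regions ...
         __kmpc_end(&loc);
         return 0;
       }
*/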
80 
99 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
100  kmp_int32 gtid = __kmp_entry_gtid();
101 
102  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
103 
104  return gtid;
105 }
106 
121 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
122  KC_TRACE(10,
123  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124 
125  return TCR_4(__kmp_all_nth);
126 }
127 
134 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
135  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
136  return __kmp_tid_from_gtid(__kmp_entry_gtid());
137 }
138 
144 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
145  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146 
147  return __kmp_entry_thread()->th.th_team->t.t_nproc;
148 }
149 
156 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
157 #ifndef KMP_DEBUG
158 
159  return TRUE;
160 
161 #else
162 
163  const char *semi2;
164  const char *semi3;
165  int line_no;
166 
167  if (__kmp_par_range == 0) {
168  return TRUE;
169  }
170  semi2 = loc->psource;
171  if (semi2 == NULL) {
172  return TRUE;
173  }
174  semi2 = strchr(semi2, ';');
175  if (semi2 == NULL) {
176  return TRUE;
177  }
178  semi2 = strchr(semi2 + 1, ';');
179  if (semi2 == NULL) {
180  return TRUE;
181  }
182  if (__kmp_par_range_filename[0]) {
183  const char *name = semi2 - 1;
184  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
185  name--;
186  }
187  if ((*name == '/') || (*name == ';')) {
188  name++;
189  }
190  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
191  return __kmp_par_range < 0;
192  }
193  }
194  semi3 = strchr(semi2 + 1, ';');
195  if (__kmp_par_range_routine[0]) {
196  if ((semi3 != NULL) && (semi3 > semi2) &&
197  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
198  return __kmp_par_range < 0;
199  }
200  }
201  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
202  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
203  return __kmp_par_range > 0;
204  }
205  return __kmp_par_range < 0;
206  }
207  return TRUE;
208 
209 #endif /* KMP_DEBUG */
210 }
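/* Illustrative note, not part of the upstream source: the KMP_PAR_RANGE
   filtering above assumes the ident_t psource string has the usual layout
   ";filename;routine;line;column;;". For example, with

       static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";foo.c;bar;42;7;;"};

   the code extracts the basename "foo.c" between the first two semicolons,
   the routine "bar" between the second and third, and parses 42 as the line
   number checked against __kmp_par_range_lb/__kmp_par_range_ub. */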
211 
218 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
219  return __kmp_entry_thread()->th.th_root->r.r_active;
220 }
221 
231 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
232  kmp_int32 num_threads) {
233  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
234  global_tid, num_threads));
235  __kmp_assert_valid_gtid(global_tid);
236  __kmp_push_num_threads(loc, global_tid, num_threads);
237 }
238 
239 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
240  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
241  /* the num_threads are automatically popped */
242 }
243 
244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245  kmp_int32 proc_bind) {
246  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247  proc_bind));
248  __kmp_assert_valid_gtid(global_tid);
249  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250 }
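/* Illustrative, hypothetical lowering (names such as outlined_fn are made up):
   a num_threads or proc_bind clause is communicated to the next fork by
   pushing the value first, e.g. for
       #pragma omp parallel num_threads(4) proc_bind(close)
   a front end may emit roughly:

       kmp_int32 gtid = __kmpc_global_thread_num(&loc);
       __kmpc_push_num_threads(&loc, gtid, 4);
       __kmpc_push_proc_bind(&loc, gtid, proc_bind_close);
       __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_fn);
*/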
251 
262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263  int gtid = __kmp_entry_gtid();
264 
265 #if (KMP_STATS_ENABLED)
266  // If we were in a serial region, then stop the serial timer, record
267  // the event, and start parallel region timer
268  stats_state_e previous_state = KMP_GET_THREAD_STATE();
269  if (previous_state == stats_state_e::SERIAL_REGION) {
270  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271  } else {
272  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273  }
274  int inParallel = __kmpc_in_parallel(loc);
275  if (inParallel) {
276  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277  } else {
278  KMP_COUNT_BLOCK(OMP_PARALLEL);
279  }
280 #endif
281 
282  // saving thr_state here may be sufficient
283  {
284  va_list ap;
285  va_start(ap, microtask);
286 
287 #if OMPT_SUPPORT
288  ompt_frame_t *ompt_frame;
289  if (ompt_enabled.enabled) {
290  kmp_info_t *master_th = __kmp_threads[gtid];
291  ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
292  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
293  }
294  OMPT_STORE_RETURN_ADDRESS(gtid);
295 #endif
296 
297 #if INCLUDE_SSC_MARKS
298  SSC_MARK_FORKING();
299 #endif
300  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
301  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
302  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
303  kmp_va_addr_of(ap));
304 #if INCLUDE_SSC_MARKS
305  SSC_MARK_JOINING();
306 #endif
307  __kmp_join_call(loc, gtid
308 #if OMPT_SUPPORT
309  ,
310  fork_context_intel
311 #endif
312  );
313 
314  va_end(ap);
315 
316 #if OMPT_SUPPORT
317  if (ompt_enabled.enabled) {
318  ompt_frame->enter_frame = ompt_data_none;
319  }
320 #endif
321  }
322 
323 #if KMP_STATS_ENABLED
324  if (previous_state == stats_state_e::SERIAL_REGION) {
325  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
326  KMP_SET_THREAD_STATE(previous_state);
327  } else {
328  KMP_POP_PARTITIONED_TIMER();
329  }
330 #endif // KMP_STATS_ENABLED
331 }
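/* Illustrative sketch, not the exact compiler output: a bare parallel region
       #pragma omp parallel shared(x)
       { x += 1; }
   is outlined into a microtask whose first two parameters are the global and
   bound thread ids, with shared variables passed by address through the
   varargs of __kmpc_fork_call (outlined_fn is a hypothetical name):

       static void outlined_fn(kmp_int32 *gtid, kmp_int32 *btid, int *x) {
         *x += 1; // unsynchronized here; kept minimal for illustration
       }
       ...
       int x = 0;
       __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_fn, &x);
*/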
332 
343 void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
344  kmp_int32 cond, void *args) {
345  int gtid = __kmp_entry_gtid();
346  if (cond) {
347  if (args)
348  __kmpc_fork_call(loc, argc, microtask, args);
349  else
350  __kmpc_fork_call(loc, argc, microtask);
351  } else {
352  __kmpc_serialized_parallel(loc, gtid);
353 
354 #if OMPT_SUPPORT
355  void *exit_frame_ptr;
356 #endif
357 
358  if (args)
359  __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
360  /*npr=*/0,
361  /*argc=*/1, &args
362 #if OMPT_SUPPORT
363  ,
364  &exit_frame_ptr
365 #endif
366  );
367  else
368  __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
369  /*npr=*/0,
370  /*argc=*/0,
371  /*args=*/nullptr
372 #if OMPT_SUPPORT
373  ,
374  &exit_frame_ptr
375 #endif
376  );
377 
378  __kmpc_end_serialized_parallel(loc, gtid);
379  }
380 }
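/* Illustrative note: __kmpc_fork_call_if lets a front end fold an if() clause
   into one call instead of branching between __kmpc_fork_call and the
   serialized path itself, e.g. for
       #pragma omp parallel if(n > 1)
   roughly (outlined_fn is a hypothetical name):
       __kmpc_fork_call_if(&loc, 0, (kmpc_micro)outlined_fn, n > 1, NULL);
*/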
381 
393 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
394  kmp_int32 num_teams, kmp_int32 num_threads) {
395  KA_TRACE(20,
396  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
397  global_tid, num_teams, num_threads));
398  __kmp_assert_valid_gtid(global_tid);
399  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
400 }
401 
412 void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
413  kmp_int32 thread_limit) {
414  __kmp_assert_valid_gtid(global_tid);
415  kmp_info_t *thread = __kmp_threads[global_tid];
416  if (thread_limit > 0)
417  thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
418 }
419 
436 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
437  kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
438  kmp_int32 num_threads) {
439  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
440  " num_teams_ub=%d num_threads=%d\n",
441  global_tid, num_teams_lb, num_teams_ub, num_threads));
442  __kmp_assert_valid_gtid(global_tid);
443  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
444  num_threads);
445 }
446 
457 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
458  ...) {
459  int gtid = __kmp_entry_gtid();
460  kmp_info_t *this_thr = __kmp_threads[gtid];
461  va_list ap;
462  va_start(ap, microtask);
463 
464 #if KMP_STATS_ENABLED
465  KMP_COUNT_BLOCK(OMP_TEAMS);
466  stats_state_e previous_state = KMP_GET_THREAD_STATE();
467  if (previous_state == stats_state_e::SERIAL_REGION) {
468  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
469  } else {
470  KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
471  }
472 #endif
473 
474  // remember teams entry point and nesting level
475  this_thr->th.th_teams_microtask = microtask;
476  this_thr->th.th_teams_level =
477  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
478 
479 #if OMPT_SUPPORT
480  kmp_team_t *parent_team = this_thr->th.th_team;
481  int tid = __kmp_tid_from_gtid(gtid);
482  if (ompt_enabled.enabled) {
483  parent_team->t.t_implicit_task_taskdata[tid]
484  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
485  }
486  OMPT_STORE_RETURN_ADDRESS(gtid);
487 #endif
488 
489  // check if __kmpc_push_num_teams was called; set the default number of
490  // teams otherwise
491  if (this_thr->th.th_teams_size.nteams == 0) {
492  __kmp_push_num_teams(loc, gtid, 0, 0);
493  }
494  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
495  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
496  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
497 
498  __kmp_fork_call(
499  loc, gtid, fork_context_intel, argc,
500  VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
501  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
502  __kmp_join_call(loc, gtid
503 #if OMPT_SUPPORT
504  ,
505  fork_context_intel
506 #endif
507  );
508 
509  // Pop current CG root off list
510  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
511  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
512  this_thr->th.th_cg_roots = tmp->up;
513  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
514  " to node %p. cg_nthreads was %d\n",
515  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
516  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
517  int i = tmp->cg_nthreads--;
518  if (i == 1) { // check if we are the last thread in CG (not always the case)
519  __kmp_free(tmp);
520  }
521  // Restore current task's thread_limit from CG root
522  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
523  this_thr->th.th_current_task->td_icvs.thread_limit =
524  this_thr->th.th_cg_roots->cg_thread_limit;
525 
526  this_thr->th.th_teams_microtask = NULL;
527  this_thr->th.th_teams_level = 0;
528  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
529  va_end(ap);
530 #if KMP_STATS_ENABLED
531  if (previous_state == stats_state_e::SERIAL_REGION) {
532  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
533  KMP_SET_THREAD_STATE(previous_state);
534  } else {
535  KMP_POP_PARTITIONED_TIMER();
536  }
537 #endif // KMP_STATS_ENABLED
538 }
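/* Illustrative, hypothetical names: a teams construct such as
       #pragma omp teams num_teams(8) thread_limit(4)
   is handled by pushing the teams geometry and then forking the teams master:

       kmp_int32 gtid = __kmpc_global_thread_num(&loc);
       __kmpc_push_num_teams(&loc, gtid, 8, 4);
       __kmpc_fork_teams(&loc, 0, (kmpc_micro)teams_outlined_fn);
*/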
539 
540 // I don't think this function should ever have been exported.
541 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
542 // openmp code ever called it, but it's been exported from the RTL for so
543 // long that I'm afraid to remove the definition.
544 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
545 
558 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
559  // The implementation is now in kmp_runtime.cpp so that it can share static
560  // functions with kmp_fork_call since the tasks to be done are similar in
561  // each case.
562  __kmp_assert_valid_gtid(global_tid);
563 #if OMPT_SUPPORT
564  OMPT_STORE_RETURN_ADDRESS(global_tid);
565 #endif
566  __kmp_serialized_parallel(loc, global_tid);
567 }
568 
576 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
577  kmp_internal_control_t *top;
578  kmp_info_t *this_thr;
579  kmp_team_t *serial_team;
580 
581  KC_TRACE(10,
582  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
583 
584  /* skip all this code for autopar serialized loops since it results in
585  unacceptable overhead */
586  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
587  return;
588 
589  // Not autopar code
590  __kmp_assert_valid_gtid(global_tid);
591  if (!TCR_4(__kmp_init_parallel))
592  __kmp_parallel_initialize();
593 
594  __kmp_resume_if_soft_paused();
595 
596  this_thr = __kmp_threads[global_tid];
597  serial_team = this_thr->th.th_serial_team;
598 
599  kmp_task_team_t *task_team = this_thr->th.th_task_team;
600  // we need to wait for the proxy tasks before finishing the thread
601  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
602  task_team->tt.tt_hidden_helper_task_encountered))
603  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
604 
605  KMP_MB();
606  KMP_DEBUG_ASSERT(serial_team);
607  KMP_ASSERT(serial_team->t.t_serialized);
608  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
609  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
610  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
611  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
612 
613 #if OMPT_SUPPORT
614  if (ompt_enabled.enabled &&
615  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
616  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
617  if (ompt_enabled.ompt_callback_implicit_task) {
618  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
619  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
620  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
621  }
622 
623  // clear the task id only after unlinking the task
624  ompt_data_t *parent_task_data;
625  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
626 
627  if (ompt_enabled.ompt_callback_parallel_end) {
628  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
629  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
630  ompt_parallel_invoker_program | ompt_parallel_team,
631  OMPT_LOAD_RETURN_ADDRESS(global_tid));
632  }
633  __ompt_lw_taskteam_unlink(this_thr);
634  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
635  }
636 #endif
637 
638  /* If necessary, pop the internal control stack values and replace the team
639  * values */
640  top = serial_team->t.t_control_stack_top;
641  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
642  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
643  serial_team->t.t_control_stack_top = top->next;
644  __kmp_free(top);
645  }
646 
647  /* pop dispatch buffers stack */
648  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
649  {
650  dispatch_private_info_t *disp_buffer =
651  serial_team->t.t_dispatch->th_disp_buffer;
652  serial_team->t.t_dispatch->th_disp_buffer =
653  serial_team->t.t_dispatch->th_disp_buffer->next;
654  __kmp_free(disp_buffer);
655  }
656  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
657 
658  --serial_team->t.t_serialized;
659  if (serial_team->t.t_serialized == 0) {
660 
661  /* return to the parallel section */
662 
663 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
664  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
665  __kmp_clear_x87_fpu_status_word();
666  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
667  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
668  }
669 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
670 
671  __kmp_pop_current_task_from_thread(this_thr);
672 #if OMPD_SUPPORT
673  if (ompd_state & OMPD_ENABLE_BP)
674  ompd_bp_parallel_end();
675 #endif
676 
677  this_thr->th.th_team = serial_team->t.t_parent;
678  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
679 
680  /* restore values cached in the thread */
681  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
682  this_thr->th.th_team_master =
683  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
684  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
685 
686  /* TODO the below shouldn't need to be adjusted for serialized teams */
687  this_thr->th.th_dispatch =
688  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
689 
690  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
691  this_thr->th.th_current_task->td_flags.executing = 1;
692 
693  if (__kmp_tasking_mode != tskm_immediate_exec) {
694  // Copy the task team from the new child / old parent team to the thread.
695  this_thr->th.th_task_team =
696  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
697  KA_TRACE(20,
698  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
699  "team %p\n",
700  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
701  }
702 #if KMP_AFFINITY_SUPPORTED
703  if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
704  __kmp_reset_root_init_mask(global_tid);
705  }
706 #endif
707  } else {
708  if (__kmp_tasking_mode != tskm_immediate_exec) {
709  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
710  "depth of serial team %p to %d\n",
711  global_tid, serial_team, serial_team->t.t_serialized));
712  }
713  }
714 
715  serial_team->t.t_level--;
716  if (__kmp_env_consistency_check)
717  __kmp_pop_parallel(global_tid, NULL);
718 #if OMPT_SUPPORT
719  if (ompt_enabled.enabled)
720  this_thr->th.ompt_thread_info.state =
721  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
722  : ompt_state_work_parallel);
723 #endif
724 }
725 
734 void __kmpc_flush(ident_t *loc) {
735  KC_TRACE(10, ("__kmpc_flush: called\n"));
736 
737  /* need explicit __mf() here since use volatile instead in library */
738  KMP_MFENCE(); /* Flush all pending memory write invalidates. */
739 
740 #if OMPT_SUPPORT && OMPT_OPTIONAL
741  if (ompt_enabled.ompt_callback_flush) {
742  ompt_callbacks.ompt_callback(ompt_callback_flush)(
743  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
744  }
745 #endif
746 }
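/* Illustrative note: a flush directive lowers directly to this entry point,
   e.g.
       #pragma omp flush
   becomes roughly
       __kmpc_flush(&loc);
*/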
747 
748 /* -------------------------------------------------------------------------- */
756 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
757  KMP_COUNT_BLOCK(OMP_BARRIER);
758  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
759  __kmp_assert_valid_gtid(global_tid);
760 
761  if (!TCR_4(__kmp_init_parallel))
762  __kmp_parallel_initialize();
763 
764  __kmp_resume_if_soft_paused();
765 
766  if (__kmp_env_consistency_check) {
767  if (loc == 0) {
768  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
769  }
770  __kmp_check_barrier(global_tid, ct_barrier, loc);
771  }
772 
773 #if OMPT_SUPPORT
774  ompt_frame_t *ompt_frame;
775  if (ompt_enabled.enabled) {
776  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
777  if (ompt_frame->enter_frame.ptr == NULL)
778  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
779  }
780  OMPT_STORE_RETURN_ADDRESS(global_tid);
781 #endif
782  __kmp_threads[global_tid]->th.th_ident = loc;
783  // TODO: explicit barrier_wait_id:
784  // this function is called when 'barrier' directive is present or
785  // implicit barrier at the end of a worksharing construct.
786  // 1) better to add a per-thread barrier counter to a thread data structure
787  // 2) set to 0 when a new team is created
788  // 3) no sync is required
789 
790  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
791 #if OMPT_SUPPORT && OMPT_OPTIONAL
792  if (ompt_enabled.enabled) {
793  ompt_frame->enter_frame = ompt_data_none;
794  }
795 #endif
796 }
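/* Illustrative sketch: an explicit barrier lowers to a single call,
       #pragma omp barrier
   becoming roughly
       __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
   The same entry point also implements the implicit barrier at the end of
   worksharing constructs when no nowait clause is present. */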
797 
798 /* The BARRIER for a MASTER section is always explicit */
805 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
806  int status = 0;
807 
808  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
809  __kmp_assert_valid_gtid(global_tid);
810 
811  if (!TCR_4(__kmp_init_parallel))
812  __kmp_parallel_initialize();
813 
814  __kmp_resume_if_soft_paused();
815 
816  if (KMP_MASTER_GTID(global_tid)) {
817  KMP_COUNT_BLOCK(OMP_MASTER);
818  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
819  status = 1;
820  }
821 
822 #if OMPT_SUPPORT && OMPT_OPTIONAL
823  if (status) {
824  if (ompt_enabled.ompt_callback_masked) {
825  kmp_info_t *this_thr = __kmp_threads[global_tid];
826  kmp_team_t *team = this_thr->th.th_team;
827 
828  int tid = __kmp_tid_from_gtid(global_tid);
829  ompt_callbacks.ompt_callback(ompt_callback_masked)(
830  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
831  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
832  OMPT_GET_RETURN_ADDRESS(0));
833  }
834  }
835 #endif
836 
837  if (__kmp_env_consistency_check) {
838 #if KMP_USE_DYNAMIC_LOCK
839  if (status)
840  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
841  else
842  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
843 #else
844  if (status)
845  __kmp_push_sync(global_tid, ct_master, loc, NULL);
846  else
847  __kmp_check_sync(global_tid, ct_master, loc, NULL);
848 #endif
849  }
850 
851  return status;
852 }
853 
862 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
863  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
864  __kmp_assert_valid_gtid(global_tid);
865  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
866  KMP_POP_PARTITIONED_TIMER();
867 
868 #if OMPT_SUPPORT && OMPT_OPTIONAL
869  kmp_info_t *this_thr = __kmp_threads[global_tid];
870  kmp_team_t *team = this_thr->th.th_team;
871  if (ompt_enabled.ompt_callback_masked) {
872  int tid = __kmp_tid_from_gtid(global_tid);
873  ompt_callbacks.ompt_callback(ompt_callback_masked)(
874  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
875  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
876  OMPT_GET_RETURN_ADDRESS(0));
877  }
878 #endif
879 
880  if (__kmp_env_consistency_check) {
881  if (KMP_MASTER_GTID(global_tid))
882  __kmp_pop_sync(global_tid, ct_master, loc);
883  }
884 }
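/* Illustrative sketch: the master construct is a guarded region; only the
   thread for which __kmpc_master() returns 1 executes the body and closes it:

       if (__kmpc_master(&loc, gtid)) {
         // body of #pragma omp master
         __kmpc_end_master(&loc, gtid);
       }
*/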
885 
894 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
895  int status = 0;
896  int tid;
897  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
898  __kmp_assert_valid_gtid(global_tid);
899 
900  if (!TCR_4(__kmp_init_parallel))
901  __kmp_parallel_initialize();
902 
903  __kmp_resume_if_soft_paused();
904 
905  tid = __kmp_tid_from_gtid(global_tid);
906  if (tid == filter) {
907  KMP_COUNT_BLOCK(OMP_MASKED);
908  KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
909  status = 1;
910  }
911 
912 #if OMPT_SUPPORT && OMPT_OPTIONAL
913  if (status) {
914  if (ompt_enabled.ompt_callback_masked) {
915  kmp_info_t *this_thr = __kmp_threads[global_tid];
916  kmp_team_t *team = this_thr->th.th_team;
917  ompt_callbacks.ompt_callback(ompt_callback_masked)(
918  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
919  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
920  OMPT_GET_RETURN_ADDRESS(0));
921  }
922  }
923 #endif
924 
925  if (__kmp_env_consistency_check) {
926 #if KMP_USE_DYNAMIC_LOCK
927  if (status)
928  __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
929  else
930  __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
931 #else
932  if (status)
933  __kmp_push_sync(global_tid, ct_masked, loc, NULL);
934  else
935  __kmp_check_sync(global_tid, ct_masked, loc, NULL);
936 #endif
937  }
938 
939  return status;
940 }
941 
950 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
951  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
952  __kmp_assert_valid_gtid(global_tid);
953  KMP_POP_PARTITIONED_TIMER();
954 
955 #if OMPT_SUPPORT && OMPT_OPTIONAL
956  kmp_info_t *this_thr = __kmp_threads[global_tid];
957  kmp_team_t *team = this_thr->th.th_team;
958  if (ompt_enabled.ompt_callback_masked) {
959  int tid = __kmp_tid_from_gtid(global_tid);
960  ompt_callbacks.ompt_callback(ompt_callback_masked)(
961  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
962  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
963  OMPT_GET_RETURN_ADDRESS(0));
964  }
965 #endif
966 
967  if (__kmp_env_consistency_check) {
968  __kmp_pop_sync(global_tid, ct_masked, loc);
969  }
970 }
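/* Illustrative sketch: masked works like master but with an arbitrary filter
   thread id, e.g. for #pragma omp masked filter(2):

       if (__kmpc_masked(&loc, gtid, 2)) {
         // body of the masked region
         __kmpc_end_masked(&loc, gtid);
       }
*/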
971 
979 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
980  int cid = 0;
981  kmp_info_t *th;
982  KMP_DEBUG_ASSERT(__kmp_init_serial);
983 
984  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
985  __kmp_assert_valid_gtid(gtid);
986 
987  if (!TCR_4(__kmp_init_parallel))
988  __kmp_parallel_initialize();
989 
990  __kmp_resume_if_soft_paused();
991 
992 #if USE_ITT_BUILD
993  __kmp_itt_ordered_prep(gtid);
994 // TODO: ordered_wait_id
995 #endif /* USE_ITT_BUILD */
996 
997  th = __kmp_threads[gtid];
998 
999 #if OMPT_SUPPORT && OMPT_OPTIONAL
1000  kmp_team_t *team;
1001  ompt_wait_id_t lck;
1002  void *codeptr_ra;
1003  OMPT_STORE_RETURN_ADDRESS(gtid);
1004  if (ompt_enabled.enabled) {
1005  team = __kmp_team_from_gtid(gtid);
1006  lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
1007  /* OMPT state update */
1008  th->th.ompt_thread_info.wait_id = lck;
1009  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
1010 
1011  /* OMPT event callback */
1012  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1013  if (ompt_enabled.ompt_callback_mutex_acquire) {
1014  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1015  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
1016  codeptr_ra);
1017  }
1018  }
1019 #endif
1020 
1021  if (th->th.th_dispatch->th_deo_fcn != 0)
1022  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
1023  else
1024  __kmp_parallel_deo(&gtid, &cid, loc);
1025 
1026 #if OMPT_SUPPORT && OMPT_OPTIONAL
1027  if (ompt_enabled.enabled) {
1028  /* OMPT state update */
1029  th->th.ompt_thread_info.state = ompt_state_work_parallel;
1030  th->th.ompt_thread_info.wait_id = 0;
1031 
1032  /* OMPT event callback */
1033  if (ompt_enabled.ompt_callback_mutex_acquired) {
1034  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1035  ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1036  }
1037  }
1038 #endif
1039 
1040 #if USE_ITT_BUILD
1041  __kmp_itt_ordered_start(gtid);
1042 #endif /* USE_ITT_BUILD */
1043 }
1044 
1052 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1053  int cid = 0;
1054  kmp_info_t *th;
1055 
1056  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1057  __kmp_assert_valid_gtid(gtid);
1058 
1059 #if USE_ITT_BUILD
1060  __kmp_itt_ordered_end(gtid);
1061 // TODO: ordered_wait_id
1062 #endif /* USE_ITT_BUILD */
1063 
1064  th = __kmp_threads[gtid];
1065 
1066  if (th->th.th_dispatch->th_dxo_fcn != 0)
1067  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1068  else
1069  __kmp_parallel_dxo(&gtid, &cid, loc);
1070 
1071 #if OMPT_SUPPORT && OMPT_OPTIONAL
1072  OMPT_STORE_RETURN_ADDRESS(gtid);
1073  if (ompt_enabled.ompt_callback_mutex_released) {
1074  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1075  ompt_mutex_ordered,
1076  (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1077  ->t.t_ordered.dt.t_value,
1078  OMPT_LOAD_RETURN_ADDRESS(gtid));
1079  }
1080 #endif
1081 }
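/* Illustrative sketch: inside a loop with an ordered clause, each iteration's
   ordered block is bracketed by these two calls, which serialize the blocks in
   logical iteration order:

       // within the chunk handed out by the dispatch routines
       __kmpc_ordered(&loc, gtid);
       // body of #pragma omp ordered
       __kmpc_end_ordered(&loc, gtid);
*/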
1082 
1083 #if KMP_USE_DYNAMIC_LOCK
1084 
1085 static __forceinline void
1086 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1087  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1088  // Pointer to the allocated indirect lock is written to crit, while indexing
1089  // is ignored.
1090  void *idx;
1091  kmp_indirect_lock_t **lck;
1092  lck = (kmp_indirect_lock_t **)crit;
1093  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1094  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1095  KMP_SET_I_LOCK_LOCATION(ilk, loc);
1096  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1097  KA_TRACE(20,
1098  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1099 #if USE_ITT_BUILD
1100  __kmp_itt_critical_creating(ilk->lock, loc);
1101 #endif
1102  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1103  if (status == 0) {
1104 #if USE_ITT_BUILD
1105  __kmp_itt_critical_destroyed(ilk->lock);
1106 #endif
1107  // We don't really need to destroy the unclaimed lock here since it will be
1108  // cleaned up at program exit.
1109  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1110  }
1111  KMP_DEBUG_ASSERT(*lck != NULL);
1112 }
1113 
1114 // Fast-path acquire tas lock
1115 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1116  { \
1117  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1118  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1119  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1120  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1121  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1122  kmp_uint32 spins; \
1123  KMP_FSYNC_PREPARE(l); \
1124  KMP_INIT_YIELD(spins); \
1125  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1126  do { \
1127  if (TCR_4(__kmp_nth) > \
1128  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1129  KMP_YIELD(TRUE); \
1130  } else { \
1131  KMP_YIELD_SPIN(spins); \
1132  } \
1133  __kmp_spin_backoff(&backoff); \
1134  } while ( \
1135  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1136  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1137  } \
1138  KMP_FSYNC_ACQUIRED(l); \
1139  }
1140 
1141 // Fast-path test tas lock
1142 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1143  { \
1144  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1145  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1146  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1147  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1148  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1149  }
1150 
1151 // Fast-path release tas lock
1152 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1153  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1154 
1155 #if KMP_USE_FUTEX
1156 
1157 #include <sys/syscall.h>
1158 #include <unistd.h>
1159 #ifndef FUTEX_WAIT
1160 #define FUTEX_WAIT 0
1161 #endif
1162 #ifndef FUTEX_WAKE
1163 #define FUTEX_WAKE 1
1164 #endif
1165 
1166 // Fast-path acquire futex lock
1167 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1168  { \
1169  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1170  kmp_int32 gtid_code = (gtid + 1) << 1; \
1171  KMP_MB(); \
1172  KMP_FSYNC_PREPARE(ftx); \
1173  kmp_int32 poll_val; \
1174  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1175  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1176  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1177  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1178  if (!cond) { \
1179  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1180  poll_val | \
1181  KMP_LOCK_BUSY(1, futex))) { \
1182  continue; \
1183  } \
1184  poll_val |= KMP_LOCK_BUSY(1, futex); \
1185  } \
1186  kmp_int32 rc; \
1187  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1188  NULL, NULL, 0)) != 0) { \
1189  continue; \
1190  } \
1191  gtid_code |= 1; \
1192  } \
1193  KMP_FSYNC_ACQUIRED(ftx); \
1194  }
1195 
1196 // Fast-path test futex lock
1197 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1198  { \
1199  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1200  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1201  KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1202  KMP_FSYNC_ACQUIRED(ftx); \
1203  rc = TRUE; \
1204  } else { \
1205  rc = FALSE; \
1206  } \
1207  }
1208 
1209 // Fast-path release futex lock
1210 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1211  { \
1212  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1213  KMP_MB(); \
1214  KMP_FSYNC_RELEASING(ftx); \
1215  kmp_int32 poll_val = \
1216  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1217  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1218  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1219  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1220  } \
1221  KMP_MB(); \
1222  KMP_YIELD_OVERSUB(); \
1223  }
1224 
1225 #endif // KMP_USE_FUTEX
1226 
1227 #else // KMP_USE_DYNAMIC_LOCK
1228 
1229 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1230  ident_t const *loc,
1231  kmp_int32 gtid) {
1232  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1233 
1234  // Because of the double-check, the following load doesn't need to be volatile
1235  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1236 
1237  if (lck == NULL) {
1238  void *idx;
1239 
1240  // Allocate & initialize the lock.
1241  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1242  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1243  __kmp_init_user_lock_with_checks(lck);
1244  __kmp_set_user_lock_location(lck, loc);
1245 #if USE_ITT_BUILD
1246  __kmp_itt_critical_creating(lck);
1247 // __kmp_itt_critical_creating() should be called *before* the first usage
1248 // of the underlying lock. It is the only place where we can guarantee it.
1249 // There is a chance the lock will be destroyed without ever being used, but
1250 // that is not a problem, because this is not a real event seen by the user;
1251 // it merely sets a name for the object (lock). See more details in kmp_itt.h.
1252 #endif /* USE_ITT_BUILD */
1253 
1254  // Use a cmpxchg instruction to slam the start of the critical section with
1255  // the lock pointer. If another thread beat us to it, deallocate the lock,
1256  // and use the lock that the other thread allocated.
1257  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1258 
1259  if (status == 0) {
1260 // Deallocate the lock and reload the value.
1261 #if USE_ITT_BUILD
1262  __kmp_itt_critical_destroyed(lck);
1263 // Let ITT know the lock is destroyed and the same memory location may be reused
1264 // for another purpose.
1265 #endif /* USE_ITT_BUILD */
1266  __kmp_destroy_user_lock_with_checks(lck);
1267  __kmp_user_lock_free(&idx, gtid, lck);
1268  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1269  KMP_DEBUG_ASSERT(lck != NULL);
1270  }
1271  }
1272  return lck;
1273 }
1274 
1275 #endif // KMP_USE_DYNAMIC_LOCK
1276 
1287 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1288  kmp_critical_name *crit) {
1289 #if KMP_USE_DYNAMIC_LOCK
1290 #if OMPT_SUPPORT && OMPT_OPTIONAL
1291  OMPT_STORE_RETURN_ADDRESS(global_tid);
1292 #endif // OMPT_SUPPORT
1293  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1294 #else
1295  KMP_COUNT_BLOCK(OMP_CRITICAL);
1296 #if OMPT_SUPPORT && OMPT_OPTIONAL
1297  ompt_state_t prev_state = ompt_state_undefined;
1298  ompt_thread_info_t ti;
1299 #endif
1300  kmp_user_lock_p lck;
1301 
1302  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1303  __kmp_assert_valid_gtid(global_tid);
1304 
1305  // TODO: add THR_OVHD_STATE
1306 
1307  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1308  KMP_CHECK_USER_LOCK_INIT();
1309 
1310  if ((__kmp_user_lock_kind == lk_tas) &&
1311  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1312  lck = (kmp_user_lock_p)crit;
1313  }
1314 #if KMP_USE_FUTEX
1315  else if ((__kmp_user_lock_kind == lk_futex) &&
1316  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1317  lck = (kmp_user_lock_p)crit;
1318  }
1319 #endif
1320  else { // ticket, queuing or drdpa
1321  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1322  }
1323 
1324  if (__kmp_env_consistency_check)
1325  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1326 
1327  // since the critical directive binds to all threads, not just the current
1328  // team, we have to check this even if we are in a serialized team.
1329  // also, even if we are the uber thread, we still have to acquire the lock,
1330  // as we have to contend with sibling threads.
1331 
1332 #if USE_ITT_BUILD
1333  __kmp_itt_critical_acquiring(lck);
1334 #endif /* USE_ITT_BUILD */
1335 #if OMPT_SUPPORT && OMPT_OPTIONAL
1336  OMPT_STORE_RETURN_ADDRESS(global_tid);
1337  void *codeptr_ra = NULL;
1338  if (ompt_enabled.enabled) {
1339  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1340  /* OMPT state update */
1341  prev_state = ti.state;
1342  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1343  ti.state = ompt_state_wait_critical;
1344 
1345  /* OMPT event callback */
1346  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1347  if (ompt_enabled.ompt_callback_mutex_acquire) {
1348  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1349  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1350  (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1351  }
1352  }
1353 #endif
1354  // The value of 'crit' should be usable as the critical_id of the critical
1355  // section directive.
1356  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1357 
1358 #if USE_ITT_BUILD
1359  __kmp_itt_critical_acquired(lck);
1360 #endif /* USE_ITT_BUILD */
1361 #if OMPT_SUPPORT && OMPT_OPTIONAL
1362  if (ompt_enabled.enabled) {
1363  /* OMPT state update */
1364  ti.state = prev_state;
1365  ti.wait_id = 0;
1366 
1367  /* OMPT event callback */
1368  if (ompt_enabled.ompt_callback_mutex_acquired) {
1369  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1370  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1371  }
1372  }
1373 #endif
1374  KMP_POP_PARTITIONED_TIMER();
1375 
1376  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1377  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1378 #endif // KMP_USE_DYNAMIC_LOCK
1379 }
1380 
1381 #if KMP_USE_DYNAMIC_LOCK
1382 
1383 // Converts the given hint to an internal lock implementation
1384 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1385 #if KMP_USE_TSX
1386 #define KMP_TSX_LOCK(seq) lockseq_##seq
1387 #else
1388 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1389 #endif
1390 
1391 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1392 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1393 #else
1394 #define KMP_CPUINFO_RTM 0
1395 #endif
1396 
1397  // Hints that do not require further logic
1398  if (hint & kmp_lock_hint_hle)
1399  return KMP_TSX_LOCK(hle);
1400  if (hint & kmp_lock_hint_rtm)
1401  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1402  if (hint & kmp_lock_hint_adaptive)
1403  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1404 
1405  // Rule out conflicting hints first by returning the default lock
1406  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1407  return __kmp_user_lock_seq;
1408  if ((hint & omp_lock_hint_speculative) &&
1409  (hint & omp_lock_hint_nonspeculative))
1410  return __kmp_user_lock_seq;
1411 
1412  // Do not even consider speculation when it appears to be contended
1413  if (hint & omp_lock_hint_contended)
1414  return lockseq_queuing;
1415 
1416  // Uncontended lock without speculation
1417  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1418  return lockseq_tas;
1419 
1420  // Use RTM lock for speculation
1421  if (hint & omp_lock_hint_speculative)
1422  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1423 
1424  return __kmp_user_lock_seq;
1425 }
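/* Illustrative note: the same hint-to-lock mapping applies when locks are
   created with a hint in the dynamic-lock path, e.g.

       omp_lock_t l;
       omp_init_lock_with_hint(&l, omp_lock_hint_speculative);

   which, per the rules above, selects an RTM-based lock on RTM-capable x86
   hardware and otherwise falls back to the default user lock sequence. */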
1426 
1427 #if OMPT_SUPPORT && OMPT_OPTIONAL
1428 #if KMP_USE_DYNAMIC_LOCK
1429 static kmp_mutex_impl_t
1430 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1431  if (user_lock) {
1432  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1433  case 0:
1434  break;
1435 #if KMP_USE_FUTEX
1436  case locktag_futex:
1437  return kmp_mutex_impl_queuing;
1438 #endif
1439  case locktag_tas:
1440  return kmp_mutex_impl_spin;
1441 #if KMP_USE_TSX
1442  case locktag_hle:
1443  case locktag_rtm_spin:
1444  return kmp_mutex_impl_speculative;
1445 #endif
1446  default:
1447  return kmp_mutex_impl_none;
1448  }
1449  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1450  }
1451  KMP_ASSERT(ilock);
1452  switch (ilock->type) {
1453 #if KMP_USE_TSX
1454  case locktag_adaptive:
1455  case locktag_rtm_queuing:
1456  return kmp_mutex_impl_speculative;
1457 #endif
1458  case locktag_nested_tas:
1459  return kmp_mutex_impl_spin;
1460 #if KMP_USE_FUTEX
1461  case locktag_nested_futex:
1462 #endif
1463  case locktag_ticket:
1464  case locktag_queuing:
1465  case locktag_drdpa:
1466  case locktag_nested_ticket:
1467  case locktag_nested_queuing:
1468  case locktag_nested_drdpa:
1469  return kmp_mutex_impl_queuing;
1470  default:
1471  return kmp_mutex_impl_none;
1472  }
1473 }
1474 #else
1475 // For locks without dynamic binding
1476 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1477  switch (__kmp_user_lock_kind) {
1478  case lk_tas:
1479  return kmp_mutex_impl_spin;
1480 #if KMP_USE_FUTEX
1481  case lk_futex:
1482 #endif
1483  case lk_ticket:
1484  case lk_queuing:
1485  case lk_drdpa:
1486  return kmp_mutex_impl_queuing;
1487 #if KMP_USE_TSX
1488  case lk_hle:
1489  case lk_rtm_queuing:
1490  case lk_rtm_spin:
1491  case lk_adaptive:
1492  return kmp_mutex_impl_speculative;
1493 #endif
1494  default:
1495  return kmp_mutex_impl_none;
1496  }
1497 }
1498 #endif // KMP_USE_DYNAMIC_LOCK
1499 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1500 
1514 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1515  kmp_critical_name *crit, uint32_t hint) {
1516  KMP_COUNT_BLOCK(OMP_CRITICAL);
1517  kmp_user_lock_p lck;
1518 #if OMPT_SUPPORT && OMPT_OPTIONAL
1519  ompt_state_t prev_state = ompt_state_undefined;
1520  ompt_thread_info_t ti;
1521  // This is the case if called from __kmpc_critical:
1522  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1523  if (!codeptr)
1524  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1525 #endif
1526 
1527  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1528  __kmp_assert_valid_gtid(global_tid);
1529 
1530  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1531  // Check if it is initialized.
1532  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1533  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1534  if (*lk == 0) {
1535  if (KMP_IS_D_LOCK(lockseq)) {
1536  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1537  KMP_GET_D_TAG(lockseq));
1538  } else {
1539  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1540  }
1541  }
1542  // Branch for accessing the actual lock object and set operation. This
1543  // branching is inevitable since this lock initialization does not follow the
1544  // normal dispatch path (lock table is not used).
1545  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1546  lck = (kmp_user_lock_p)lk;
1547  if (__kmp_env_consistency_check) {
1548  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1549  __kmp_map_hint_to_lock(hint));
1550  }
1551 #if USE_ITT_BUILD
1552  __kmp_itt_critical_acquiring(lck);
1553 #endif
1554 #if OMPT_SUPPORT && OMPT_OPTIONAL
1555  if (ompt_enabled.enabled) {
1556  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1557  /* OMPT state update */
1558  prev_state = ti.state;
1559  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1560  ti.state = ompt_state_wait_critical;
1561 
1562  /* OMPT event callback */
1563  if (ompt_enabled.ompt_callback_mutex_acquire) {
1564  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1565  ompt_mutex_critical, (unsigned int)hint,
1566  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1567  codeptr);
1568  }
1569  }
1570 #endif
1571 #if KMP_USE_INLINED_TAS
1572  if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1573  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1574  } else
1575 #elif KMP_USE_INLINED_FUTEX
1576  if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1577  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1578  } else
1579 #endif
1580  {
1581  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1582  }
1583  } else {
1584  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1585  lck = ilk->lock;
1586  if (__kmp_env_consistency_check) {
1587  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1588  __kmp_map_hint_to_lock(hint));
1589  }
1590 #if USE_ITT_BUILD
1591  __kmp_itt_critical_acquiring(lck);
1592 #endif
1593 #if OMPT_SUPPORT && OMPT_OPTIONAL
1594  if (ompt_enabled.enabled) {
1595  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1596  /* OMPT state update */
1597  prev_state = ti.state;
1598  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1599  ti.state = ompt_state_wait_critical;
1600 
1601  /* OMPT event callback */
1602  if (ompt_enabled.ompt_callback_mutex_acquire) {
1603  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1604  ompt_mutex_critical, (unsigned int)hint,
1605  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1606  codeptr);
1607  }
1608  }
1609 #endif
1610  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1611  }
1612  KMP_POP_PARTITIONED_TIMER();
1613 
1614 #if USE_ITT_BUILD
1615  __kmp_itt_critical_acquired(lck);
1616 #endif /* USE_ITT_BUILD */
1617 #if OMPT_SUPPORT && OMPT_OPTIONAL
1618  if (ompt_enabled.enabled) {
1619  /* OMPT state update */
1620  ti.state = prev_state;
1621  ti.wait_id = 0;
1622 
1623  /* OMPT event callback */
1624  if (ompt_enabled.ompt_callback_mutex_acquired) {
1625  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1626  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1627  }
1628  }
1629 #endif
1630 
1631  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1632  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1633 } // __kmpc_critical_with_hint
1634 
1635 #endif // KMP_USE_DYNAMIC_LOCK
1636 
1646 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1647  kmp_critical_name *crit) {
1648  kmp_user_lock_p lck;
1649 
1650  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1651 
1652 #if KMP_USE_DYNAMIC_LOCK
1653  int locktag = KMP_EXTRACT_D_TAG(crit);
1654  if (locktag) {
1655  lck = (kmp_user_lock_p)crit;
1656  KMP_ASSERT(lck != NULL);
1657  if (__kmp_env_consistency_check) {
1658  __kmp_pop_sync(global_tid, ct_critical, loc);
1659  }
1660 #if USE_ITT_BUILD
1661  __kmp_itt_critical_releasing(lck);
1662 #endif
1663 #if KMP_USE_INLINED_TAS
1664  if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1665  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1666  } else
1667 #elif KMP_USE_INLINED_FUTEX
1668  if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1669  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1670  } else
1671 #endif
1672  {
1673  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1674  }
1675  } else {
1676  kmp_indirect_lock_t *ilk =
1677  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1678  KMP_ASSERT(ilk != NULL);
1679  lck = ilk->lock;
1680  if (__kmp_env_consistency_check) {
1681  __kmp_pop_sync(global_tid, ct_critical, loc);
1682  }
1683 #if USE_ITT_BUILD
1684  __kmp_itt_critical_releasing(lck);
1685 #endif
1686  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1687  }
1688 
1689 #else // KMP_USE_DYNAMIC_LOCK
1690 
1691  if ((__kmp_user_lock_kind == lk_tas) &&
1692  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1693  lck = (kmp_user_lock_p)crit;
1694  }
1695 #if KMP_USE_FUTEX
1696  else if ((__kmp_user_lock_kind == lk_futex) &&
1697  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1698  lck = (kmp_user_lock_p)crit;
1699  }
1700 #endif
1701  else { // ticket, queuing or drdpa
1702  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1703  }
1704 
1705  KMP_ASSERT(lck != NULL);
1706 
1707  if (__kmp_env_consistency_check)
1708  __kmp_pop_sync(global_tid, ct_critical, loc);
1709 
1710 #if USE_ITT_BUILD
1711  __kmp_itt_critical_releasing(lck);
1712 #endif /* USE_ITT_BUILD */
1713  // The value of 'crit' should be usable as the critical_id of the critical
1714  // section directive.
1715  __kmp_release_user_lock_with_checks(lck, global_tid);
1716 
1717 #endif // KMP_USE_DYNAMIC_LOCK
1718 
1719 #if OMPT_SUPPORT && OMPT_OPTIONAL
1720  /* OMPT release event triggers after lock is released; place here to trigger
1721  * for all #if branches */
1722  OMPT_STORE_RETURN_ADDRESS(global_tid);
1723  if (ompt_enabled.ompt_callback_mutex_released) {
1724  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1725  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1726  OMPT_LOAD_RETURN_ADDRESS(0));
1727  }
1728 #endif
1729 
1730  KMP_POP_PARTITIONED_TIMER();
1731  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1732 }
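/* Illustrative sketch: a named critical section is lowered around a static
   kmp_critical_name object that the runtime lazily turns into a lock:

       static kmp_critical_name crit_name; // zero-initialized, one per name
       ...
       __kmpc_critical(&loc, gtid, &crit_name);
       // body of #pragma omp critical(name)
       __kmpc_end_critical(&loc, gtid, &crit_name);

   With a hint clause the front end calls __kmpc_critical_with_hint() instead. */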
1733 
1743 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1744  int status;
1745  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1746  __kmp_assert_valid_gtid(global_tid);
1747 
1748  if (!TCR_4(__kmp_init_parallel))
1749  __kmp_parallel_initialize();
1750 
1751  __kmp_resume_if_soft_paused();
1752 
1753  if (__kmp_env_consistency_check)
1754  __kmp_check_barrier(global_tid, ct_barrier, loc);
1755 
1756 #if OMPT_SUPPORT
1757  ompt_frame_t *ompt_frame;
1758  if (ompt_enabled.enabled) {
1759  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1760  if (ompt_frame->enter_frame.ptr == NULL)
1761  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1762  }
1763  OMPT_STORE_RETURN_ADDRESS(global_tid);
1764 #endif
1765 #if USE_ITT_NOTIFY
1766  __kmp_threads[global_tid]->th.th_ident = loc;
1767 #endif
1768  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1769 #if OMPT_SUPPORT && OMPT_OPTIONAL
1770  if (ompt_enabled.enabled) {
1771  ompt_frame->enter_frame = ompt_data_none;
1772  }
1773 #endif
1774 
1775  return (status != 0) ? 0 : 1;
1776 }
1777 
1787 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1788  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1789  __kmp_assert_valid_gtid(global_tid);
1790  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1791 }
1792 
1803 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1804  kmp_int32 ret;
1805  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1806  __kmp_assert_valid_gtid(global_tid);
1807 
1808  if (!TCR_4(__kmp_init_parallel))
1809  __kmp_parallel_initialize();
1810 
1811  __kmp_resume_if_soft_paused();
1812 
1813  if (__kmp_env_consistency_check) {
1814  if (loc == 0) {
1815  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1816  }
1817  __kmp_check_barrier(global_tid, ct_barrier, loc);
1818  }
1819 
1820 #if OMPT_SUPPORT
1821  ompt_frame_t *ompt_frame;
1822  if (ompt_enabled.enabled) {
1823  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1824  if (ompt_frame->enter_frame.ptr == NULL)
1825  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1826  }
1827  OMPT_STORE_RETURN_ADDRESS(global_tid);
1828 #endif
1829 #if USE_ITT_NOTIFY
1830  __kmp_threads[global_tid]->th.th_ident = loc;
1831 #endif
1832  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1833 #if OMPT_SUPPORT && OMPT_OPTIONAL
1834  if (ompt_enabled.enabled) {
1835  ompt_frame->enter_frame = ompt_data_none;
1836  }
1837 #endif
1838 
1839  ret = __kmpc_master(loc, global_tid);
1840 
1841  if (__kmp_env_consistency_check) {
1842  /* there's no __kmpc_end_master called; so the (stats) */
1843  /* actions of __kmpc_end_master are done here */
1844  if (ret) {
1845  /* only one thread should do the pop since only */
1846  /* one did the push (see __kmpc_master()) */
1847  __kmp_pop_sync(global_tid, ct_master, loc);
1848  }
1849  }
1850 
1851  return (ret);
1852 }
1853 
1854 /* The BARRIER for a SINGLE process section is always explicit */
1866 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1867  __kmp_assert_valid_gtid(global_tid);
1868  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1869 
1870  if (rc) {
1871  // We are going to execute the single statement, so we should count it.
1872  KMP_COUNT_BLOCK(OMP_SINGLE);
1873  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1874  }
1875 
1876 #if OMPT_SUPPORT && OMPT_OPTIONAL
1877  kmp_info_t *this_thr = __kmp_threads[global_tid];
1878  kmp_team_t *team = this_thr->th.th_team;
1879  int tid = __kmp_tid_from_gtid(global_tid);
1880 
1881  if (ompt_enabled.enabled) {
1882  if (rc) {
1883  if (ompt_enabled.ompt_callback_work) {
1884  ompt_callbacks.ompt_callback(ompt_callback_work)(
1885  ompt_work_single_executor, ompt_scope_begin,
1886  &(team->t.ompt_team_info.parallel_data),
1887  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1888  1, OMPT_GET_RETURN_ADDRESS(0));
1889  }
1890  } else {
1891  if (ompt_enabled.ompt_callback_work) {
1892  ompt_callbacks.ompt_callback(ompt_callback_work)(
1893  ompt_work_single_other, ompt_scope_begin,
1894  &(team->t.ompt_team_info.parallel_data),
1895  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1896  1, OMPT_GET_RETURN_ADDRESS(0));
1897  ompt_callbacks.ompt_callback(ompt_callback_work)(
1898  ompt_work_single_other, ompt_scope_end,
1899  &(team->t.ompt_team_info.parallel_data),
1900  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1901  1, OMPT_GET_RETURN_ADDRESS(0));
1902  }
1903  }
1904  }
1905 #endif
1906 
1907  return rc;
1908 }
1909 
1919 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1920  __kmp_assert_valid_gtid(global_tid);
1921  __kmp_exit_single(global_tid);
1922  KMP_POP_PARTITIONED_TIMER();
1923 
1924 #if OMPT_SUPPORT && OMPT_OPTIONAL
1925  kmp_info_t *this_thr = __kmp_threads[global_tid];
1926  kmp_team_t *team = this_thr->th.th_team;
1927  int tid = __kmp_tid_from_gtid(global_tid);
1928 
1929  if (ompt_enabled.ompt_callback_work) {
1930  ompt_callbacks.ompt_callback(ompt_callback_work)(
1931  ompt_work_single_executor, ompt_scope_end,
1932  &(team->t.ompt_team_info.parallel_data),
1933  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1934  OMPT_GET_RETURN_ADDRESS(0));
1935  }
1936 #endif
1937 }
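/* Illustrative sketch: single is lowered like master/masked, with the implicit
   barrier emitted separately unless a nowait clause is given:

       if (__kmpc_single(&loc, gtid)) {
         // body of #pragma omp single
         __kmpc_end_single(&loc, gtid);
       }
       __kmpc_barrier(&loc, gtid); // omitted for nowait
*/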
1938 
1946 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1947  KMP_POP_PARTITIONED_TIMER();
1948  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1949 
1950 #if OMPT_SUPPORT && OMPT_OPTIONAL
1951  if (ompt_enabled.ompt_callback_work) {
1952  ompt_work_t ompt_work_type = ompt_work_loop;
1953  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1954  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1955  // Determine workshare type
1956  if (loc != NULL) {
1957  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1958  ompt_work_type = ompt_work_loop;
1959  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1960  ompt_work_type = ompt_work_sections;
1961  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1962  ompt_work_type = ompt_work_distribute;
1963  } else {
1964  // use default set above.
1965  // a warning about this case is provided in __kmpc_for_static_init
1966  }
1967  KMP_DEBUG_ASSERT(ompt_work_type);
1968  }
1969  ompt_callbacks.ompt_callback(ompt_callback_work)(
1970  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1971  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1972  }
1973 #endif
1974  if (__kmp_env_consistency_check)
1975  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1976 }
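/* Illustrative, not the exact compiler output: __kmpc_for_static_fini closes a
   statically scheduled worksharing loop whose bounds were computed by one of
   the __kmpc_for_static_init_* routines, roughly:

       kmp_int32 last = 0, lb = 0, ub = n - 1, stride = 1;
       __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lb, &ub,
                                &stride, 1, 1);
       for (kmp_int32 i = lb; i <= ub; ++i)
         body(i);
       __kmpc_for_static_fini(&loc, gtid);
       __kmpc_barrier(&loc, gtid); // implicit barrier unless nowait
*/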
1977 
1978 // User routines which take C-style arguments (call by value)
1979 // different from the Fortran equivalent routines
1980 
1981 void ompc_set_num_threads(int arg) {
1982  // !!!!! TODO: check the per-task binding
1983  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1984 }
1985 
1986 void ompc_set_dynamic(int flag) {
1987  kmp_info_t *thread;
1988 
1989  /* For the thread-private implementation of the internal controls */
1990  thread = __kmp_entry_thread();
1991 
1992  __kmp_save_internal_controls(thread);
1993 
1994  set__dynamic(thread, flag ? true : false);
1995 }
1996 
1997 void ompc_set_nested(int flag) {
1998  kmp_info_t *thread;
1999 
2000  /* For the thread-private internal controls implementation */
2001  thread = __kmp_entry_thread();
2002 
2003  __kmp_save_internal_controls(thread);
2004 
2005  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
2006 }
2007 
2008 void ompc_set_max_active_levels(int max_active_levels) {
2009  /* TO DO */
2010  /* we want per-task implementation of this internal control */
2011 
2012  /* For the per-thread internal controls implementation */
2013  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
2014 }
2015 
2016 void ompc_set_schedule(omp_sched_t kind, int modifier) {
2017  // !!!!! TODO: check the per-task binding
2018  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
2019 }
2020 
2021 int ompc_get_ancestor_thread_num(int level) {
2022  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
2023 }
2024 
2025 int ompc_get_team_size(int level) {
2026  return __kmp_get_team_size(__kmp_entry_gtid(), level);
2027 }
2028 
2029 /* OpenMP 5.0 Affinity Format API */
2030 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
2031  if (!__kmp_init_serial) {
2032  __kmp_serial_initialize();
2033  }
2034  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2035  format, KMP_STRLEN(format) + 1);
2036 }
2037 
2038 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2039  size_t format_size;
2040  if (!__kmp_init_serial) {
2041  __kmp_serial_initialize();
2042  }
2043  format_size = KMP_STRLEN(__kmp_affinity_format);
2044  if (buffer && size) {
2045  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2046  format_size + 1);
2047  }
2048  return format_size;
2049 }
2050 
2051 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2052  int gtid;
2053  if (!TCR_4(__kmp_init_middle)) {
2054  __kmp_middle_initialize();
2055  }
2056  __kmp_assign_root_init_mask();
2057  gtid = __kmp_get_gtid();
2058 #if KMP_AFFINITY_SUPPORTED
2059  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2060  __kmp_affinity.flags.reset) {
2061  __kmp_reset_root_init_mask(gtid);
2062  }
2063 #endif
2064  __kmp_aux_display_affinity(gtid, format);
2065 }
2066 
2067 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2068  char const *format) {
2069  int gtid;
2070  size_t num_required;
2071  kmp_str_buf_t capture_buf;
2072  if (!TCR_4(__kmp_init_middle)) {
2073  __kmp_middle_initialize();
2074  }
2075  __kmp_assign_root_init_mask();
2076  gtid = __kmp_get_gtid();
2077 #if KMP_AFFINITY_SUPPORTED
2078  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2079  __kmp_affinity.flags.reset) {
2080  __kmp_reset_root_init_mask(gtid);
2081  }
2082 #endif
2083  __kmp_str_buf_init(&capture_buf);
2084  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2085  if (buffer && buf_size) {
2086  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2087  capture_buf.used + 1);
2088  }
2089  __kmp_str_buf_free(&capture_buf);
2090  return num_required;
2091 }
2092 
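/* Usage sketch (illustrative): both query routines above return the number of
   characters needed (not counting the terminating NUL) whether or not a
   buffer is supplied (note the "buffer && size" guards), so a caller can size
   the buffer in two passes. Passing a NULL format selects the currently set
   affinity format:

     #include <omp.h>
     #include <stdio.h>
     #include <stdlib.h>

     void show_affinity(void) {
     #pragma omp parallel
       {
         size_t needed = omp_capture_affinity(NULL, 0, NULL); // query only
         char *buf = (char *)malloc(needed + 1);
         if (buf) {
           omp_capture_affinity(buf, needed + 1, NULL);
           printf("%s\n", buf);
           free(buf);
         }
       }
     }
*/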
2093 void kmpc_set_stacksize(int arg) {
2094  // __kmp_aux_set_stacksize initializes the library if needed
2095  __kmp_aux_set_stacksize(arg);
2096 }
2097 
2098 void kmpc_set_stacksize_s(size_t arg) {
2099  // __kmp_aux_set_stacksize initializes the library if needed
2100  __kmp_aux_set_stacksize(arg);
2101 }
2102 
2103 void kmpc_set_blocktime(int arg) {
2104  int gtid, tid, bt = arg;
2105  kmp_info_t *thread;
2106 
2107  gtid = __kmp_entry_gtid();
2108  tid = __kmp_tid_from_gtid(gtid);
2109  thread = __kmp_thread_from_gtid(gtid);
2110 
2111  __kmp_aux_convert_blocktime(&bt);
2112  __kmp_aux_set_blocktime(bt, thread, tid);
2113 }
2114 
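/* Usage sketch (illustrative): this parallels the kmp_set_blocktime()
   extension declared in omp.h (or the KMP_BLOCKTIME environment variable);
   the value is the time a worker keeps spinning after finishing its work
   before it is put to sleep.

     #include <omp.h>

     void tune_idle_policy(void) {
       kmp_set_blocktime(200); // spin for about 200 ms before sleeping
     }
*/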
2115 void kmpc_set_library(int arg) {
2116  // __kmp_user_set_library initializes the library if needed
2117  __kmp_user_set_library((enum library_type)arg);
2118 }
2119 
2120 void kmpc_set_defaults(char const *str) {
2121  // __kmp_aux_set_defaults initializes the library if needed
2122  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2123 }
2124 
2125 void kmpc_set_disp_num_buffers(int arg) {
2126  // ignore after initialization because some teams have already
2127  // allocated dispatch buffers
2128  if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2129  arg <= KMP_MAX_DISP_NUM_BUFF) {
2130  __kmp_dispatch_num_buffers = arg;
2131  }
2132 }
2133 
2134 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2135 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2136  return -1;
2137 #else
2138  if (!TCR_4(__kmp_init_middle)) {
2139  __kmp_middle_initialize();
2140  }
2141  __kmp_assign_root_init_mask();
2142  return __kmp_aux_set_affinity_mask_proc(proc, mask);
2143 #endif
2144 }
2145 
2146 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2147 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2148  return -1;
2149 #else
2150  if (!TCR_4(__kmp_init_middle)) {
2151  __kmp_middle_initialize();
2152  }
2153  __kmp_assign_root_init_mask();
2154  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2155 #endif
2156 }
2157 
2158 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2159 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2160  return -1;
2161 #else
2162  if (!TCR_4(__kmp_init_middle)) {
2163  __kmp_middle_initialize();
2164  }
2165  __kmp_assign_root_init_mask();
2166  return __kmp_aux_get_affinity_mask_proc(proc, mask);
2167 #endif
2168 }
2169 
2170 /* -------------------------------------------------------------------------- */
2215 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2216  void *cpy_data, void (*cpy_func)(void *, void *),
2217  kmp_int32 didit) {
2218  void **data_ptr;
2219  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2220  __kmp_assert_valid_gtid(gtid);
2221 
2222  KMP_MB();
2223 
2224  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2225 
2226  if (__kmp_env_consistency_check) {
2227  if (loc == 0) {
2228  KMP_WARNING(ConstructIdentInvalid);
2229  }
2230  }
2231 
2232  // ToDo: Optimize the following two barriers into some kind of split barrier
2233 
2234  if (didit)
2235  *data_ptr = cpy_data;
2236 
2237 #if OMPT_SUPPORT
2238  ompt_frame_t *ompt_frame;
2239  if (ompt_enabled.enabled) {
2240  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2241  if (ompt_frame->enter_frame.ptr == NULL)
2242  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2243  }
2244  OMPT_STORE_RETURN_ADDRESS(gtid);
2245 #endif
2246 /* This barrier is not a barrier region boundary */
2247 #if USE_ITT_NOTIFY
2248  __kmp_threads[gtid]->th.th_ident = loc;
2249 #endif
2250  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2251 
2252  if (!didit)
2253  (*cpy_func)(cpy_data, *data_ptr);
2254 
2255  // Consider next barrier a user-visible barrier for barrier region boundaries
2256  // Nesting checks are already handled by the single construct checks
2257  {
2258 #if OMPT_SUPPORT
2259  OMPT_STORE_RETURN_ADDRESS(gtid);
2260 #endif
2261 #if USE_ITT_NOTIFY
2262  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed
2263  // (e.g. tasks can overwrite the location)
2264 #endif
2265  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2266 #if OMPT_SUPPORT && OMPT_OPTIONAL
2267  if (ompt_enabled.enabled) {
2268  ompt_frame->enter_frame = ompt_data_none;
2269  }
2270 #endif
2271  }
2272 }
2273 
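/* Lowering sketch (illustrative; actual code generation is compiler-specific):
   for "#pragma omp single copyprivate(x)" every thread calls this routine.
   The thread that executed the single region passes didit == 1 along with its
   private data; the others pass didit == 0. The hypothetical copy_x helper
   below shows the cpy_func contract: the first argument is the destination
   thread's data, the second is the data published by the executing thread
   via *data_ptr.

     struct cpy { int *x_ptr; };

     static void copy_x(void *dst_v, void *src_v) {
       struct cpy *dst = (struct cpy *)dst_v;
       struct cpy *src = (struct cpy *)src_v;
       *dst->x_ptr = *src->x_ptr; // broadcast the value produced in the single
     }

     // Per thread, inside the parallel region (loc/gtid as usual):
     //   int x;                          // private copy
     //   struct cpy d = { &x };
     //   kmp_int32 didit = 0;
     //   if (__kmpc_single(loc, gtid)) {
     //     x = 42;                       // executed by one thread only
     //     didit = 1;
     //     __kmpc_end_single(loc, gtid);
     //   }
     //   __kmpc_copyprivate(loc, gtid, sizeof(d), &d, copy_x, didit);
*/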
2274 /* --------------------------------------------------------------------------*/
2291 void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2292  void **data_ptr;
2293 
2294  KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2295 
2296  KMP_MB();
2297 
2298  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2299 
2300  if (__kmp_env_consistency_check) {
2301  if (loc == 0) {
2302  KMP_WARNING(ConstructIdentInvalid);
2303  }
2304  }
2305 
2306  // ToDo: Optimize the following barrier
2307 
2308  if (cpy_data)
2309  *data_ptr = cpy_data;
2310 
2311 #if OMPT_SUPPORT
2312  ompt_frame_t *ompt_frame;
2313  if (ompt_enabled.enabled) {
2314  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2315  if (ompt_frame->enter_frame.ptr == NULL)
2316  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2317  OMPT_STORE_RETURN_ADDRESS(gtid);
2318  }
2319 #endif
2320 /* This barrier is not a barrier region boundary */
2321 #if USE_ITT_NOTIFY
2322  __kmp_threads[gtid]->th.th_ident = loc;
2323 #endif
2324  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2325 
2326  return *data_ptr;
2327 }
2328 
2329 /* -------------------------------------------------------------------------- */
2330 
2331 #define INIT_LOCK __kmp_init_user_lock_with_checks
2332 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2333 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2334 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2335 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2336 #define ACQUIRE_NESTED_LOCK_TIMED \
2337  __kmp_acquire_nested_user_lock_with_checks_timed
2338 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2339 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2340 #define TEST_LOCK __kmp_test_user_lock_with_checks
2341 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2342 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2343 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2344 
2345 // TODO: Make check abort messages use location info & pass it into
2346 // with_checks routines
2347 
2348 #if KMP_USE_DYNAMIC_LOCK
2349 
2350 // internal lock initializer
2351 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2352  kmp_dyna_lockseq_t seq) {
2353  if (KMP_IS_D_LOCK(seq)) {
2354  KMP_INIT_D_LOCK(lock, seq);
2355 #if USE_ITT_BUILD
2356  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2357 #endif
2358  } else {
2359  KMP_INIT_I_LOCK(lock, seq);
2360 #if USE_ITT_BUILD
2361  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2362  __kmp_itt_lock_creating(ilk->lock, loc);
2363 #endif
2364  }
2365 }
2366 
2367 // internal nest lock initializer
2368 static __forceinline void
2369 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2370  kmp_dyna_lockseq_t seq) {
2371 #if KMP_USE_TSX
2372  // Don't have nested lock implementation for speculative locks
2373  if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2374  seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2375  seq = __kmp_user_lock_seq;
2376 #endif
2377  switch (seq) {
2378  case lockseq_tas:
2379  seq = lockseq_nested_tas;
2380  break;
2381 #if KMP_USE_FUTEX
2382  case lockseq_futex:
2383  seq = lockseq_nested_futex;
2384  break;
2385 #endif
2386  case lockseq_ticket:
2387  seq = lockseq_nested_ticket;
2388  break;
2389  case lockseq_queuing:
2390  seq = lockseq_nested_queuing;
2391  break;
2392  case lockseq_drdpa:
2393  seq = lockseq_nested_drdpa;
2394  break;
2395  default:
2396  seq = lockseq_nested_queuing;
2397  }
2398  KMP_INIT_I_LOCK(lock, seq);
2399 #if USE_ITT_BUILD
2400  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2401  __kmp_itt_lock_creating(ilk->lock, loc);
2402 #endif
2403 }
2404 
2405 /* initialize the lock with a hint */
2406 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2407  uintptr_t hint) {
2408  KMP_DEBUG_ASSERT(__kmp_init_serial);
2409  if (__kmp_env_consistency_check && user_lock == NULL) {
2410  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2411  }
2412 
2413  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2414 
2415 #if OMPT_SUPPORT && OMPT_OPTIONAL
2416  // This is the case, if called from omp_init_lock_with_hint:
2417  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2418  if (!codeptr)
2419  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2420  if (ompt_enabled.ompt_callback_lock_init) {
2421  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2422  ompt_mutex_lock, (omp_lock_hint_t)hint,
2423  __ompt_get_mutex_impl_type(user_lock),
2424  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2425  }
2426 #endif
2427 }
2428 
2429 /* initialize the lock with a hint */
2430 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2431  void **user_lock, uintptr_t hint) {
2432  KMP_DEBUG_ASSERT(__kmp_init_serial);
2433  if (__kmp_env_consistency_check && user_lock == NULL) {
2434  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2435  }
2436 
2437  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2438 
2439 #if OMPT_SUPPORT && OMPT_OPTIONAL
2440  // This is the case, if called from omp_init_lock_with_hint:
2441  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2442  if (!codeptr)
2443  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2444  if (ompt_enabled.ompt_callback_lock_init) {
2445  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2446  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2447  __ompt_get_mutex_impl_type(user_lock),
2448  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2449  }
2450 #endif
2451 }
2452 
2453 #endif // KMP_USE_DYNAMIC_LOCK
2454 
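/* Usage sketch (illustrative): the user-level calls that reach the hinted
   initializers above. A hint is only a request; __kmp_map_hint_to_lock may
   fall back to a non-speculative lock when the hardware or configuration
   does not support the requested kind.

     #include <omp.h>

     omp_lock_t l;

     void init_hinted_lock(void) {
       // Request a speculative (e.g. TSX-based) lock where available.
       omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
     }
*/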
2455 /* initialize the lock */
2456 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2457 #if KMP_USE_DYNAMIC_LOCK
2458 
2459  KMP_DEBUG_ASSERT(__kmp_init_serial);
2460  if (__kmp_env_consistency_check && user_lock == NULL) {
2461  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2462  }
2463  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2464 
2465 #if OMPT_SUPPORT && OMPT_OPTIONAL
2466  // This is the case, if called from omp_init_lock_with_hint:
2467  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2468  if (!codeptr)
2469  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2470  if (ompt_enabled.ompt_callback_lock_init) {
2471  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2472  ompt_mutex_lock, omp_lock_hint_none,
2473  __ompt_get_mutex_impl_type(user_lock),
2474  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2475  }
2476 #endif
2477 
2478 #else // KMP_USE_DYNAMIC_LOCK
2479 
2480  static char const *const func = "omp_init_lock";
2481  kmp_user_lock_p lck;
2482  KMP_DEBUG_ASSERT(__kmp_init_serial);
2483 
2484  if (__kmp_env_consistency_check) {
2485  if (user_lock == NULL) {
2486  KMP_FATAL(LockIsUninitialized, func);
2487  }
2488  }
2489 
2490  KMP_CHECK_USER_LOCK_INIT();
2491 
2492  if ((__kmp_user_lock_kind == lk_tas) &&
2493  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2494  lck = (kmp_user_lock_p)user_lock;
2495  }
2496 #if KMP_USE_FUTEX
2497  else if ((__kmp_user_lock_kind == lk_futex) &&
2498  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2499  lck = (kmp_user_lock_p)user_lock;
2500  }
2501 #endif
2502  else {
2503  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2504  }
2505  INIT_LOCK(lck);
2506  __kmp_set_user_lock_location(lck, loc);
2507 
2508 #if OMPT_SUPPORT && OMPT_OPTIONAL
2509  // This is the case, if called from omp_init_lock_with_hint:
2510  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2511  if (!codeptr)
2512  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2513  if (ompt_enabled.ompt_callback_lock_init) {
2514  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2515  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2516  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2517  }
2518 #endif
2519 
2520 #if USE_ITT_BUILD
2521  __kmp_itt_lock_creating(lck);
2522 #endif /* USE_ITT_BUILD */
2523 
2524 #endif // KMP_USE_DYNAMIC_LOCK
2525 } // __kmpc_init_lock
2526 
2527 /* initialize the lock */
2528 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2529 #if KMP_USE_DYNAMIC_LOCK
2530 
2531  KMP_DEBUG_ASSERT(__kmp_init_serial);
2532  if (__kmp_env_consistency_check && user_lock == NULL) {
2533  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2534  }
2535  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2536 
2537 #if OMPT_SUPPORT && OMPT_OPTIONAL
2538  // This is the case, if called from omp_init_lock_with_hint:
2539  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2540  if (!codeptr)
2541  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2542  if (ompt_enabled.ompt_callback_lock_init) {
2543  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2544  ompt_mutex_nest_lock, omp_lock_hint_none,
2545  __ompt_get_mutex_impl_type(user_lock),
2546  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2547  }
2548 #endif
2549 
2550 #else // KMP_USE_DYNAMIC_LOCK
2551 
2552  static char const *const func = "omp_init_nest_lock";
2553  kmp_user_lock_p lck;
2554  KMP_DEBUG_ASSERT(__kmp_init_serial);
2555 
2556  if (__kmp_env_consistency_check) {
2557  if (user_lock == NULL) {
2558  KMP_FATAL(LockIsUninitialized, func);
2559  }
2560  }
2561 
2562  KMP_CHECK_USER_LOCK_INIT();
2563 
2564  if ((__kmp_user_lock_kind == lk_tas) &&
2565  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2566  OMP_NEST_LOCK_T_SIZE)) {
2567  lck = (kmp_user_lock_p)user_lock;
2568  }
2569 #if KMP_USE_FUTEX
2570  else if ((__kmp_user_lock_kind == lk_futex) &&
2571  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2572  OMP_NEST_LOCK_T_SIZE)) {
2573  lck = (kmp_user_lock_p)user_lock;
2574  }
2575 #endif
2576  else {
2577  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2578  }
2579 
2580  INIT_NESTED_LOCK(lck);
2581  __kmp_set_user_lock_location(lck, loc);
2582 
2583 #if OMPT_SUPPORT && OMPT_OPTIONAL
2584  // This is the case, if called from omp_init_lock_with_hint:
2585  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2586  if (!codeptr)
2587  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2588  if (ompt_enabled.ompt_callback_lock_init) {
2589  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2590  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2591  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2592  }
2593 #endif
2594 
2595 #if USE_ITT_BUILD
2596  __kmp_itt_lock_creating(lck);
2597 #endif /* USE_ITT_BUILD */
2598 
2599 #endif // KMP_USE_DYNAMIC_LOCK
2600 } // __kmpc_init_nest_lock
2601 
2602 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2603 #if KMP_USE_DYNAMIC_LOCK
2604 
2605 #if USE_ITT_BUILD
2606  kmp_user_lock_p lck;
2607  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2608  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2609  } else {
2610  lck = (kmp_user_lock_p)user_lock;
2611  }
2612  __kmp_itt_lock_destroyed(lck);
2613 #endif
2614 #if OMPT_SUPPORT && OMPT_OPTIONAL
2615  // This is the case, if called from omp_init_lock_with_hint:
2616  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2617  if (!codeptr)
2618  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2619  if (ompt_enabled.ompt_callback_lock_destroy) {
2620  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2621  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2622  }
2623 #endif
2624  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2625 #else
2626  kmp_user_lock_p lck;
2627 
2628  if ((__kmp_user_lock_kind == lk_tas) &&
2629  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2630  lck = (kmp_user_lock_p)user_lock;
2631  }
2632 #if KMP_USE_FUTEX
2633  else if ((__kmp_user_lock_kind == lk_futex) &&
2634  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2635  lck = (kmp_user_lock_p)user_lock;
2636  }
2637 #endif
2638  else {
2639  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2640  }
2641 
2642 #if OMPT_SUPPORT && OMPT_OPTIONAL
2643  // This is the case, if called from omp_init_lock_with_hint:
2644  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2645  if (!codeptr)
2646  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2647  if (ompt_enabled.ompt_callback_lock_destroy) {
2648  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2649  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2650  }
2651 #endif
2652 
2653 #if USE_ITT_BUILD
2654  __kmp_itt_lock_destroyed(lck);
2655 #endif /* USE_ITT_BUILD */
2656  DESTROY_LOCK(lck);
2657 
2658  if ((__kmp_user_lock_kind == lk_tas) &&
2659  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2660  ;
2661  }
2662 #if KMP_USE_FUTEX
2663  else if ((__kmp_user_lock_kind == lk_futex) &&
2664  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2665  ;
2666  }
2667 #endif
2668  else {
2669  __kmp_user_lock_free(user_lock, gtid, lck);
2670  }
2671 #endif // KMP_USE_DYNAMIC_LOCK
2672 } // __kmpc_destroy_lock
2673 
2674 /* destroy the lock */
2675 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2676 #if KMP_USE_DYNAMIC_LOCK
2677 
2678 #if USE_ITT_BUILD
2679  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2680  __kmp_itt_lock_destroyed(ilk->lock);
2681 #endif
2682 #if OMPT_SUPPORT && OMPT_OPTIONAL
2683  // This is the case, if called from omp_init_lock_with_hint:
2684  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2685  if (!codeptr)
2686  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2687  if (ompt_enabled.ompt_callback_lock_destroy) {
2688  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2689  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2690  }
2691 #endif
2692  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2693 
2694 #else // KMP_USE_DYNAMIC_LOCK
2695 
2696  kmp_user_lock_p lck;
2697 
2698  if ((__kmp_user_lock_kind == lk_tas) &&
2699  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2700  OMP_NEST_LOCK_T_SIZE)) {
2701  lck = (kmp_user_lock_p)user_lock;
2702  }
2703 #if KMP_USE_FUTEX
2704  else if ((__kmp_user_lock_kind == lk_futex) &&
2705  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2706  OMP_NEST_LOCK_T_SIZE)) {
2707  lck = (kmp_user_lock_p)user_lock;
2708  }
2709 #endif
2710  else {
2711  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2712  }
2713 
2714 #if OMPT_SUPPORT && OMPT_OPTIONAL
2715  // This is the case, if called from omp_init_lock_with_hint:
2716  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2717  if (!codeptr)
2718  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2719  if (ompt_enabled.ompt_callback_lock_destroy) {
2720  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2721  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2722  }
2723 #endif
2724 
2725 #if USE_ITT_BUILD
2726  __kmp_itt_lock_destroyed(lck);
2727 #endif /* USE_ITT_BUILD */
2728 
2729  DESTROY_NESTED_LOCK(lck);
2730 
2731  if ((__kmp_user_lock_kind == lk_tas) &&
2732  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2733  OMP_NEST_LOCK_T_SIZE)) {
2734  ;
2735  }
2736 #if KMP_USE_FUTEX
2737  else if ((__kmp_user_lock_kind == lk_futex) &&
2738  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2739  OMP_NEST_LOCK_T_SIZE)) {
2740  ;
2741  }
2742 #endif
2743  else {
2744  __kmp_user_lock_free(user_lock, gtid, lck);
2745  }
2746 #endif // KMP_USE_DYNAMIC_LOCK
2747 } // __kmpc_destroy_nest_lock
2748 
2749 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2750  KMP_COUNT_BLOCK(OMP_set_lock);
2751 #if KMP_USE_DYNAMIC_LOCK
2752  int tag = KMP_EXTRACT_D_TAG(user_lock);
2753 #if USE_ITT_BUILD
2754  __kmp_itt_lock_acquiring(
2755  (kmp_user_lock_p)
2756  user_lock); // itt function will get to the right lock object.
2757 #endif
2758 #if OMPT_SUPPORT && OMPT_OPTIONAL
2759  // This is the case, if called from omp_init_lock_with_hint:
2760  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2761  if (!codeptr)
2762  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2763  if (ompt_enabled.ompt_callback_mutex_acquire) {
2764  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2765  ompt_mutex_lock, omp_lock_hint_none,
2766  __ompt_get_mutex_impl_type(user_lock),
2767  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2768  }
2769 #endif
2770 #if KMP_USE_INLINED_TAS
2771  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2772  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2773  } else
2774 #elif KMP_USE_INLINED_FUTEX
2775  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2776  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2777  } else
2778 #endif
2779  {
2780  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2781  }
2782 #if USE_ITT_BUILD
2783  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2784 #endif
2785 #if OMPT_SUPPORT && OMPT_OPTIONAL
2786  if (ompt_enabled.ompt_callback_mutex_acquired) {
2787  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2788  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2789  }
2790 #endif
2791 
2792 #else // KMP_USE_DYNAMIC_LOCK
2793 
2794  kmp_user_lock_p lck;
2795 
2796  if ((__kmp_user_lock_kind == lk_tas) &&
2797  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2798  lck = (kmp_user_lock_p)user_lock;
2799  }
2800 #if KMP_USE_FUTEX
2801  else if ((__kmp_user_lock_kind == lk_futex) &&
2802  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2803  lck = (kmp_user_lock_p)user_lock;
2804  }
2805 #endif
2806  else {
2807  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2808  }
2809 
2810 #if USE_ITT_BUILD
2811  __kmp_itt_lock_acquiring(lck);
2812 #endif /* USE_ITT_BUILD */
2813 #if OMPT_SUPPORT && OMPT_OPTIONAL
2814  // This is the case, if called from omp_init_lock_with_hint:
2815  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2816  if (!codeptr)
2817  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2818  if (ompt_enabled.ompt_callback_mutex_acquire) {
2819  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2820  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2821  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2822  }
2823 #endif
2824 
2825  ACQUIRE_LOCK(lck, gtid);
2826 
2827 #if USE_ITT_BUILD
2828  __kmp_itt_lock_acquired(lck);
2829 #endif /* USE_ITT_BUILD */
2830 
2831 #if OMPT_SUPPORT && OMPT_OPTIONAL
2832  if (ompt_enabled.ompt_callback_mutex_acquired) {
2833  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2834  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2835  }
2836 #endif
2837 
2838 #endif // KMP_USE_DYNAMIC_LOCK
2839 }
2840 
2841 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2842 #if KMP_USE_DYNAMIC_LOCK
2843 
2844 #if USE_ITT_BUILD
2845  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2846 #endif
2847 #if OMPT_SUPPORT && OMPT_OPTIONAL
2848  // This is the case, if called from omp_init_lock_with_hint:
2849  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2850  if (!codeptr)
2851  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2852  if (ompt_enabled.enabled) {
2853  if (ompt_enabled.ompt_callback_mutex_acquire) {
2854  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2855  ompt_mutex_nest_lock, omp_lock_hint_none,
2856  __ompt_get_mutex_impl_type(user_lock),
2857  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2858  }
2859  }
2860 #endif
2861  int acquire_status =
2862  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2863  (void)acquire_status;
2864 #if USE_ITT_BUILD
2865  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2866 #endif
2867 
2868 #if OMPT_SUPPORT && OMPT_OPTIONAL
2869  if (ompt_enabled.enabled) {
2870  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2871  if (ompt_enabled.ompt_callback_mutex_acquired) {
2872  // lock_first
2873  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2874  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2875  codeptr);
2876  }
2877  } else {
2878  if (ompt_enabled.ompt_callback_nest_lock) {
2879  // lock_next
2880  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2881  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2882  }
2883  }
2884  }
2885 #endif
2886 
2887 #else // KMP_USE_DYNAMIC_LOCK
2888  int acquire_status;
2889  kmp_user_lock_p lck;
2890 
2891  if ((__kmp_user_lock_kind == lk_tas) &&
2892  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2893  OMP_NEST_LOCK_T_SIZE)) {
2894  lck = (kmp_user_lock_p)user_lock;
2895  }
2896 #if KMP_USE_FUTEX
2897  else if ((__kmp_user_lock_kind == lk_futex) &&
2898  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2899  OMP_NEST_LOCK_T_SIZE)) {
2900  lck = (kmp_user_lock_p)user_lock;
2901  }
2902 #endif
2903  else {
2904  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2905  }
2906 
2907 #if USE_ITT_BUILD
2908  __kmp_itt_lock_acquiring(lck);
2909 #endif /* USE_ITT_BUILD */
2910 #if OMPT_SUPPORT && OMPT_OPTIONAL
2911  // This is the case, if called from omp_init_lock_with_hint:
2912  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2913  if (!codeptr)
2914  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2915  if (ompt_enabled.enabled) {
2916  if (ompt_enabled.ompt_callback_mutex_acquire) {
2917  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2918  ompt_mutex_nest_lock, omp_lock_hint_none,
2919  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2920  codeptr);
2921  }
2922  }
2923 #endif
2924 
2925  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2926 
2927 #if USE_ITT_BUILD
2928  __kmp_itt_lock_acquired(lck);
2929 #endif /* USE_ITT_BUILD */
2930 
2931 #if OMPT_SUPPORT && OMPT_OPTIONAL
2932  if (ompt_enabled.enabled) {
2933  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2934  if (ompt_enabled.ompt_callback_mutex_acquired) {
2935  // lock_first
2936  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2937  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2938  }
2939  } else {
2940  if (ompt_enabled.ompt_callback_nest_lock) {
2941  // lock_next
2942  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2943  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2944  }
2945  }
2946  }
2947 #endif
2948 
2949 #endif // KMP_USE_DYNAMIC_LOCK
2950 }
2951 
2952 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2953 #if KMP_USE_DYNAMIC_LOCK
2954 
2955  int tag = KMP_EXTRACT_D_TAG(user_lock);
2956 #if USE_ITT_BUILD
2957  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2958 #endif
2959 #if KMP_USE_INLINED_TAS
2960  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2961  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2962  } else
2963 #elif KMP_USE_INLINED_FUTEX
2964  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2965  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2966  } else
2967 #endif
2968  {
2969  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2970  }
2971 
2972 #if OMPT_SUPPORT && OMPT_OPTIONAL
2973  // This is the case, if called from omp_init_lock_with_hint:
2974  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2975  if (!codeptr)
2976  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2977  if (ompt_enabled.ompt_callback_mutex_released) {
2978  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2979  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2980  }
2981 #endif
2982 
2983 #else // KMP_USE_DYNAMIC_LOCK
2984 
2985  kmp_user_lock_p lck;
2986 
2987  /* Can't use serial interval since not block structured */
2988  /* release the lock */
2989 
2990  if ((__kmp_user_lock_kind == lk_tas) &&
2991  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2992 #if KMP_OS_LINUX && \
2993  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2994 // "fast" path implemented to fix customer performance issue
2995 #if USE_ITT_BUILD
2996  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2997 #endif /* USE_ITT_BUILD */
2998  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2999  KMP_MB();
3000 
3001 #if OMPT_SUPPORT && OMPT_OPTIONAL
3002  // This is the case, if called from omp_init_lock_with_hint:
3003  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3004  if (!codeptr)
3005  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3006  if (ompt_enabled.ompt_callback_mutex_released) {
3007  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3008  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3009  }
3010 #endif
3011 
3012  return;
3013 #else
3014  lck = (kmp_user_lock_p)user_lock;
3015 #endif
3016  }
3017 #if KMP_USE_FUTEX
3018  else if ((__kmp_user_lock_kind == lk_futex) &&
3019  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3020  lck = (kmp_user_lock_p)user_lock;
3021  }
3022 #endif
3023  else {
3024  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
3025  }
3026 
3027 #if USE_ITT_BUILD
3028  __kmp_itt_lock_releasing(lck);
3029 #endif /* USE_ITT_BUILD */
3030 
3031  RELEASE_LOCK(lck, gtid);
3032 
3033 #if OMPT_SUPPORT && OMPT_OPTIONAL
3034  // This is the case, if called from omp_init_lock_with_hint:
3035  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3036  if (!codeptr)
3037  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3038  if (ompt_enabled.ompt_callback_mutex_released) {
3039  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3040  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3041  }
3042 #endif
3043 
3044 #endif // KMP_USE_DYNAMIC_LOCK
3045 }
3046 
3047 /* release the lock */
3048 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3049 #if KMP_USE_DYNAMIC_LOCK
3050 
3051 #if USE_ITT_BUILD
3052  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3053 #endif
3054  int release_status =
3055  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3056  (void)release_status;
3057 
3058 #if OMPT_SUPPORT && OMPT_OPTIONAL
3059  // This is the case, if called from omp_init_lock_with_hint:
3060  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3061  if (!codeptr)
3062  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3063  if (ompt_enabled.enabled) {
3064  if (release_status == KMP_LOCK_RELEASED) {
3065  if (ompt_enabled.ompt_callback_mutex_released) {
3066  // release_lock_last
3067  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3068  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3069  codeptr);
3070  }
3071  } else if (ompt_enabled.ompt_callback_nest_lock) {
3072  // release_lock_prev
3073  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3074  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3075  }
3076  }
3077 #endif
3078 
3079 #else // KMP_USE_DYNAMIC_LOCK
3080 
3081  kmp_user_lock_p lck;
3082 
3083  /* Can't use serial interval since not block structured */
3084 
3085  if ((__kmp_user_lock_kind == lk_tas) &&
3086  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3087  OMP_NEST_LOCK_T_SIZE)) {
3088 #if KMP_OS_LINUX && \
3089  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3090  // "fast" path implemented to fix customer performance issue
3091  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3092 #if USE_ITT_BUILD
3093  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3094 #endif /* USE_ITT_BUILD */
3095 
3096 #if OMPT_SUPPORT && OMPT_OPTIONAL
3097  int release_status = KMP_LOCK_STILL_HELD;
3098 #endif
3099 
3100  if (--(tl->lk.depth_locked) == 0) {
3101  TCW_4(tl->lk.poll, 0);
3102 #if OMPT_SUPPORT && OMPT_OPTIONAL
3103  release_status = KMP_LOCK_RELEASED;
3104 #endif
3105  }
3106  KMP_MB();
3107 
3108 #if OMPT_SUPPORT && OMPT_OPTIONAL
3109  // This is the case, if called from omp_init_lock_with_hint:
3110  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3111  if (!codeptr)
3112  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3113  if (ompt_enabled.enabled) {
3114  if (release_status == KMP_LOCK_RELEASED) {
3115  if (ompt_enabled.ompt_callback_mutex_released) {
3116  // release_lock_last
3117  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3118  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3119  }
3120  } else if (ompt_enabled.ompt_callback_nest_lock) {
3121  // release_lock_previous
3122  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3123  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3124  }
3125  }
3126 #endif
3127 
3128  return;
3129 #else
3130  lck = (kmp_user_lock_p)user_lock;
3131 #endif
3132  }
3133 #if KMP_USE_FUTEX
3134  else if ((__kmp_user_lock_kind == lk_futex) &&
3135  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3136  OMP_NEST_LOCK_T_SIZE)) {
3137  lck = (kmp_user_lock_p)user_lock;
3138  }
3139 #endif
3140  else {
3141  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3142  }
3143 
3144 #if USE_ITT_BUILD
3145  __kmp_itt_lock_releasing(lck);
3146 #endif /* USE_ITT_BUILD */
3147 
3148  int release_status;
3149  release_status = RELEASE_NESTED_LOCK(lck, gtid);
3150 #if OMPT_SUPPORT && OMPT_OPTIONAL
3151  // This is the case, if called from omp_init_lock_with_hint:
3152  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3153  if (!codeptr)
3154  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3155  if (ompt_enabled.enabled) {
3156  if (release_status == KMP_LOCK_RELEASED) {
3157  if (ompt_enabled.ompt_callback_mutex_released) {
3158  // release_lock_last
3159  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3160  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3161  }
3162  } else if (ompt_enabled.ompt_callback_nest_lock) {
3163  // release_lock_previous
3164  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3165  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3166  }
3167  }
3168 #endif
3169 
3170 #endif // KMP_USE_DYNAMIC_LOCK
3171 }
3172 
3173 /* try to acquire the lock */
3174 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3175  KMP_COUNT_BLOCK(OMP_test_lock);
3176 
3177 #if KMP_USE_DYNAMIC_LOCK
3178  int rc;
3179  int tag = KMP_EXTRACT_D_TAG(user_lock);
3180 #if USE_ITT_BUILD
3181  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3182 #endif
3183 #if OMPT_SUPPORT && OMPT_OPTIONAL
3184  // This is the case, if called from omp_init_lock_with_hint:
3185  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3186  if (!codeptr)
3187  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3188  if (ompt_enabled.ompt_callback_mutex_acquire) {
3189  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3190  ompt_mutex_test_lock, omp_lock_hint_none,
3191  __ompt_get_mutex_impl_type(user_lock),
3192  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3193  }
3194 #endif
3195 #if KMP_USE_INLINED_TAS
3196  if (tag == locktag_tas && !__kmp_env_consistency_check) {
3197  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3198  } else
3199 #elif KMP_USE_INLINED_FUTEX
3200  if (tag == locktag_futex && !__kmp_env_consistency_check) {
3201  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3202  } else
3203 #endif
3204  {
3205  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3206  }
3207  if (rc) {
3208 #if USE_ITT_BUILD
3209  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3210 #endif
3211 #if OMPT_SUPPORT && OMPT_OPTIONAL
3212  if (ompt_enabled.ompt_callback_mutex_acquired) {
3213  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3214  ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3215  }
3216 #endif
3217  return FTN_TRUE;
3218  } else {
3219 #if USE_ITT_BUILD
3220  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3221 #endif
3222  return FTN_FALSE;
3223  }
3224 
3225 #else // KMP_USE_DYNAMIC_LOCK
3226 
3227  kmp_user_lock_p lck;
3228  int rc;
3229 
3230  if ((__kmp_user_lock_kind == lk_tas) &&
3231  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3232  lck = (kmp_user_lock_p)user_lock;
3233  }
3234 #if KMP_USE_FUTEX
3235  else if ((__kmp_user_lock_kind == lk_futex) &&
3236  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3237  lck = (kmp_user_lock_p)user_lock;
3238  }
3239 #endif
3240  else {
3241  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3242  }
3243 
3244 #if USE_ITT_BUILD
3245  __kmp_itt_lock_acquiring(lck);
3246 #endif /* USE_ITT_BUILD */
3247 #if OMPT_SUPPORT && OMPT_OPTIONAL
3248  // This is the case, if called from omp_init_lock_with_hint:
3249  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3250  if (!codeptr)
3251  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3252  if (ompt_enabled.ompt_callback_mutex_acquire) {
3253  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3254  ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3255  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3256  }
3257 #endif
3258 
3259  rc = TEST_LOCK(lck, gtid);
3260 #if USE_ITT_BUILD
3261  if (rc) {
3262  __kmp_itt_lock_acquired(lck);
3263  } else {
3264  __kmp_itt_lock_cancelled(lck);
3265  }
3266 #endif /* USE_ITT_BUILD */
3267 #if OMPT_SUPPORT && OMPT_OPTIONAL
3268  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3269  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3270  ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3271  }
3272 #endif
3273 
3274  return (rc ? FTN_TRUE : FTN_FALSE);
3275 
3276  /* Can't use serial interval since not block structured */
3277 
3278 #endif // KMP_USE_DYNAMIC_LOCK
3279 }
3280 
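/* Usage sketch (illustrative): omp_set_lock, omp_unset_lock and omp_test_lock
   from user code end up in the three entry points above; omp_test_lock never
   blocks and returns nonzero only if the lock was acquired.

     #include <omp.h>

     omp_lock_t guard; // must be initialized first with omp_init_lock(&guard)
     long updated = 0;

     void try_update(void) {
       if (omp_test_lock(&guard)) { // acquired without blocking
         ++updated;                 // protected update
         omp_unset_lock(&guard);
       }
       // else: another thread holds the lock; skip rather than wait
     }
*/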
3281 /* try to acquire the lock */
3282 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3283 #if KMP_USE_DYNAMIC_LOCK
3284  int rc;
3285 #if USE_ITT_BUILD
3286  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3287 #endif
3288 #if OMPT_SUPPORT && OMPT_OPTIONAL
3289  // This is the case, if called from omp_init_lock_with_hint:
3290  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3291  if (!codeptr)
3292  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3293  if (ompt_enabled.ompt_callback_mutex_acquire) {
3294  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3295  ompt_mutex_test_nest_lock, omp_lock_hint_none,
3296  __ompt_get_mutex_impl_type(user_lock),
3297  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3298  }
3299 #endif
3300  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3301 #if USE_ITT_BUILD
3302  if (rc) {
3303  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3304  } else {
3305  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3306  }
3307 #endif
3308 #if OMPT_SUPPORT && OMPT_OPTIONAL
3309  if (ompt_enabled.enabled && rc) {
3310  if (rc == 1) {
3311  if (ompt_enabled.ompt_callback_mutex_acquired) {
3312  // lock_first
3313  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3314  ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3315  codeptr);
3316  }
3317  } else {
3318  if (ompt_enabled.ompt_callback_nest_lock) {
3319  // lock_next
3320  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3321  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3322  }
3323  }
3324  }
3325 #endif
3326  return rc;
3327 
3328 #else // KMP_USE_DYNAMIC_LOCK
3329 
3330  kmp_user_lock_p lck;
3331  int rc;
3332 
3333  if ((__kmp_user_lock_kind == lk_tas) &&
3334  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3335  OMP_NEST_LOCK_T_SIZE)) {
3336  lck = (kmp_user_lock_p)user_lock;
3337  }
3338 #if KMP_USE_FUTEX
3339  else if ((__kmp_user_lock_kind == lk_futex) &&
3340  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3341  OMP_NEST_LOCK_T_SIZE)) {
3342  lck = (kmp_user_lock_p)user_lock;
3343  }
3344 #endif
3345  else {
3346  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3347  }
3348 
3349 #if USE_ITT_BUILD
3350  __kmp_itt_lock_acquiring(lck);
3351 #endif /* USE_ITT_BUILD */
3352 
3353 #if OMPT_SUPPORT && OMPT_OPTIONAL
3354  // This is the case, if called from omp_init_lock_with_hint:
3355  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3356  if (!codeptr)
3357  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3358  if (ompt_enabled.enabled &&
3359      ompt_enabled.ompt_callback_mutex_acquire) {
3360  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3361  ompt_mutex_test_nest_lock, omp_lock_hint_none,
3362  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3363  codeptr);
3364  }
3365 #endif
3366 
3367  rc = TEST_NESTED_LOCK(lck, gtid);
3368 #if USE_ITT_BUILD
3369  if (rc) {
3370  __kmp_itt_lock_acquired(lck);
3371  } else {
3372  __kmp_itt_lock_cancelled(lck);
3373  }
3374 #endif /* USE_ITT_BUILD */
3375 #if OMPT_SUPPORT && OMPT_OPTIONAL
3376  if (ompt_enabled.enabled && rc) {
3377  if (rc == 1) {
3378  if (ompt_enabled.ompt_callback_mutex_acquired) {
3379  // lock_first
3380  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3381  ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3382  }
3383  } else {
3384  if (ompt_enabled.ompt_callback_nest_lock) {
3385  // lock_next
3386  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3387  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3388  }
3389  }
3390  }
3391 #endif
3392  return rc;
3393 
3394  /* Can't use serial interval since not block structured */
3395 
3396 #endif // KMP_USE_DYNAMIC_LOCK
3397 }
3398 
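/* Usage sketch (illustrative): omp_test_nest_lock returns the new nesting
   count on success (1 for the first acquisition, 2, 3, ... when the owner
   re-acquires) and 0 on failure, which is why the OMPT code above treats
   rc == 1 and rc > 1 differently.

     #include <omp.h>

     omp_nest_lock_t nl; // initialized elsewhere with omp_init_nest_lock(&nl)

     void nested_try(void) {
       if (omp_test_nest_lock(&nl)) {   // returns 1: first acquisition
         if (omp_test_nest_lock(&nl)) { // returns 2: owner re-acquired
           omp_unset_nest_lock(&nl);
         }
         omp_unset_nest_lock(&nl);
       }
     }
*/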
3399 // Interface to fast scalable reduce methods routines
3400 
3401 // keep the selected method in a thread local structure for cross-function
3402 // usage: will be used in __kmpc_end_reduce* functions;
3403 // another solution: to re-determine the method one more time in
3404 // __kmpc_end_reduce* functions (new prototype required then)
3405 // AT: which solution is better?
3406 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3407  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3408 
3409 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3410  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3411 
3412 // description of the packed_reduction_method variable: look at the macros in
3413 // kmp.h
3414 
3415 // used in a critical section reduce block
3416 static __forceinline void
3417 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3418  kmp_critical_name *crit) {
3419 
3420  // this lock was visible to a customer and to the threading profile tool as a
3421  // serial overhead span (although it's used for an internal purpose only)
3422  // why was it visible in the previous implementation?
3423  // should we keep it visible in the new reduce block?
3424  kmp_user_lock_p lck;
3425 
3426 #if KMP_USE_DYNAMIC_LOCK
3427 
3428  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3429  // Check if it is initialized.
3430  if (*lk == 0) {
3431  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3432  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3433  KMP_GET_D_TAG(__kmp_user_lock_seq));
3434  } else {
3435  __kmp_init_indirect_csptr(crit, loc, global_tid,
3436  KMP_GET_I_TAG(__kmp_user_lock_seq));
3437  }
3438  }
3439  // Branch for accessing the actual lock object and set operation. This
3440  // branching is inevitable since this lock initialization does not follow the
3441  // normal dispatch path (lock table is not used).
3442  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3443  lck = (kmp_user_lock_p)lk;
3444  KMP_DEBUG_ASSERT(lck != NULL);
3445  if (__kmp_env_consistency_check) {
3446  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3447  }
3448  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3449  } else {
3450  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3451  lck = ilk->lock;
3452  KMP_DEBUG_ASSERT(lck != NULL);
3453  if (__kmp_env_consistency_check) {
3454  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3455  }
3456  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3457  }
3458 
3459 #else // KMP_USE_DYNAMIC_LOCK
3460 
3461  // We know that the fast reduction code is only emitted by Intel compilers
3462  // with 32 byte critical sections. If there isn't enough space, then we
3463  // have to use a pointer.
3464  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3465  lck = (kmp_user_lock_p)crit;
3466  } else {
3467  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3468  }
3469  KMP_DEBUG_ASSERT(lck != NULL);
3470 
3471  if (__kmp_env_consistency_check)
3472  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3473 
3474  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3475 
3476 #endif // KMP_USE_DYNAMIC_LOCK
3477 }
3478 
3479 // used in a critical section reduce block
3480 static __forceinline void
3481 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3482  kmp_critical_name *crit) {
3483 
3484  kmp_user_lock_p lck;
3485 
3486 #if KMP_USE_DYNAMIC_LOCK
3487 
3488  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3489  lck = (kmp_user_lock_p)crit;
3490  if (__kmp_env_consistency_check)
3491  __kmp_pop_sync(global_tid, ct_critical, loc);
3492  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3493  } else {
3494  kmp_indirect_lock_t *ilk =
3495  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3496  if (__kmp_env_consistency_check)
3497  __kmp_pop_sync(global_tid, ct_critical, loc);
3498  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3499  }
3500 
3501 #else // KMP_USE_DYNAMIC_LOCK
3502 
3503  // We know that the fast reduction code is only emitted by Intel compilers
3504  // with 32 byte critical sections. If there isn't enough space, then we have
3505  // to use a pointer.
3506  if (__kmp_base_user_lock_size > 32) {
3507  lck = *((kmp_user_lock_p *)crit);
3508  KMP_ASSERT(lck != NULL);
3509  } else {
3510  lck = (kmp_user_lock_p)crit;
3511  }
3512 
3513  if (__kmp_env_consistency_check)
3514  __kmp_pop_sync(global_tid, ct_critical, loc);
3515 
3516  __kmp_release_user_lock_with_checks(lck, global_tid);
3517 
3518 #endif // KMP_USE_DYNAMIC_LOCK
3519 } // __kmp_end_critical_section_reduce_block
3520 
3521 static __forceinline int
3522 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3523  int *task_state) {
3524  kmp_team_t *team;
3525 
3526  // Check whether we are inside a teams construct.
3527  if (th->th.th_teams_microtask) {
3528  *team_p = team = th->th.th_team;
3529  if (team->t.t_level == th->th.th_teams_level) {
3530  // This is reduction at teams construct.
3531  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3532  // Let's swap teams temporarily for the reduction.
3533  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3534  th->th.th_team = team->t.t_parent;
3535  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3536  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3537  *task_state = th->th.th_task_state;
3538  th->th.th_task_state = 0;
3539 
3540  return 1;
3541  }
3542  }
3543  return 0;
3544 }
3545 
3546 static __forceinline void
3547 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3548  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3549  th->th.th_info.ds.ds_tid = 0;
3550  th->th.th_team = team;
3551  th->th.th_team_nproc = team->t.t_nproc;
3552  th->th.th_task_team = team->t.t_task_team[task_state];
3553  __kmp_type_convert(task_state, &(th->th.th_task_state));
3554 }
3555 
3556 /* 2.a.i. Reduce Block without a terminating barrier */
3572 kmp_int32
3573 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3574  size_t reduce_size, void *reduce_data,
3575  void (*reduce_func)(void *lhs_data, void *rhs_data),
3576  kmp_critical_name *lck) {
3577 
3578  KMP_COUNT_BLOCK(REDUCE_nowait);
3579  int retval = 0;
3580  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3581  kmp_info_t *th;
3582  kmp_team_t *team;
3583  int teams_swapped = 0, task_state;
3584  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3585  __kmp_assert_valid_gtid(global_tid);
3586 
3587  // why do we need this initialization here at all?
3588  // Reduction clause cannot be used as a stand-alone directive.
3589 
3590  // do not call __kmp_serial_initialize(), it will be called by
3591  // __kmp_parallel_initialize() if needed
3592  // possible detection of false-positive race by the threadchecker ???
3593  if (!TCR_4(__kmp_init_parallel))
3594  __kmp_parallel_initialize();
3595 
3596  __kmp_resume_if_soft_paused();
3597 
3598 // check correctness of reduce block nesting
3599 #if KMP_USE_DYNAMIC_LOCK
3600  if (__kmp_env_consistency_check)
3601  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3602 #else
3603  if (__kmp_env_consistency_check)
3604  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3605 #endif
3606 
3607  th = __kmp_thread_from_gtid(global_tid);
3608  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3609 
3610  // the packed_reduction_method value will be reused by the __kmp_end_reduce*
3611  // functions, so the value should be kept in a variable
3612  // the variable should be either a construct-specific or thread-specific
3613  // property, not a team specific property
3614  // (a thread can reach the next reduce block on the next construct, reduce
3615  // method may differ on the next construct)
3616  // an ident_t "loc" parameter could be used as a construct-specific property
3617  // (what if loc == 0?)
3618  // (if both construct-specific and team-specific variables were shared,
3619  // then unnecessary extra syncs would be needed)
3620  // a thread-specific variable is better regarding two issues above (next
3621  // construct and extra syncs)
3622  // a thread-specific "th_local.reduction_method" variable is used currently
3623  // each thread executes 'determine' and 'set' lines (no need to execute by one
3624  // thread, to avoid unnecessary extra syncs)
3625 
3626  packed_reduction_method = __kmp_determine_reduction_method(
3627  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3628  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3629 
3630  OMPT_REDUCTION_DECL(th, global_tid);
3631  if (packed_reduction_method == critical_reduce_block) {
3632 
3633  OMPT_REDUCTION_BEGIN;
3634 
3635  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3636  retval = 1;
3637 
3638  } else if (packed_reduction_method == empty_reduce_block) {
3639 
3640  OMPT_REDUCTION_BEGIN;
3641 
3642  // usage: if team size == 1, no synchronization is required ( Intel
3643  // platforms only )
3644  retval = 1;
3645 
3646  } else if (packed_reduction_method == atomic_reduce_block) {
3647 
3648  retval = 2;
3649 
3650  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3651  // won't be called by the code gen)
3652  // (it's not quite good, because the checking block has been closed by
3653  // this 'pop',
3654  // but atomic operation has not been executed yet, will be executed
3655  // slightly later, literally on next instruction)
3656  if (__kmp_env_consistency_check)
3657  __kmp_pop_sync(global_tid, ct_reduce, loc);
3658 
3659  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3660  tree_reduce_block)) {
3661 
3662 // AT: performance issue: there is a real barrier here
3663 // AT: (if the primary thread is slow, other threads are blocked here waiting
3664 // for the primary thread to come and release them)
3665 // AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3666 // AT: (specifying NOWAIT won't improve performance; it will only be confusing
3667 // to a customer)
3668 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3669 // might go faster and be more in line with the sense of NOWAIT
3670 // AT: TODO: run the EPCC benchmark and compare times
3671 
3672 // this barrier should be invisible to a customer and to the threading profile
3673 // tool (it's neither a terminating barrier nor customer's code, it's
3674 // used for an internal purpose)
3675 #if OMPT_SUPPORT
3676  // JP: can this barrier potentially lead to task scheduling?
3677  // JP: as long as there is a barrier in the implementation, OMPT should and
3678  // will provide the barrier events,
3679  // so we set up the necessary frame/return addresses.
3680  ompt_frame_t *ompt_frame;
3681  if (ompt_enabled.enabled) {
3682  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3683  if (ompt_frame->enter_frame.ptr == NULL)
3684  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3685  }
3686  OMPT_STORE_RETURN_ADDRESS(global_tid);
3687 #endif
3688 #if USE_ITT_NOTIFY
3689  __kmp_threads[global_tid]->th.th_ident = loc;
3690 #endif
3691  retval =
3692  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3693  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3694  retval = (retval != 0) ? (0) : (1);
3695 #if OMPT_SUPPORT && OMPT_OPTIONAL
3696  if (ompt_enabled.enabled) {
3697  ompt_frame->enter_frame = ompt_data_none;
3698  }
3699 #endif
3700 
3701  // all other workers except primary thread should do this pop here
3702  // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3703  if (__kmp_env_consistency_check) {
3704  if (retval == 0) {
3705  __kmp_pop_sync(global_tid, ct_reduce, loc);
3706  }
3707  }
3708 
3709  } else {
3710 
3711  // should never reach this block
3712  KMP_ASSERT(0); // "unexpected method"
3713  }
3714  if (teams_swapped) {
3715  __kmp_restore_swapped_teams(th, team, task_state);
3716  }
3717  KA_TRACE(
3718  10,
3719  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3720  global_tid, packed_reduction_method, retval));
3721 
3722  return retval;
3723 }
3724 
3733 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3734  kmp_critical_name *lck) {
3735 
3736  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3737 
3738  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3739  __kmp_assert_valid_gtid(global_tid);
3740 
3741  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3742 
3743  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3744 
3745  if (packed_reduction_method == critical_reduce_block) {
3746 
3747  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3748  OMPT_REDUCTION_END;
3749 
3750  } else if (packed_reduction_method == empty_reduce_block) {
3751 
3752  // usage: if the team size == 1, no synchronization is required (on Intel
3753  // platforms only)
3754 
3755  OMPT_REDUCTION_END;
3756 
3757  } else if (packed_reduction_method == atomic_reduce_block) {
3758 
3759  // neither the primary thread nor the other workers should get here
3760  // (code gen does not generate this call in case 2: atomic reduce block)
3761  // actually it would be better to remove this else-if entirely;
3762  // after removal this value would be caught by the 'else' and would assert
3763 
3764  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3765  tree_reduce_block)) {
3766 
3767  // only primary thread gets here
3768  // OMPT: tree reduction is annotated in the barrier code
3769 
3770  } else {
3771 
3772  // should never reach this block
3773  KMP_ASSERT(0); // "unexpected method"
3774  }
3775 
3776  if (__kmp_env_consistency_check)
3777  __kmp_pop_sync(global_tid, ct_reduce, loc);
3778 
3779  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3780  global_tid, packed_reduction_method));
3781 
3782  return;
3783 }
3784 
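// Illustrative sketch (not part of the upstream source): roughly how a
// compiler might lower "#pragma omp for nowait reduction(+ : sum)" onto the
// __kmpc_reduce_nowait()/__kmpc_end_reduce_nowait() pair above. The helper
// names (example_*) are hypothetical, and real code generation passes a
// struct of pointers to the private copies as reduce_data; a single int is
// used here only to keep the sketch short.
#if 0
static kmp_critical_name example_reduce_lock;

// combiner with the reduce_func signature expected by __kmpc_reduce_nowait()
static void example_sum_reduce(void *lhs_data, void *rhs_data) {
  *(int *)lhs_data += *(int *)rhs_data;
}

static void example_reduce_nowait_tail(ident_t *loc, kmp_int32 gtid,
                                       int *shared_sum, int *private_sum) {
  switch (__kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(*private_sum),
                               private_sum, example_sum_reduce,
                               &example_reduce_lock)) {
  case 1: // this thread combines into the shared variable, then closes the block
    *shared_sum += *private_sum;
    __kmpc_end_reduce_nowait(loc, gtid, &example_reduce_lock);
    break;
  case 2: // atomic method: combine atomically; no end call is generated
    __sync_fetch_and_add(shared_sum, *private_sum);
    break;
  default: // 0: nothing to do on this thread
    break;
  }
}
#endif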
3785 /* 2.a.ii. Reduce Block with a terminating barrier */
3786 
3802 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3803  size_t reduce_size, void *reduce_data,
3804  void (*reduce_func)(void *lhs_data, void *rhs_data),
3805  kmp_critical_name *lck) {
3806  KMP_COUNT_BLOCK(REDUCE_wait);
3807  int retval = 0;
3808  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3809  kmp_info_t *th;
3810  kmp_team_t *team;
3811  int teams_swapped = 0, task_state;
3812 
3813  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3814  __kmp_assert_valid_gtid(global_tid);
3815 
3816  // why do we need this initialization here at all?
3817  // The reduction clause cannot be a stand-alone directive.
3818 
3819  // do not call __kmp_serial_initialize(); it will be called by
3820  // __kmp_parallel_initialize() if needed
3821  // possible detection of a false-positive race by the thread checker ???
3822  if (!TCR_4(__kmp_init_parallel))
3823  __kmp_parallel_initialize();
3824 
3825  __kmp_resume_if_soft_paused();
3826 
3827 // check correctness of reduce block nesting
3828 #if KMP_USE_DYNAMIC_LOCK
3829  if (__kmp_env_consistency_check)
3830  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3831 #else
3832  if (__kmp_env_consistency_check)
3833  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3834 #endif
3835 
3836  th = __kmp_thread_from_gtid(global_tid);
3837  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3838 
3839  packed_reduction_method = __kmp_determine_reduction_method(
3840  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3841  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3842 
3843  OMPT_REDUCTION_DECL(th, global_tid);
3844 
3845  if (packed_reduction_method == critical_reduce_block) {
3846 
3847  OMPT_REDUCTION_BEGIN;
3848  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3849  retval = 1;
3850 
3851  } else if (packed_reduction_method == empty_reduce_block) {
3852 
3853  OMPT_REDUCTION_BEGIN;
3854  // usage: if the team size == 1, no synchronization is required (on Intel
3855  // platforms only)
3856  retval = 1;
3857 
3858  } else if (packed_reduction_method == atomic_reduce_block) {
3859 
3860  retval = 2;
3861 
3862  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3863  tree_reduce_block)) {
3864 
3865 // case tree_reduce_block:
3866 // this barrier should be visible to a customer and to the threading profile
3867 // tool (it's a terminating barrier on constructs if NOWAIT is not specified)
3868 #if OMPT_SUPPORT
3869  ompt_frame_t *ompt_frame;
3870  if (ompt_enabled.enabled) {
3871  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3872  if (ompt_frame->enter_frame.ptr == NULL)
3873  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3874  }
3875  OMPT_STORE_RETURN_ADDRESS(global_tid);
3876 #endif
3877 #if USE_ITT_NOTIFY
3878  __kmp_threads[global_tid]->th.th_ident =
3879  loc; // needed for correct notification of frames
3880 #endif
3881  retval =
3882  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3883  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3884  retval = (retval != 0) ? (0) : (1);
3885 #if OMPT_SUPPORT && OMPT_OPTIONAL
3886  if (ompt_enabled.enabled) {
3887  ompt_frame->enter_frame = ompt_data_none;
3888  }
3889 #endif
3890 
3891  // all workers except the primary thread should do this pop here
3892  // (none of the workers except the primary will enter __kmpc_end_reduce())
3893  if (__kmp_env_consistency_check) {
3894  if (retval == 0) { // 0: all other workers; 1: primary thread
3895  __kmp_pop_sync(global_tid, ct_reduce, loc);
3896  }
3897  }
3898 
3899  } else {
3900 
3901  // should never reach this block
3902  KMP_ASSERT(0); // "unexpected method"
3903  }
3904  if (teams_swapped) {
3905  __kmp_restore_swapped_teams(th, team, task_state);
3906  }
3907 
3908  KA_TRACE(10,
3909  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3910  global_tid, packed_reduction_method, retval));
3911  return retval;
3912 }
3913 
3924 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3925  kmp_critical_name *lck) {
3926 
3927  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3928  kmp_info_t *th;
3929  kmp_team_t *team;
3930  int teams_swapped = 0, task_state;
3931 
3932  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3933  __kmp_assert_valid_gtid(global_tid);
3934 
3935  th = __kmp_thread_from_gtid(global_tid);
3936  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3937 
3938  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3939 
3940  // this barrier should be visible to a customer and to the threading profile
3941  // tool (it's a terminating barrier on constructs if NOWAIT is not specified)
3942  OMPT_REDUCTION_DECL(th, global_tid);
3943 
3944  if (packed_reduction_method == critical_reduce_block) {
3945  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3946 
3947  OMPT_REDUCTION_END;
3948 
3949 // TODO: implicit barrier: should be exposed
3950 #if OMPT_SUPPORT
3951  ompt_frame_t *ompt_frame;
3952  if (ompt_enabled.enabled) {
3953  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3954  if (ompt_frame->enter_frame.ptr == NULL)
3955  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3956  }
3957  OMPT_STORE_RETURN_ADDRESS(global_tid);
3958 #endif
3959 #if USE_ITT_NOTIFY
3960  __kmp_threads[global_tid]->th.th_ident = loc;
3961 #endif
3962  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3963 #if OMPT_SUPPORT && OMPT_OPTIONAL
3964  if (ompt_enabled.enabled) {
3965  ompt_frame->enter_frame = ompt_data_none;
3966  }
3967 #endif
3968 
3969  } else if (packed_reduction_method == empty_reduce_block) {
3970 
3971  OMPT_REDUCTION_END;
3972 
3973 // usage: if team size==1, no synchronization is required (Intel platforms only)
3974 
3975 // TODO: implicit barrier: should be exposed
3976 #if OMPT_SUPPORT
3977  ompt_frame_t *ompt_frame;
3978  if (ompt_enabled.enabled) {
3979  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3980  if (ompt_frame->enter_frame.ptr == NULL)
3981  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3982  }
3983  OMPT_STORE_RETURN_ADDRESS(global_tid);
3984 #endif
3985 #if USE_ITT_NOTIFY
3986  __kmp_threads[global_tid]->th.th_ident = loc;
3987 #endif
3988  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3989 #if OMPT_SUPPORT && OMPT_OPTIONAL
3990  if (ompt_enabled.enabled) {
3991  ompt_frame->enter_frame = ompt_data_none;
3992  }
3993 #endif
3994 
3995  } else if (packed_reduction_method == atomic_reduce_block) {
3996 
3997 #if OMPT_SUPPORT
3998  ompt_frame_t *ompt_frame;
3999  if (ompt_enabled.enabled) {
4000  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4001  if (ompt_frame->enter_frame.ptr == NULL)
4002  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4003  }
4004  OMPT_STORE_RETURN_ADDRESS(global_tid);
4005 #endif
4006 // TODO: implicit barrier: should be exposed
4007 #if USE_ITT_NOTIFY
4008  __kmp_threads[global_tid]->th.th_ident = loc;
4009 #endif
4010  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4011 #if OMPT_SUPPORT && OMPT_OPTIONAL
4012  if (ompt_enabled.enabled) {
4013  ompt_frame->enter_frame = ompt_data_none;
4014  }
4015 #endif
4016 
4017  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
4018  tree_reduce_block)) {
4019 
4020  // only the primary thread executes here (it releases all other workers)
4021  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
4022  global_tid);
4023 
4024  } else {
4025 
4026  // should never reach this block
4027  KMP_ASSERT(0); // "unexpected method"
4028  }
4029  if (teams_swapped) {
4030  __kmp_restore_swapped_teams(th, team, task_state);
4031  }
4032 
4033  if (__kmp_env_consistency_check)
4034  __kmp_pop_sync(global_tid, ct_reduce, loc);
4035 
4036  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4037  global_tid, packed_reduction_method));
4038 
4039  return;
4040 }
4041 
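// Illustrative sketch (not part of the upstream source): the blocking variant
// differs from the nowait pair above in that, per the comments in
// __kmpc_end_reduce_nowait() and __kmpc_end_reduce(), the atomic path (return
// value 2) is also expected to call __kmpc_end_reduce() so the terminating
// barrier is executed. This reuses the hypothetical helpers from the nowait
// sketch earlier; exact lowering is compiler dependent.
#if 0
static void example_reduce_tail(ident_t *loc, kmp_int32 gtid, int *shared_sum,
                                int *private_sum) {
  switch (__kmpc_reduce(loc, gtid, /*num_vars=*/1, sizeof(*private_sum),
                        private_sum, example_sum_reduce,
                        &example_reduce_lock)) {
  case 1: // combine, then end the block (this executes the terminating barrier)
    *shared_sum += *private_sum;
    __kmpc_end_reduce(loc, gtid, &example_reduce_lock);
    break;
  case 2: // atomic combine, then still end the block so the barrier runs
    __sync_fetch_and_add(shared_sum, *private_sum);
    __kmpc_end_reduce(loc, gtid, &example_reduce_lock);
    break;
  default: // 0: nothing to do on this thread
    break;
  }
}
#endif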
4042 #undef __KMP_GET_REDUCTION_METHOD
4043 #undef __KMP_SET_REDUCTION_METHOD
4044 
4045 /* end of interface to fast scalable reduce routines */
4046 
4047 kmp_uint64 __kmpc_get_taskid() {
4048 
4049  kmp_int32 gtid;
4050  kmp_info_t *thread;
4051 
4052  gtid = __kmp_get_gtid();
4053  if (gtid < 0) {
4054  return 0;
4055  }
4056  thread = __kmp_thread_from_gtid(gtid);
4057  return thread->th.th_current_task->td_task_id;
4058 
4059 } // __kmpc_get_taskid
4060 
4061 kmp_uint64 __kmpc_get_parent_taskid() {
4062 
4063  kmp_int32 gtid;
4064  kmp_info_t *thread;
4065  kmp_taskdata_t *parent_task;
4066 
4067  gtid = __kmp_get_gtid();
4068  if (gtid < 0) {
4069  return 0;
4070  }
4071  thread = __kmp_thread_from_gtid(gtid);
4072  parent_task = thread->th.th_current_task->td_parent;
4073  return (parent_task == NULL ? 0 : parent_task->td_task_id);
4074 
4075 } // __kmpc_get_parent_taskid
4076 
4088 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4089  const struct kmp_dim *dims) {
4090  __kmp_assert_valid_gtid(gtid);
4091  int j, idx;
4092  kmp_int64 last, trace_count;
4093  kmp_info_t *th = __kmp_threads[gtid];
4094  kmp_team_t *team = th->th.th_team;
4095  kmp_uint32 *flags;
4096  kmp_disp_t *pr_buf = th->th.th_dispatch;
4097  dispatch_shared_info_t *sh_buf;
4098 
4099  KA_TRACE(
4100  20,
4101  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4102  gtid, num_dims, !team->t.t_serialized));
4103  KMP_DEBUG_ASSERT(dims != NULL);
4104  KMP_DEBUG_ASSERT(num_dims > 0);
4105 
4106  if (team->t.t_serialized) {
4107  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4108  return; // no dependencies if team is serialized
4109  }
4110  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4111  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4112  // the next loop
4113  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4114 
4115  // Save bounds info into allocated private buffer
4116  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4117  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4118  th, sizeof(kmp_int64) * (4 * num_dims + 1));
4119  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4120  pr_buf->th_doacross_info[0] =
4121  (kmp_int64)num_dims; // first element is number of dimensions
4122  // Also save the address of num_done so it can be accessed later without
4123  // knowing the buffer index
4124  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4125  pr_buf->th_doacross_info[2] = dims[0].lo;
4126  pr_buf->th_doacross_info[3] = dims[0].up;
4127  pr_buf->th_doacross_info[4] = dims[0].st;
4128  last = 5;
4129  for (j = 1; j < num_dims; ++j) {
4130  kmp_int64
4131  range_length; // keeps the ranges of all dimensions except the first, dims[0]
4132  if (dims[j].st == 1) { // most common case
4133  // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
4134  range_length = dims[j].up - dims[j].lo + 1;
4135  } else {
4136  if (dims[j].st > 0) {
4137  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4138  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4139  } else { // negative increment
4140  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4141  range_length =
4142  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4143  }
4144  }
4145  pr_buf->th_doacross_info[last++] = range_length;
4146  pr_buf->th_doacross_info[last++] = dims[j].lo;
4147  pr_buf->th_doacross_info[last++] = dims[j].up;
4148  pr_buf->th_doacross_info[last++] = dims[j].st;
4149  }
4150 
4151  // Compute total trip count.
4152  // Start with range of dims[0] which we don't need to keep in the buffer.
4153  if (dims[0].st == 1) { // most common case
4154  trace_count = dims[0].up - dims[0].lo + 1;
4155  } else if (dims[0].st > 0) {
4156  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4157  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4158  } else { // negative increment
4159  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4160  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4161  }
4162  for (j = 1; j < num_dims; ++j) {
4163  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4164  }
4165  KMP_DEBUG_ASSERT(trace_count > 0);
4166 
4167  // Check whether the shared buffer is still occupied by another loop
4168  // (the one with index idx - __kmp_dispatch_num_buffers)
4169  if (idx != sh_buf->doacross_buf_idx) {
4170  // Shared buffer is occupied, wait for it to be free
4171  __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4172  __kmp_eq_4, NULL);
4173  }
4174 #if KMP_32_BIT_ARCH
4175  // Check if we are the first thread. After the CAS the first thread gets 0;
4176  // others get 1 if initialization is in progress, or the allocated pointer
4177  // otherwise. Treat the pointer as a volatile integer until memory is allocated.
4178  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4179  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4180 #else
4181  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4182  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4183 #endif
4184  if (flags == NULL) {
4185  // we are the first thread, allocate the array of flags
4186  size_t size =
4187  (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4188  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4189  KMP_MB();
4190  sh_buf->doacross_flags = flags;
4191  } else if (flags == (kmp_uint32 *)1) {
4192 #if KMP_32_BIT_ARCH
4193  // initialization is still in progress, need to wait
4194  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4195 #else
4196  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4197 #endif
4198  KMP_YIELD(TRUE);
4199  KMP_MB();
4200  } else {
4201  KMP_MB();
4202  }
4203  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4204  pr_buf->th_doacross_flags =
4205  sh_buf->doacross_flags; // save private copy in order to not
4206  // touch shared buffer on each iteration
4207  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4208 }
4209 
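// Illustrative sketch (not part of the upstream source): how a compiler might
// set up the dims array for "#pragma omp for ordered(2) collapse(2)" over
// i = 0..n-1 and j = 0..m-1 before calling __kmpc_doacross_init(). The field
// names lo/up/st match the accesses above; the wrapper name is hypothetical.
#if 0
static void example_doacross_setup(ident_t *loc, int gtid, kmp_int64 n,
                                   kmp_int64 m) {
  struct kmp_dim dims[2];
  dims[0].lo = 0;     // lower bound of the i dimension
  dims[0].up = n - 1; // upper bound (inclusive)
  dims[0].st = 1;     // stride
  dims[1].lo = 0;
  dims[1].up = m - 1;
  dims[1].st = 1;
  __kmpc_doacross_init(loc, gtid, /*num_dims=*/2, dims);
}
#endif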
4210 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4211  __kmp_assert_valid_gtid(gtid);
4212  kmp_int64 shft;
4213  size_t num_dims, i;
4214  kmp_uint32 flag;
4215  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4216  kmp_info_t *th = __kmp_threads[gtid];
4217  kmp_team_t *team = th->th.th_team;
4218  kmp_disp_t *pr_buf;
4219  kmp_int64 lo, up, st;
4220 
4221  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4222  if (team->t.t_serialized) {
4223  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4224  return; // no dependencies if team is serialized
4225  }
4226 
4227  // calculate sequential iteration number and check out-of-bounds condition
4228  pr_buf = th->th.th_dispatch;
4229  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4230  num_dims = (size_t)pr_buf->th_doacross_info[0];
4231  lo = pr_buf->th_doacross_info[2];
4232  up = pr_buf->th_doacross_info[3];
4233  st = pr_buf->th_doacross_info[4];
4234 #if OMPT_SUPPORT && OMPT_OPTIONAL
4235  ompt_dependence_t deps[num_dims];
4236 #endif
4237  if (st == 1) { // most common case
4238  if (vec[0] < lo || vec[0] > up) {
4239  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4240  "bounds [%lld,%lld]\n",
4241  gtid, vec[0], lo, up));
4242  return;
4243  }
4244  iter_number = vec[0] - lo;
4245  } else if (st > 0) {
4246  if (vec[0] < lo || vec[0] > up) {
4247  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4248  "bounds [%lld,%lld]\n",
4249  gtid, vec[0], lo, up));
4250  return;
4251  }
4252  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4253  } else { // negative increment
4254  if (vec[0] > lo || vec[0] < up) {
4255  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4256  "bounds [%lld,%lld]\n",
4257  gtid, vec[0], lo, up));
4258  return;
4259  }
4260  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4261  }
4262 #if OMPT_SUPPORT && OMPT_OPTIONAL
4263  deps[0].variable.value = iter_number;
4264  deps[0].dependence_type = ompt_dependence_type_sink;
4265 #endif
4266  for (i = 1; i < num_dims; ++i) {
4267  kmp_int64 iter, ln;
4268  size_t j = i * 4;
4269  ln = pr_buf->th_doacross_info[j + 1];
4270  lo = pr_buf->th_doacross_info[j + 2];
4271  up = pr_buf->th_doacross_info[j + 3];
4272  st = pr_buf->th_doacross_info[j + 4];
4273  if (st == 1) {
4274  if (vec[i] < lo || vec[i] > up) {
4275  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4276  "bounds [%lld,%lld]\n",
4277  gtid, vec[i], lo, up));
4278  return;
4279  }
4280  iter = vec[i] - lo;
4281  } else if (st > 0) {
4282  if (vec[i] < lo || vec[i] > up) {
4283  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4284  "bounds [%lld,%lld]\n",
4285  gtid, vec[i], lo, up));
4286  return;
4287  }
4288  iter = (kmp_uint64)(vec[i] - lo) / st;
4289  } else { // st < 0
4290  if (vec[i] > lo || vec[i] < up) {
4291  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4292  "bounds [%lld,%lld]\n",
4293  gtid, vec[i], lo, up));
4294  return;
4295  }
4296  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4297  }
4298  iter_number = iter + ln * iter_number;
4299 #if OMPT_SUPPORT && OMPT_OPTIONAL
4300  deps[i].variable.value = iter;
4301  deps[i].dependence_type = ompt_dependence_type_sink;
4302 #endif
4303  }
4304  shft = iter_number % 32; // use 32-bit granularity
4305  iter_number >>= 5; // divided by 32
4306  flag = 1 << shft;
4307  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4308  KMP_YIELD(TRUE);
4309  }
4310  KMP_MB();
4311 #if OMPT_SUPPORT && OMPT_OPTIONAL
4312  if (ompt_enabled.ompt_callback_dependences) {
4313  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4314  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4315  }
4316 #endif
4317  KA_TRACE(20,
4318  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4319  gtid, (iter_number << 5) + shft));
4320 }
4321 
4322 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4323  __kmp_assert_valid_gtid(gtid);
4324  kmp_int64 shft;
4325  size_t num_dims, i;
4326  kmp_uint32 flag;
4327  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4328  kmp_info_t *th = __kmp_threads[gtid];
4329  kmp_team_t *team = th->th.th_team;
4330  kmp_disp_t *pr_buf;
4331  kmp_int64 lo, st;
4332 
4333  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4334  if (team->t.t_serialized) {
4335  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4336  return; // no dependencies if team is serialized
4337  }
4338 
4339  // calculate sequential iteration number (same as in "wait" but no
4340  // out-of-bounds checks)
4341  pr_buf = th->th.th_dispatch;
4342  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4343  num_dims = (size_t)pr_buf->th_doacross_info[0];
4344  lo = pr_buf->th_doacross_info[2];
4345  st = pr_buf->th_doacross_info[4];
4346 #if OMPT_SUPPORT && OMPT_OPTIONAL
4347  ompt_dependence_t deps[num_dims];
4348 #endif
4349  if (st == 1) { // most common case
4350  iter_number = vec[0] - lo;
4351  } else if (st > 0) {
4352  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4353  } else { // negative increment
4354  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4355  }
4356 #if OMPT_SUPPORT && OMPT_OPTIONAL
4357  deps[0].variable.value = iter_number;
4358  deps[0].dependence_type = ompt_dependence_type_source;
4359 #endif
4360  for (i = 1; i < num_dims; ++i) {
4361  kmp_int64 iter, ln;
4362  size_t j = i * 4;
4363  ln = pr_buf->th_doacross_info[j + 1];
4364  lo = pr_buf->th_doacross_info[j + 2];
4365  st = pr_buf->th_doacross_info[j + 4];
4366  if (st == 1) {
4367  iter = vec[i] - lo;
4368  } else if (st > 0) {
4369  iter = (kmp_uint64)(vec[i] - lo) / st;
4370  } else { // st < 0
4371  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4372  }
4373  iter_number = iter + ln * iter_number;
4374 #if OMPT_SUPPORT && OMPT_OPTIONAL
4375  deps[i].variable.value = iter;
4376  deps[i].dependence_type = ompt_dependence_type_source;
4377 #endif
4378  }
4379 #if OMPT_SUPPORT && OMPT_OPTIONAL
4380  if (ompt_enabled.ompt_callback_dependences) {
4381  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4382  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4383  }
4384 #endif
4385  shft = iter_number % 32; // use 32-bit granularity
4386  iter_number >>= 5; // divided by 32
4387  flag = 1 << shft;
4388  KMP_MB();
4389  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4390  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4391  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4392  (iter_number << 5) + shft));
4393 }
4394 
4395 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4396  __kmp_assert_valid_gtid(gtid);
4397  kmp_int32 num_done;
4398  kmp_info_t *th = __kmp_threads[gtid];
4399  kmp_team_t *team = th->th.th_team;
4400  kmp_disp_t *pr_buf = th->th.th_dispatch;
4401 
4402  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4403  if (team->t.t_serialized) {
4404  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4405  return; // nothing to do
4406  }
4407  num_done =
4408  KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4409  if (num_done == th->th.th_team_nproc) {
4410  // we are the last thread, need to free shared resources
4411  int idx = pr_buf->th_doacross_buf_idx - 1;
4412  dispatch_shared_info_t *sh_buf =
4413  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4414  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4415  (kmp_int64)&sh_buf->doacross_num_done);
4416  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4417  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4418  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4419  sh_buf->doacross_flags = NULL;
4420  sh_buf->doacross_num_done = 0;
4421  sh_buf->doacross_buf_idx +=
4422  __kmp_dispatch_num_buffers; // free buffer for future re-use
4423  }
4424  // free private resources (need to keep buffer index forever)
4425  pr_buf->th_doacross_flags = NULL;
4426  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4427  pr_buf->th_doacross_info = NULL;
4428  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4429 }
4430 
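// Illustrative sketch (not part of the upstream source): inside the generated
// loop body, "ordered depend(sink: i-1, j)" becomes a wait on that iteration
// vector and "ordered depend(source)" becomes a post of the current one; each
// thread calls __kmpc_doacross_fini() once after its chunks are done. Names
// are hypothetical and the surrounding worksharing-loop code is omitted.
#if 0
static void example_doacross_body(ident_t *loc, int gtid, kmp_int64 i,
                                  kmp_int64 j) {
  kmp_int64 sink_vec[2] = {i - 1, j}; // ordered depend(sink: i-1, j)
  __kmpc_doacross_wait(loc, gtid, sink_vec);

  /* ... user code of the ordered(2) loop body ... */

  kmp_int64 source_vec[2] = {i, j}; // ordered depend(source)
  __kmpc_doacross_post(loc, gtid, source_vec);
}
#endif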
4431 /* OpenMP 5.1 Memory Management routines */
4432 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4433  return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4434 }
4435 
4436 void *omp_aligned_alloc(size_t align, size_t size,
4437  omp_allocator_handle_t allocator) {
4438  return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4439 }
4440 
4441 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4442  return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4443 }
4444 
4445 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4446  omp_allocator_handle_t allocator) {
4447  return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4448 }
4449 
4450 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4451  omp_allocator_handle_t free_allocator) {
4452  return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4453  free_allocator);
4454 }
4455 
4456 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4457  ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4458 }
4459 /* end of OpenMP 5.1 Memory Management routines */
4460 
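// Illustrative sketch (not part of the upstream source): user-level usage of
// the OpenMP 5.x allocation routines above with the predefined
// omp_default_mem_alloc allocator; n is an arbitrary element count and the
// function name is hypothetical.
#if 0
static double *example_allocate_vectors(size_t n) {
  // 64-byte aligned allocation, e.g. for vectorized access
  double *a = (double *)omp_aligned_alloc(64, n * sizeof(double),
                                          omp_default_mem_alloc);
  double *b = (double *)omp_calloc(n, sizeof(double), omp_default_mem_alloc);
  omp_free(b, omp_default_mem_alloc); // free with the allocator used to allocate
  return a; // caller frees with omp_free(a, omp_default_mem_alloc)
}
#endif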
4461 int __kmpc_get_target_offload(void) {
4462  if (!__kmp_init_serial) {
4463  __kmp_serial_initialize();
4464  }
4465  return __kmp_target_offload;
4466 }
4467 
4468 int __kmpc_pause_resource(kmp_pause_status_t level) {
4469  if (!__kmp_init_serial) {
4470  return 1; // Can't pause if runtime is not initialized
4471  }
4472  return __kmp_pause_resource(level);
4473 }
4474 
4475 void __kmpc_error(ident_t *loc, int severity, const char *message) {
4476  if (!__kmp_init_serial)
4477  __kmp_serial_initialize();
4478 
4479  KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4480 
4481 #if OMPT_SUPPORT
4482  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4483  ompt_callbacks.ompt_callback(ompt_callback_error)(
4484  (ompt_severity_t)severity, message, KMP_STRLEN(message),
4485  OMPT_GET_RETURN_ADDRESS(0));
4486  }
4487 #endif // OMPT_SUPPORT
4488 
4489  char *src_loc;
4490  if (loc && loc->psource) {
4491  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4492  src_loc =
4493  __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4494  __kmp_str_loc_free(&str_loc);
4495  } else {
4496  src_loc = __kmp_str_format("unknown");
4497  }
4498 
4499  if (severity == severity_warning)
4500  KMP_WARNING(UserDirectedWarning, src_loc, message);
4501  else
4502  KMP_FATAL(UserDirectedError, src_loc, message);
4503 
4504  __kmp_str_free(&src_loc);
4505 }
4506 
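// Illustrative sketch (not part of the upstream source): an OpenMP 5.x
// "error" directive with execution-time severity, e.g.
//   #pragma omp error at(execution) severity(warning) message("bad input")
// is expected to reach the runtime through a call of roughly this shape
// (exact lowering is compiler dependent; the wrapper name is hypothetical):
#if 0
static void example_runtime_warning(ident_t *loc) {
  __kmpc_error(loc, severity_warning, "bad input");
}
#endif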
4507 // Mark the beginning of a scope directive.
4508 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4509 // reserved is for future extension of the scope directive and is not used.
4510 #if OMPT_SUPPORT && OMPT_OPTIONAL
4511  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4512  kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4513  int tid = __kmp_tid_from_gtid(gtid);
4514  ompt_callbacks.ompt_callback(ompt_callback_work)(
4515  ompt_work_scope, ompt_scope_begin,
4516  &(team->t.ompt_team_info.parallel_data),
4517  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4518  OMPT_GET_RETURN_ADDRESS(0));
4519  }
4520 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4521 }
4522 
4523 // Mark the end of a scope directive.
4524 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4525 // reserved is for future extension of the scope directive and is not used.
4526 #if OMPT_SUPPORT && OMPT_OPTIONAL
4527  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4528  kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4529  int tid = __kmp_tid_from_gtid(gtid);
4530  ompt_callbacks.ompt_callback(ompt_callback_work)(
4531  ompt_work_scope, ompt_scope_end,
4532  &(team->t.ompt_team_info.parallel_data),
4533  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4534  OMPT_GET_RETURN_ADDRESS(0));
4535  }
4536 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4537 }
4538 
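// Illustrative sketch (not part of the upstream source): a "#pragma omp scope"
// region is presumably bracketed by the two entry points above, with any
// closing barrier emitted separately by the compiler unless nowait is given.
// The wrapper name is hypothetical.
#if 0
static void example_scope(ident_t *loc, kmp_int32 gtid) {
  __kmpc_scope(loc, gtid, /*reserved=*/NULL);
  /* ... structured block of the scope directive ... */
  __kmpc_end_scope(loc, gtid, /*reserved=*/NULL);
}
#endif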
4539 #ifdef KMP_USE_VERSION_SYMBOLS
4540 // For GOMP compatibility there are two versions of each omp_* API.
4541 // One is the plain C symbol and one is the Fortran symbol with an appended
4542 // underscore. When we implement a specific ompc_* version of an omp_*
4543 // function, we want the plain GOMP versioned symbol to alias the ompc_* version
4544 // instead of the Fortran versions in kmp_ftn_entry.h
4545 extern "C" {
4546 // Have to undef these from omp.h so they aren't translated into
4547 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4548 #ifdef omp_set_affinity_format
4549 #undef omp_set_affinity_format
4550 #endif
4551 #ifdef omp_get_affinity_format
4552 #undef omp_get_affinity_format
4553 #endif
4554 #ifdef omp_display_affinity
4555 #undef omp_display_affinity
4556 #endif
4557 #ifdef omp_capture_affinity
4558 #undef omp_capture_affinity
4559 #endif
4560 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4561  "OMP_5.0");
4562 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4563  "OMP_5.0");
4564 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4565  "OMP_5.0");
4566 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4567  "OMP_5.0");
4568 } // extern "C"
4569 #endif