LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 #include "ompt-specific.h"
22 
23 #define MAX_MESSAGE 512
24 
25 // flags will be used in future, e.g. to implement openmp_strict library
26 // restrictions
27 
36 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37  // By default __kmpc_begin() is no-op.
38  char *env;
39  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40  __kmp_str_match_true(env)) {
41  __kmp_middle_initialize();
42  __kmp_assign_root_init_mask();
43  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
44  } else if (__kmp_ignore_mppbeg() == FALSE) {
45  // By default __kmp_ignore_mppbeg() returns TRUE.
46  __kmp_internal_begin();
47  KC_TRACE(10, ("__kmpc_begin: called\n"));
48  }
49 }
50 
59 void __kmpc_end(ident_t *loc) {
60  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
61  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
62  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
63  // returns FALSE and __kmpc_end() will unregister this root (it can cause
64  // library shut down).
65  if (__kmp_ignore_mppend() == FALSE) {
66  KC_TRACE(10, ("__kmpc_end: called\n"));
67  KA_TRACE(30, ("__kmpc_end\n"));
68 
69  __kmp_internal_end_thread(-1);
70  }
71 #if KMP_OS_WINDOWS && OMPT_SUPPORT
72  // Normal exit process on Windows does not allow worker threads of the final
73  // parallel region to finish reporting their events, so shutting down the
74  // library here fixes the issue at least for the cases where __kmpc_end() is
75  // placed properly.
76  if (ompt_enabled.enabled)
77  __kmp_internal_end_library(__kmp_gtid_get_specific());
78 #endif
79 }
80 
99 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
100  kmp_int32 gtid = __kmp_entry_gtid();
101 
102  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
103 
104  return gtid;
105 }
106 
121 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
122  KC_TRACE(10,
123  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124 
125  return TCR_4(__kmp_all_nth);
126 }
127 
134 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
135  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
136  return __kmp_tid_from_gtid(__kmp_entry_gtid());
137 }
138 
144 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
145  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146 
147  return __kmp_entry_thread()->th.th_team->t.t_nproc;
148 }
149 
156 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
157 #ifndef KMP_DEBUG
158 
159  return TRUE;
160 
161 #else
162 
163  const char *semi2;
164  const char *semi3;
165  int line_no;
166 
167  if (__kmp_par_range == 0) {
168  return TRUE;
169  }
170  semi2 = loc->psource;
171  if (semi2 == NULL) {
172  return TRUE;
173  }
174  semi2 = strchr(semi2, ';');
175  if (semi2 == NULL) {
176  return TRUE;
177  }
178  semi2 = strchr(semi2 + 1, ';');
179  if (semi2 == NULL) {
180  return TRUE;
181  }
182  if (__kmp_par_range_filename[0]) {
183  const char *name = semi2 - 1;
184  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
185  name--;
186  }
187  if ((*name == '/') || (*name == ';')) {
188  name++;
189  }
190  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
191  return __kmp_par_range < 0;
192  }
193  }
194  semi3 = strchr(semi2 + 1, ';');
195  if (__kmp_par_range_routine[0]) {
196  if ((semi3 != NULL) && (semi3 > semi2) &&
197  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
198  return __kmp_par_range < 0;
199  }
200  }
201  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
202  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
203  return __kmp_par_range > 0;
204  }
205  return __kmp_par_range < 0;
206  }
207  return TRUE;
208 
209 #endif /* KMP_DEBUG */
210 }
211 
218 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
219  return __kmp_entry_thread()->th.th_root->r.r_active;
220 }
221 
231 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
232  kmp_int32 num_threads) {
233  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
234  global_tid, num_threads));
235  __kmp_assert_valid_gtid(global_tid);
236  __kmp_push_num_threads(loc, global_tid, num_threads);
237 }
238 
239 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
240  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
241  /* the num_threads are automatically popped */
242 }
243 
244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245  kmp_int32 proc_bind) {
246  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247  proc_bind));
248  __kmp_assert_valid_gtid(global_tid);
249  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250 }
251 
262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263  int gtid = __kmp_entry_gtid();
264 
265 #if (KMP_STATS_ENABLED)
266  // If we were in a serial region, then stop the serial timer, record
267  // the event, and start parallel region timer
268  stats_state_e previous_state = KMP_GET_THREAD_STATE();
269  if (previous_state == stats_state_e::SERIAL_REGION) {
270  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271  } else {
272  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273  }
274  int inParallel = __kmpc_in_parallel(loc);
275  if (inParallel) {
276  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277  } else {
278  KMP_COUNT_BLOCK(OMP_PARALLEL);
279  }
280 #endif
281 
282  // maybe saving thr_state would be enough here
283  {
284  va_list ap;
285  va_start(ap, microtask);
286 
287 #if OMPT_SUPPORT
288  ompt_frame_t *ompt_frame;
289  if (ompt_enabled.enabled) {
290  kmp_info_t *master_th = __kmp_threads[gtid];
291  ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
292  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
293  }
294  OMPT_STORE_RETURN_ADDRESS(gtid);
295 #endif
296 
297 #if INCLUDE_SSC_MARKS
298  SSC_MARK_FORKING();
299 #endif
300  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
301  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
302  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
303  kmp_va_addr_of(ap));
304 #if INCLUDE_SSC_MARKS
305  SSC_MARK_JOINING();
306 #endif
307  __kmp_join_call(loc, gtid
308 #if OMPT_SUPPORT
309  ,
310  fork_context_intel
311 #endif
312  );
313 
314  va_end(ap);
315 
316 #if OMPT_SUPPORT
317  if (ompt_enabled.enabled) {
318  ompt_frame->enter_frame = ompt_data_none;
319  }
320 #endif
321  }
322 
323 #if KMP_STATS_ENABLED
324  if (previous_state == stats_state_e::SERIAL_REGION) {
325  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
326  KMP_SET_THREAD_STATE(previous_state);
327  } else {
328  KMP_POP_PARTITIONED_TIMER();
329  }
330 #endif // KMP_STATS_ENABLED
331 }
332 
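// Illustrative sketch (not part of kmp_csupport.cpp): roughly what a compiler
// could emit for "#pragma omp parallel num_threads(4)" with one shared
// variable, using the entry points above. The example_* names are
// hypothetical; the ident_t field layout and the KMP_IDENT_KMPC flag come
// from kmp.h, not from this excerpt.
static ident_t example_par_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                  ";example.c;main;10;1;;"};

// Outlined parallel-region body; every thread of the new team runs it.
static void example_par_microtask(kmp_int32 *global_tid, kmp_int32 *bound_tid,
                                  int *shared_x) {
  (void)global_tid;
  (void)bound_tid;
  (*shared_x)++; // left unsynchronized here only for brevity
}

static void example_parallel_lowering(void) {
  int x = 0;
  kmp_int32 gtid = __kmpc_global_thread_num(&example_par_loc);
  __kmpc_push_num_threads(&example_par_loc, gtid, 4); // num_threads(4)
  // One shared variable is forwarded through the varargs tail (argc == 1).
  __kmpc_fork_call(&example_par_loc, 1, (kmpc_micro)example_par_microtask, &x);
}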
343 void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
344  kmp_int32 cond, void *args) {
345  int gtid = __kmp_entry_gtid();
346  int zero = 0;
347  if (cond) {
348  if (args)
349  __kmpc_fork_call(loc, argc, microtask, args);
350  else
351  __kmpc_fork_call(loc, argc, microtask);
352  } else {
353  __kmpc_serialized_parallel(loc, gtid);
354 
355  if (args)
356  microtask(&gtid, &zero, args);
357  else
358  microtask(&gtid, &zero);
359 
359
360  __kmpc_end_serialized_parallel(loc, gtid);
361  }
362 }
363 
375 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
376  kmp_int32 num_teams, kmp_int32 num_threads) {
377  KA_TRACE(20,
378  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
379  global_tid, num_teams, num_threads));
380  __kmp_assert_valid_gtid(global_tid);
381  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
382 }
383 
400 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
401  kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
402  kmp_int32 num_threads) {
403  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
404  " num_teams_ub=%d num_threads=%d\n",
405  global_tid, num_teams_lb, num_teams_ub, num_threads));
406  __kmp_assert_valid_gtid(global_tid);
407  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
408  num_threads);
409 }
410 
421 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
422  ...) {
423  int gtid = __kmp_entry_gtid();
424  kmp_info_t *this_thr = __kmp_threads[gtid];
425  va_list ap;
426  va_start(ap, microtask);
427 
428 #if KMP_STATS_ENABLED
429  KMP_COUNT_BLOCK(OMP_TEAMS);
430  stats_state_e previous_state = KMP_GET_THREAD_STATE();
431  if (previous_state == stats_state_e::SERIAL_REGION) {
432  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
433  } else {
434  KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
435  }
436 #endif
437 
438  // remember teams entry point and nesting level
439  this_thr->th.th_teams_microtask = microtask;
440  this_thr->th.th_teams_level =
441  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
442 
443 #if OMPT_SUPPORT
444  kmp_team_t *parent_team = this_thr->th.th_team;
445  int tid = __kmp_tid_from_gtid(gtid);
446  if (ompt_enabled.enabled) {
447  parent_team->t.t_implicit_task_taskdata[tid]
448  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
449  }
450  OMPT_STORE_RETURN_ADDRESS(gtid);
451 #endif
452 
453  // if __kmpc_push_num_teams was not called, set the default number of
454  // teams
455  if (this_thr->th.th_teams_size.nteams == 0) {
456  __kmp_push_num_teams(loc, gtid, 0, 0);
457  }
458  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
459  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
460  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
461 
462  __kmp_fork_call(
463  loc, gtid, fork_context_intel, argc,
464  VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
465  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
466  __kmp_join_call(loc, gtid
467 #if OMPT_SUPPORT
468  ,
469  fork_context_intel
470 #endif
471  );
472 
473  // Pop current CG root off list
474  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
475  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
476  this_thr->th.th_cg_roots = tmp->up;
477  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
478  " to node %p. cg_nthreads was %d\n",
479  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
480  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
481  int i = tmp->cg_nthreads--;
482  if (i == 1) { // check if we are the last thread in the CG (not always the case)
483  __kmp_free(tmp);
484  }
485  // Restore current task's thread_limit from CG root
486  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
487  this_thr->th.th_current_task->td_icvs.thread_limit =
488  this_thr->th.th_cg_roots->cg_thread_limit;
489 
490  this_thr->th.th_teams_microtask = NULL;
491  this_thr->th.th_teams_level = 0;
492  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
493  va_end(ap);
494 #if KMP_STATS_ENABLED
495  if (previous_state == stats_state_e::SERIAL_REGION) {
496  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
497  KMP_SET_THREAD_STATE(previous_state);
498  } else {
499  KMP_POP_PARTITIONED_TIMER();
500  }
501 #endif // KMP_STATS_ENABLED
502 }
503 
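// Illustrative sketch (not part of this file): a possible lowering of
// "#pragma omp teams num_teams(2)" with 8 threads per team onto
// __kmpc_push_num_teams()/__kmpc_fork_teams() above. The example_* names and
// the ident_t initializer are hypothetical; the outlined routine has the same
// kmpc_micro shape used by __kmpc_fork_call.
static ident_t example_teams_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                    ";example.c;main;20;1;;"};

static void example_teams_microtask(kmp_int32 *global_tid,
                                    kmp_int32 *bound_tid) {
  (void)global_tid;
  (void)bound_tid;
  // Runs once on the initial thread of each team in the league.
}

static void example_teams_lowering(void) {
  kmp_int32 gtid = __kmpc_global_thread_num(&example_teams_loc);
  __kmpc_push_num_teams(&example_teams_loc, gtid, /*num_teams=*/2,
                        /*num_threads=*/8);
  __kmpc_fork_teams(&example_teams_loc, 0,
                    (kmpc_micro)example_teams_microtask);
}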
504 // I don't think this function should ever have been exported.
505 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
506 // openmp code ever called it, but it's been exported from the RTL for so
507 // long that I'm afraid to remove the definition.
508 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
509 
522 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
523  // The implementation is now in kmp_runtime.cpp so that it can share static
524  // functions with kmp_fork_call since the tasks to be done are similar in
525  // each case.
526  __kmp_assert_valid_gtid(global_tid);
527 #if OMPT_SUPPORT
528  OMPT_STORE_RETURN_ADDRESS(global_tid);
529 #endif
530  __kmp_serialized_parallel(loc, global_tid);
531 }
532 
540 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
541  kmp_internal_control_t *top;
542  kmp_info_t *this_thr;
543  kmp_team_t *serial_team;
544 
545  KC_TRACE(10,
546  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
547 
548  /* skip all this code for autopar serialized loops since it results in
549  unacceptable overhead */
550  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
551  return;
552 
553  // Not autopar code
554  __kmp_assert_valid_gtid(global_tid);
555  if (!TCR_4(__kmp_init_parallel))
556  __kmp_parallel_initialize();
557 
558  __kmp_resume_if_soft_paused();
559 
560  this_thr = __kmp_threads[global_tid];
561  serial_team = this_thr->th.th_serial_team;
562 
563  kmp_task_team_t *task_team = this_thr->th.th_task_team;
564  // we need to wait for the proxy tasks before finishing the thread
565  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
566  task_team->tt.tt_hidden_helper_task_encountered))
567  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
568 
569  KMP_MB();
570  KMP_DEBUG_ASSERT(serial_team);
571  KMP_ASSERT(serial_team->t.t_serialized);
572  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
573  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
574  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
575  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
576 
577 #if OMPT_SUPPORT
578  if (ompt_enabled.enabled &&
579  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
580  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
581  if (ompt_enabled.ompt_callback_implicit_task) {
582  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
583  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
584  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
585  }
586 
587  // clear the task id only after unlinking the task
588  ompt_data_t *parent_task_data;
589  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
590 
591  if (ompt_enabled.ompt_callback_parallel_end) {
592  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
593  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
594  ompt_parallel_invoker_program | ompt_parallel_team,
595  OMPT_LOAD_RETURN_ADDRESS(global_tid));
596  }
597  __ompt_lw_taskteam_unlink(this_thr);
598  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
599  }
600 #endif
601 
602  /* If necessary, pop the internal control stack values and replace the team
603  * values */
604  top = serial_team->t.t_control_stack_top;
605  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
606  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
607  serial_team->t.t_control_stack_top = top->next;
608  __kmp_free(top);
609  }
610 
611  /* pop dispatch buffers stack */
612  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
613  {
614  dispatch_private_info_t *disp_buffer =
615  serial_team->t.t_dispatch->th_disp_buffer;
616  serial_team->t.t_dispatch->th_disp_buffer =
617  serial_team->t.t_dispatch->th_disp_buffer->next;
618  __kmp_free(disp_buffer);
619  }
620  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
621 
622  --serial_team->t.t_serialized;
623  if (serial_team->t.t_serialized == 0) {
624 
625  /* return to the parallel section */
626 
627 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
628  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
629  __kmp_clear_x87_fpu_status_word();
630  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
631  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
632  }
633 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
634 
635  __kmp_pop_current_task_from_thread(this_thr);
636 #if OMPD_SUPPORT
637  if (ompd_state & OMPD_ENABLE_BP)
638  ompd_bp_parallel_end();
639 #endif
640 
641  this_thr->th.th_team = serial_team->t.t_parent;
642  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
643 
644  /* restore values cached in the thread */
645  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
646  this_thr->th.th_team_master =
647  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
648  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
649 
650  /* TODO the below shouldn't need to be adjusted for serialized teams */
651  this_thr->th.th_dispatch =
652  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
653 
654  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
655  this_thr->th.th_current_task->td_flags.executing = 1;
656 
657  if (__kmp_tasking_mode != tskm_immediate_exec) {
658  // Copy the task team from the new child / old parent team to the thread.
659  this_thr->th.th_task_team =
660  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
661  KA_TRACE(20,
662  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
663  "team %p\n",
664  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
665  }
666 #if KMP_AFFINITY_SUPPORTED
667  if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
668  __kmp_reset_root_init_mask(global_tid);
669  }
670 #endif
671  } else {
672  if (__kmp_tasking_mode != tskm_immediate_exec) {
673  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
674  "depth of serial team %p to %d\n",
675  global_tid, serial_team, serial_team->t.t_serialized));
676  }
677  }
678 
679  serial_team->t.t_level--;
680  if (__kmp_env_consistency_check)
681  __kmp_pop_parallel(global_tid, NULL);
682 #if OMPT_SUPPORT
683  if (ompt_enabled.enabled)
684  this_thr->th.ompt_thread_info.state =
685  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
686  : ompt_state_work_parallel);
687 #endif
688 }
689 
698 void __kmpc_flush(ident_t *loc) {
699  KC_TRACE(10, ("__kmpc_flush: called\n"));
700 
701  /* need an explicit __mf() here since the library uses volatile instead */
702  KMP_MFENCE(); /* Flush all pending memory write invalidates. */
703 
704 #if OMPT_SUPPORT && OMPT_OPTIONAL
705  if (ompt_enabled.ompt_callback_flush) {
706  ompt_callbacks.ompt_callback(ompt_callback_flush)(
707  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
708  }
709 #endif
710 }
711 
712 /* -------------------------------------------------------------------------- */
720 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
721  KMP_COUNT_BLOCK(OMP_BARRIER);
722  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
723  __kmp_assert_valid_gtid(global_tid);
724 
725  if (!TCR_4(__kmp_init_parallel))
726  __kmp_parallel_initialize();
727 
728  __kmp_resume_if_soft_paused();
729 
730  if (__kmp_env_consistency_check) {
731  if (loc == 0) {
732  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
733  }
734  __kmp_check_barrier(global_tid, ct_barrier, loc);
735  }
736 
737 #if OMPT_SUPPORT
738  ompt_frame_t *ompt_frame;
739  if (ompt_enabled.enabled) {
740  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
741  if (ompt_frame->enter_frame.ptr == NULL)
742  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
743  }
744  OMPT_STORE_RETURN_ADDRESS(global_tid);
745 #endif
746  __kmp_threads[global_tid]->th.th_ident = loc;
747  // TODO: explicit barrier_wait_id:
748  // this function is called when 'barrier' directive is present or
749  // implicit barrier at the end of a worksharing construct.
750  // 1) better to add a per-thread barrier counter to a thread data structure
751  // 2) set to 0 when a new team is created
752  // 3) no sync is required
753 
754  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
755 #if OMPT_SUPPORT && OMPT_OPTIONAL
756  if (ompt_enabled.enabled) {
757  ompt_frame->enter_frame = ompt_data_none;
758  }
759 #endif
760 }
761 
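// Illustrative sketch (not part of this file): an explicit "#pragma omp
// barrier" inside a parallel region becomes a single call to __kmpc_barrier()
// with the calling thread's global id. example_barrier_loc is a hypothetical
// ident_t; real compilers also encode barrier-specific flags in it.
static ident_t example_barrier_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                      ";example.c;work;42;3;;"};

static void example_barrier_lowering(kmp_int32 gtid, int *phase1_done) {
  *phase1_done = 1; // ... end of the first phase of work ...
  __kmpc_barrier(&example_barrier_loc, gtid);
  // Code after the barrier observes all first-phase updates of the team.
}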
762 /* The BARRIER for a MASTER section is always explicit */
769 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
770  int status = 0;
771 
772  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
773  __kmp_assert_valid_gtid(global_tid);
774 
775  if (!TCR_4(__kmp_init_parallel))
776  __kmp_parallel_initialize();
777 
778  __kmp_resume_if_soft_paused();
779 
780  if (KMP_MASTER_GTID(global_tid)) {
781  KMP_COUNT_BLOCK(OMP_MASTER);
782  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
783  status = 1;
784  }
785 
786 #if OMPT_SUPPORT && OMPT_OPTIONAL
787  if (status) {
788  if (ompt_enabled.ompt_callback_masked) {
789  kmp_info_t *this_thr = __kmp_threads[global_tid];
790  kmp_team_t *team = this_thr->th.th_team;
791 
792  int tid = __kmp_tid_from_gtid(global_tid);
793  ompt_callbacks.ompt_callback(ompt_callback_masked)(
794  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
795  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
796  OMPT_GET_RETURN_ADDRESS(0));
797  }
798  }
799 #endif
800 
801  if (__kmp_env_consistency_check) {
802 #if KMP_USE_DYNAMIC_LOCK
803  if (status)
804  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
805  else
806  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
807 #else
808  if (status)
809  __kmp_push_sync(global_tid, ct_master, loc, NULL);
810  else
811  __kmp_check_sync(global_tid, ct_master, loc, NULL);
812 #endif
813  }
814 
815  return status;
816 }
817 
826 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
827  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
828  __kmp_assert_valid_gtid(global_tid);
829  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
830  KMP_POP_PARTITIONED_TIMER();
831 
832 #if OMPT_SUPPORT && OMPT_OPTIONAL
833  kmp_info_t *this_thr = __kmp_threads[global_tid];
834  kmp_team_t *team = this_thr->th.th_team;
835  if (ompt_enabled.ompt_callback_masked) {
836  int tid = __kmp_tid_from_gtid(global_tid);
837  ompt_callbacks.ompt_callback(ompt_callback_masked)(
838  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
839  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
840  OMPT_GET_RETURN_ADDRESS(0));
841  }
842 #endif
843 
844  if (__kmp_env_consistency_check) {
845  if (KMP_MASTER_GTID(global_tid))
846  __kmp_pop_sync(global_tid, ct_master, loc);
847  }
848 }
849 
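// Illustrative sketch (not part of this file): lowering of "#pragma omp
// master" inside a parallel region. Only the thread for which __kmpc_master()
// returns 1 executes the block and must pair it with __kmpc_end_master();
// there is no implied barrier. example_* names are hypothetical.
static ident_t example_master_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                     ";example.c;work;50;3;;"};

static void example_master_lowering(kmp_int32 gtid, int *result) {
  if (__kmpc_master(&example_master_loc, gtid)) {
    *result = 42; // executed by the primary (master) thread only
    __kmpc_end_master(&example_master_loc, gtid);
  }
}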
858 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
859  int status = 0;
860  int tid;
861  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
862  __kmp_assert_valid_gtid(global_tid);
863 
864  if (!TCR_4(__kmp_init_parallel))
865  __kmp_parallel_initialize();
866 
867  __kmp_resume_if_soft_paused();
868 
869  tid = __kmp_tid_from_gtid(global_tid);
870  if (tid == filter) {
871  KMP_COUNT_BLOCK(OMP_MASKED);
872  KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
873  status = 1;
874  }
875 
876 #if OMPT_SUPPORT && OMPT_OPTIONAL
877  if (status) {
878  if (ompt_enabled.ompt_callback_masked) {
879  kmp_info_t *this_thr = __kmp_threads[global_tid];
880  kmp_team_t *team = this_thr->th.th_team;
881  ompt_callbacks.ompt_callback(ompt_callback_masked)(
882  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
883  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
884  OMPT_GET_RETURN_ADDRESS(0));
885  }
886  }
887 #endif
888 
889  if (__kmp_env_consistency_check) {
890 #if KMP_USE_DYNAMIC_LOCK
891  if (status)
892  __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
893  else
894  __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
895 #else
896  if (status)
897  __kmp_push_sync(global_tid, ct_masked, loc, NULL);
898  else
899  __kmp_check_sync(global_tid, ct_masked, loc, NULL);
900 #endif
901  }
902 
903  return status;
904 }
905 
914 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
915  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
916  __kmp_assert_valid_gtid(global_tid);
917  KMP_POP_PARTITIONED_TIMER();
918 
919 #if OMPT_SUPPORT && OMPT_OPTIONAL
920  kmp_info_t *this_thr = __kmp_threads[global_tid];
921  kmp_team_t *team = this_thr->th.th_team;
922  if (ompt_enabled.ompt_callback_masked) {
923  int tid = __kmp_tid_from_gtid(global_tid);
924  ompt_callbacks.ompt_callback(ompt_callback_masked)(
925  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
926  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
927  OMPT_GET_RETURN_ADDRESS(0));
928  }
929 #endif
930 
931  if (__kmp_env_consistency_check) {
932  __kmp_pop_sync(global_tid, ct_masked, loc);
933  }
934 }
935 
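// Illustrative sketch (not part of this file): "#pragma omp masked filter(2)"
// lowers like master, except that the executing thread is the one whose
// team-local id equals the filter expression. example_* names are
// hypothetical.
static ident_t example_masked_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                     ";example.c;work;60;3;;"};

static void example_masked_lowering(kmp_int32 gtid, int *result) {
  if (__kmpc_masked(&example_masked_loc, gtid, /*filter=*/2)) {
    *result = 42; // executed only by the thread with tid == 2, if it exists
    __kmpc_end_masked(&example_masked_loc, gtid);
  }
}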
943 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
944  int cid = 0;
945  kmp_info_t *th;
946  KMP_DEBUG_ASSERT(__kmp_init_serial);
947 
948  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
949  __kmp_assert_valid_gtid(gtid);
950 
951  if (!TCR_4(__kmp_init_parallel))
952  __kmp_parallel_initialize();
953 
954  __kmp_resume_if_soft_paused();
955 
956 #if USE_ITT_BUILD
957  __kmp_itt_ordered_prep(gtid);
958 // TODO: ordered_wait_id
959 #endif /* USE_ITT_BUILD */
960 
961  th = __kmp_threads[gtid];
962 
963 #if OMPT_SUPPORT && OMPT_OPTIONAL
964  kmp_team_t *team;
965  ompt_wait_id_t lck;
966  void *codeptr_ra;
967  OMPT_STORE_RETURN_ADDRESS(gtid);
968  if (ompt_enabled.enabled) {
969  team = __kmp_team_from_gtid(gtid);
970  lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
971  /* OMPT state update */
972  th->th.ompt_thread_info.wait_id = lck;
973  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
974 
975  /* OMPT event callback */
976  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
977  if (ompt_enabled.ompt_callback_mutex_acquire) {
978  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
979  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
980  codeptr_ra);
981  }
982  }
983 #endif
984 
985  if (th->th.th_dispatch->th_deo_fcn != 0)
986  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
987  else
988  __kmp_parallel_deo(&gtid, &cid, loc);
989 
990 #if OMPT_SUPPORT && OMPT_OPTIONAL
991  if (ompt_enabled.enabled) {
992  /* OMPT state update */
993  th->th.ompt_thread_info.state = ompt_state_work_parallel;
994  th->th.ompt_thread_info.wait_id = 0;
995 
996  /* OMPT event callback */
997  if (ompt_enabled.ompt_callback_mutex_acquired) {
998  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
999  ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1000  }
1001  }
1002 #endif
1003 
1004 #if USE_ITT_BUILD
1005  __kmp_itt_ordered_start(gtid);
1006 #endif /* USE_ITT_BUILD */
1007 }
1008 
1016 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1017  int cid = 0;
1018  kmp_info_t *th;
1019 
1020  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1021  __kmp_assert_valid_gtid(gtid);
1022 
1023 #if USE_ITT_BUILD
1024  __kmp_itt_ordered_end(gtid);
1025 // TODO: ordered_wait_id
1026 #endif /* USE_ITT_BUILD */
1027 
1028  th = __kmp_threads[gtid];
1029 
1030  if (th->th.th_dispatch->th_dxo_fcn != 0)
1031  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1032  else
1033  __kmp_parallel_dxo(&gtid, &cid, loc);
1034 
1035 #if OMPT_SUPPORT && OMPT_OPTIONAL
1036  OMPT_STORE_RETURN_ADDRESS(gtid);
1037  if (ompt_enabled.ompt_callback_mutex_released) {
1038  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1039  ompt_mutex_ordered,
1040  (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1041  ->t.t_ordered.dt.t_value,
1042  OMPT_LOAD_RETURN_ADDRESS(gtid));
1043  }
1044 #endif
1045 }
1046 
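// Illustrative sketch (not part of this file): the body of an "ordered"
// construct inside a loop with an ordered clause is bracketed by
// __kmpc_ordered()/__kmpc_end_ordered(), which serialize the bodies in
// iteration order. The dispatch/loop bookkeeping that normally surrounds this
// is omitted; example_* names are hypothetical.
static ident_t example_ordered_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                      ";example.c;loop;70;5;;"};

static void example_ordered_body(kmp_int32 gtid, int i, int *log, int *pos) {
  __kmpc_ordered(&example_ordered_loc, gtid);
  log[(*pos)++] = i; // appended strictly in iteration order
  __kmpc_end_ordered(&example_ordered_loc, gtid);
}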
1047 #if KMP_USE_DYNAMIC_LOCK
1048 
1049 static __forceinline void
1050 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1051  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1052  // Pointer to the allocated indirect lock is written to crit, while indexing
1053  // is ignored.
1054  void *idx;
1055  kmp_indirect_lock_t **lck;
1056  lck = (kmp_indirect_lock_t **)crit;
1057  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1058  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1059  KMP_SET_I_LOCK_LOCATION(ilk, loc);
1060  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1061  KA_TRACE(20,
1062  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1063 #if USE_ITT_BUILD
1064  __kmp_itt_critical_creating(ilk->lock, loc);
1065 #endif
1066  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1067  if (status == 0) {
1068 #if USE_ITT_BUILD
1069  __kmp_itt_critical_destroyed(ilk->lock);
1070 #endif
1071  // We don't really need to destroy the unclaimed lock here since it will be
1072  // cleaned up at program exit.
1073  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1074  }
1075  KMP_DEBUG_ASSERT(*lck != NULL);
1076 }
1077 
1078 // Fast-path acquire tas lock
1079 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1080  { \
1081  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1082  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1083  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1084  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1085  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1086  kmp_uint32 spins; \
1087  KMP_FSYNC_PREPARE(l); \
1088  KMP_INIT_YIELD(spins); \
1089  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1090  do { \
1091  if (TCR_4(__kmp_nth) > \
1092  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1093  KMP_YIELD(TRUE); \
1094  } else { \
1095  KMP_YIELD_SPIN(spins); \
1096  } \
1097  __kmp_spin_backoff(&backoff); \
1098  } while ( \
1099  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1100  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1101  } \
1102  KMP_FSYNC_ACQUIRED(l); \
1103  }
1104 
1105 // Fast-path test tas lock
1106 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1107  { \
1108  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1109  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1110  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1111  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1112  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1113  }
1114 
1115 // Fast-path release tas lock
1116 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1117  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1118 
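// Illustrative sketch (not part of this file): the idea behind the TAS
// fast-path macros above, restated with C++11 atomics. A free lock holds 0;
// an owner stores gtid + 1; contenders spin (yielding here as a stand-in for
// the runtime's KMP_YIELD/backoff logic) until their CAS succeeds. This class
// is only an analogy, not the runtime's kmp_tas_lock_t.
#include <atomic>
#include <thread>

class ExampleTasLock {
  std::atomic<int> poll{0}; // 0 == free, otherwise owner gtid + 1
public:
  void acquire(int gtid) {
    int expected = 0;
    while (poll.load(std::memory_order_relaxed) != 0 ||
           !poll.compare_exchange_weak(expected, gtid + 1,
                                       std::memory_order_acquire)) {
      expected = 0;              // a failed CAS overwrites 'expected'
      std::this_thread::yield(); // back off instead of hammering the line
    }
  }
  void release() { poll.store(0, std::memory_order_release); }
};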
1119 #if KMP_USE_FUTEX
1120 
1121 #include <sys/syscall.h>
1122 #include <unistd.h>
1123 #ifndef FUTEX_WAIT
1124 #define FUTEX_WAIT 0
1125 #endif
1126 #ifndef FUTEX_WAKE
1127 #define FUTEX_WAKE 1
1128 #endif
1129 
1130 // Fast-path acquire futex lock
1131 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1132  { \
1133  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1134  kmp_int32 gtid_code = (gtid + 1) << 1; \
1135  KMP_MB(); \
1136  KMP_FSYNC_PREPARE(ftx); \
1137  kmp_int32 poll_val; \
1138  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1139  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1140  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1141  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1142  if (!cond) { \
1143  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1144  poll_val | \
1145  KMP_LOCK_BUSY(1, futex))) { \
1146  continue; \
1147  } \
1148  poll_val |= KMP_LOCK_BUSY(1, futex); \
1149  } \
1150  kmp_int32 rc; \
1151  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1152  NULL, NULL, 0)) != 0) { \
1153  continue; \
1154  } \
1155  gtid_code |= 1; \
1156  } \
1157  KMP_FSYNC_ACQUIRED(ftx); \
1158  }
1159 
1160 // Fast-path test futex lock
1161 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1162  { \
1163  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1164  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1165  KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1166  KMP_FSYNC_ACQUIRED(ftx); \
1167  rc = TRUE; \
1168  } else { \
1169  rc = FALSE; \
1170  } \
1171  }
1172 
1173 // Fast-path release futex lock
1174 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1175  { \
1176  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1177  KMP_MB(); \
1178  KMP_FSYNC_RELEASING(ftx); \
1179  kmp_int32 poll_val = \
1180  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1181  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1182  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1183  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1184  } \
1185  KMP_MB(); \
1186  KMP_YIELD_OVERSUB(); \
1187  }
1188 
1189 #endif // KMP_USE_FUTEX
1190 
1191 #else // KMP_USE_DYNAMIC_LOCK
1192 
1193 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1194  ident_t const *loc,
1195  kmp_int32 gtid) {
1196  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1197 
1198  // Because of the double-check, the following load doesn't need to be volatile
1199  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1200 
1201  if (lck == NULL) {
1202  void *idx;
1203 
1204  // Allocate & initialize the lock.
1205  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1206  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1207  __kmp_init_user_lock_with_checks(lck);
1208  __kmp_set_user_lock_location(lck, loc);
1209 #if USE_ITT_BUILD
1210  __kmp_itt_critical_creating(lck);
1211 // __kmp_itt_critical_creating() should be called *before* the first usage
1212 // of the underlying lock. It is the only place where we can guarantee it.
1213 // There is a chance the lock will be destroyed without ever being used, but
1214 // that is not a problem: it is not a real event seen by the user, just the
1215 // setting of a name for the object (lock). See kmp_itt.h for more details.
1216 #endif /* USE_ITT_BUILD */
1217 
1218  // Use a cmpxchg instruction to slam the start of the critical section with
1219  // the lock pointer. If another thread beat us to it, deallocate the lock,
1220  // and use the lock that the other thread allocated.
1221  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1222 
1223  if (status == 0) {
1224 // Deallocate the lock and reload the value.
1225 #if USE_ITT_BUILD
1226  __kmp_itt_critical_destroyed(lck);
1227 // Let ITT know the lock is destroyed and the same memory location may be reused
1228 // for another purpose.
1229 #endif /* USE_ITT_BUILD */
1230  __kmp_destroy_user_lock_with_checks(lck);
1231  __kmp_user_lock_free(&idx, gtid, lck);
1232  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1233  KMP_DEBUG_ASSERT(lck != NULL);
1234  }
1235  }
1236  return lck;
1237 }
1238 
1239 #endif // KMP_USE_DYNAMIC_LOCK
1240 
1251 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1252  kmp_critical_name *crit) {
1253 #if KMP_USE_DYNAMIC_LOCK
1254 #if OMPT_SUPPORT && OMPT_OPTIONAL
1255  OMPT_STORE_RETURN_ADDRESS(global_tid);
1256 #endif // OMPT_SUPPORT
1257  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1258 #else
1259  KMP_COUNT_BLOCK(OMP_CRITICAL);
1260 #if OMPT_SUPPORT && OMPT_OPTIONAL
1261  ompt_state_t prev_state = ompt_state_undefined;
1262  ompt_thread_info_t ti;
1263 #endif
1264  kmp_user_lock_p lck;
1265 
1266  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1267  __kmp_assert_valid_gtid(global_tid);
1268 
1269  // TODO: add THR_OVHD_STATE
1270 
1271  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1272  KMP_CHECK_USER_LOCK_INIT();
1273 
1274  if ((__kmp_user_lock_kind == lk_tas) &&
1275  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1276  lck = (kmp_user_lock_p)crit;
1277  }
1278 #if KMP_USE_FUTEX
1279  else if ((__kmp_user_lock_kind == lk_futex) &&
1280  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1281  lck = (kmp_user_lock_p)crit;
1282  }
1283 #endif
1284  else { // ticket, queuing or drdpa
1285  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1286  }
1287 
1288  if (__kmp_env_consistency_check)
1289  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1290 
1291  // Since the critical directive binds to all threads, not just the current
1292  // team, we have to check this even if we are in a serialized team.
1293  // Also, even if we are the uber thread, we still have to acquire the lock,
1294  // as we have to contend with sibling threads.
1295 
1296 #if USE_ITT_BUILD
1297  __kmp_itt_critical_acquiring(lck);
1298 #endif /* USE_ITT_BUILD */
1299 #if OMPT_SUPPORT && OMPT_OPTIONAL
1300  OMPT_STORE_RETURN_ADDRESS(global_tid);
1301  void *codeptr_ra = NULL;
1302  if (ompt_enabled.enabled) {
1303  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1304  /* OMPT state update */
1305  prev_state = ti.state;
1306  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1307  ti.state = ompt_state_wait_critical;
1308 
1309  /* OMPT event callback */
1310  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1311  if (ompt_enabled.ompt_callback_mutex_acquire) {
1312  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1313  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1314  (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1315  }
1316  }
1317 #endif
1318  // Value of 'crit' should be good for using as a critical_id of the critical
1319  // section directive.
1320  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1321 
1322 #if USE_ITT_BUILD
1323  __kmp_itt_critical_acquired(lck);
1324 #endif /* USE_ITT_BUILD */
1325 #if OMPT_SUPPORT && OMPT_OPTIONAL
1326  if (ompt_enabled.enabled) {
1327  /* OMPT state update */
1328  ti.state = prev_state;
1329  ti.wait_id = 0;
1330 
1331  /* OMPT event callback */
1332  if (ompt_enabled.ompt_callback_mutex_acquired) {
1333  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1334  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1335  }
1336  }
1337 #endif
1338  KMP_POP_PARTITIONED_TIMER();
1339 
1340  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1341  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1342 #endif // KMP_USE_DYNAMIC_LOCK
1343 }
1344 
1345 #if KMP_USE_DYNAMIC_LOCK
1346 
1347 // Converts the given hint to an internal lock implementation
1348 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1349 #if KMP_USE_TSX
1350 #define KMP_TSX_LOCK(seq) lockseq_##seq
1351 #else
1352 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1353 #endif
1354 
1355 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1356 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1357 #else
1358 #define KMP_CPUINFO_RTM 0
1359 #endif
1360 
1361  // Hints that do not require further logic
1362  if (hint & kmp_lock_hint_hle)
1363  return KMP_TSX_LOCK(hle);
1364  if (hint & kmp_lock_hint_rtm)
1365  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1366  if (hint & kmp_lock_hint_adaptive)
1367  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1368 
1369  // Rule out conflicting hints first by returning the default lock
1370  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1371  return __kmp_user_lock_seq;
1372  if ((hint & omp_lock_hint_speculative) &&
1373  (hint & omp_lock_hint_nonspeculative))
1374  return __kmp_user_lock_seq;
1375 
1376  // Do not even consider speculation when it appears to be contended
1377  if (hint & omp_lock_hint_contended)
1378  return lockseq_queuing;
1379 
1380  // Uncontended lock without speculation
1381  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1382  return lockseq_tas;
1383 
1384  // Use RTM lock for speculation
1385  if (hint & omp_lock_hint_speculative)
1386  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1387 
1388  return __kmp_user_lock_seq;
1389 }
1390 
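// Illustrative sketch (not part of this file): how lock hints reach
// __kmp_map_hint_to_lock() from user code. omp_init_lock_with_hint() is the
// standard OpenMP API declared in omp.h; the runtime maps the hint to one of
// its lock implementations, e.g. omp_lock_hint_contended to a queuing lock,
// or omp_lock_hint_speculative to an RTM-based lock when the CPU supports it,
// falling back to the default lock otherwise.
static void example_hinted_lock_usage(void) {
  omp_lock_t lk;
  omp_init_lock_with_hint(&lk, omp_lock_hint_speculative);
  omp_set_lock(&lk);
  // ... short, rarely-contended critical work ...
  omp_unset_lock(&lk);
  omp_destroy_lock(&lk);
}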
1391 #if OMPT_SUPPORT && OMPT_OPTIONAL
1392 #if KMP_USE_DYNAMIC_LOCK
1393 static kmp_mutex_impl_t
1394 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1395  if (user_lock) {
1396  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1397  case 0:
1398  break;
1399 #if KMP_USE_FUTEX
1400  case locktag_futex:
1401  return kmp_mutex_impl_queuing;
1402 #endif
1403  case locktag_tas:
1404  return kmp_mutex_impl_spin;
1405 #if KMP_USE_TSX
1406  case locktag_hle:
1407  case locktag_rtm_spin:
1408  return kmp_mutex_impl_speculative;
1409 #endif
1410  default:
1411  return kmp_mutex_impl_none;
1412  }
1413  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1414  }
1415  KMP_ASSERT(ilock);
1416  switch (ilock->type) {
1417 #if KMP_USE_TSX
1418  case locktag_adaptive:
1419  case locktag_rtm_queuing:
1420  return kmp_mutex_impl_speculative;
1421 #endif
1422  case locktag_nested_tas:
1423  return kmp_mutex_impl_spin;
1424 #if KMP_USE_FUTEX
1425  case locktag_nested_futex:
1426 #endif
1427  case locktag_ticket:
1428  case locktag_queuing:
1429  case locktag_drdpa:
1430  case locktag_nested_ticket:
1431  case locktag_nested_queuing:
1432  case locktag_nested_drdpa:
1433  return kmp_mutex_impl_queuing;
1434  default:
1435  return kmp_mutex_impl_none;
1436  }
1437 }
1438 #else
1439 // For locks without dynamic binding
1440 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1441  switch (__kmp_user_lock_kind) {
1442  case lk_tas:
1443  return kmp_mutex_impl_spin;
1444 #if KMP_USE_FUTEX
1445  case lk_futex:
1446 #endif
1447  case lk_ticket:
1448  case lk_queuing:
1449  case lk_drdpa:
1450  return kmp_mutex_impl_queuing;
1451 #if KMP_USE_TSX
1452  case lk_hle:
1453  case lk_rtm_queuing:
1454  case lk_rtm_spin:
1455  case lk_adaptive:
1456  return kmp_mutex_impl_speculative;
1457 #endif
1458  default:
1459  return kmp_mutex_impl_none;
1460  }
1461 }
1462 #endif // KMP_USE_DYNAMIC_LOCK
1463 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1464 
1478 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1479  kmp_critical_name *crit, uint32_t hint) {
1480  KMP_COUNT_BLOCK(OMP_CRITICAL);
1481  kmp_user_lock_p lck;
1482 #if OMPT_SUPPORT && OMPT_OPTIONAL
1483  ompt_state_t prev_state = ompt_state_undefined;
1484  ompt_thread_info_t ti;
1485  // This is the case if called from __kmpc_critical:
1486  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1487  if (!codeptr)
1488  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1489 #endif
1490 
1491  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1492  __kmp_assert_valid_gtid(global_tid);
1493 
1494  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1495  // Check if it is initialized.
1496  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1497  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1498  if (*lk == 0) {
1499  if (KMP_IS_D_LOCK(lockseq)) {
1500  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1501  KMP_GET_D_TAG(lockseq));
1502  } else {
1503  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1504  }
1505  }
1506  // Branch for accessing the actual lock object and set operation. This
1507  // branching is inevitable since this lock initialization does not follow the
1508  // normal dispatch path (lock table is not used).
1509  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1510  lck = (kmp_user_lock_p)lk;
1511  if (__kmp_env_consistency_check) {
1512  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1513  __kmp_map_hint_to_lock(hint));
1514  }
1515 #if USE_ITT_BUILD
1516  __kmp_itt_critical_acquiring(lck);
1517 #endif
1518 #if OMPT_SUPPORT && OMPT_OPTIONAL
1519  if (ompt_enabled.enabled) {
1520  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1521  /* OMPT state update */
1522  prev_state = ti.state;
1523  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1524  ti.state = ompt_state_wait_critical;
1525 
1526  /* OMPT event callback */
1527  if (ompt_enabled.ompt_callback_mutex_acquire) {
1528  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1529  ompt_mutex_critical, (unsigned int)hint,
1530  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1531  codeptr);
1532  }
1533  }
1534 #endif
1535 #if KMP_USE_INLINED_TAS
1536  if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1537  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1538  } else
1539 #elif KMP_USE_INLINED_FUTEX
1540  if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1541  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1542  } else
1543 #endif
1544  {
1545  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1546  }
1547  } else {
1548  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1549  lck = ilk->lock;
1550  if (__kmp_env_consistency_check) {
1551  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1552  __kmp_map_hint_to_lock(hint));
1553  }
1554 #if USE_ITT_BUILD
1555  __kmp_itt_critical_acquiring(lck);
1556 #endif
1557 #if OMPT_SUPPORT && OMPT_OPTIONAL
1558  if (ompt_enabled.enabled) {
1559  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1560  /* OMPT state update */
1561  prev_state = ti.state;
1562  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1563  ti.state = ompt_state_wait_critical;
1564 
1565  /* OMPT event callback */
1566  if (ompt_enabled.ompt_callback_mutex_acquire) {
1567  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1568  ompt_mutex_critical, (unsigned int)hint,
1569  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1570  codeptr);
1571  }
1572  }
1573 #endif
1574  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1575  }
1576  KMP_POP_PARTITIONED_TIMER();
1577 
1578 #if USE_ITT_BUILD
1579  __kmp_itt_critical_acquired(lck);
1580 #endif /* USE_ITT_BUILD */
1581 #if OMPT_SUPPORT && OMPT_OPTIONAL
1582  if (ompt_enabled.enabled) {
1583  /* OMPT state update */
1584  ti.state = prev_state;
1585  ti.wait_id = 0;
1586 
1587  /* OMPT event callback */
1588  if (ompt_enabled.ompt_callback_mutex_acquired) {
1589  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1590  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1591  }
1592  }
1593 #endif
1594 
1595  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1596  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1597 } // __kmpc_critical_with_hint
1598 
1599 #endif // KMP_USE_DYNAMIC_LOCK
1600 
1610 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1611  kmp_critical_name *crit) {
1612  kmp_user_lock_p lck;
1613 
1614  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1615 
1616 #if KMP_USE_DYNAMIC_LOCK
1617  int locktag = KMP_EXTRACT_D_TAG(crit);
1618  if (locktag) {
1619  lck = (kmp_user_lock_p)crit;
1620  KMP_ASSERT(lck != NULL);
1621  if (__kmp_env_consistency_check) {
1622  __kmp_pop_sync(global_tid, ct_critical, loc);
1623  }
1624 #if USE_ITT_BUILD
1625  __kmp_itt_critical_releasing(lck);
1626 #endif
1627 #if KMP_USE_INLINED_TAS
1628  if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1629  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1630  } else
1631 #elif KMP_USE_INLINED_FUTEX
1632  if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1633  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1634  } else
1635 #endif
1636  {
1637  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1638  }
1639  } else {
1640  kmp_indirect_lock_t *ilk =
1641  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1642  KMP_ASSERT(ilk != NULL);
1643  lck = ilk->lock;
1644  if (__kmp_env_consistency_check) {
1645  __kmp_pop_sync(global_tid, ct_critical, loc);
1646  }
1647 #if USE_ITT_BUILD
1648  __kmp_itt_critical_releasing(lck);
1649 #endif
1650  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1651  }
1652 
1653 #else // KMP_USE_DYNAMIC_LOCK
1654 
1655  if ((__kmp_user_lock_kind == lk_tas) &&
1656  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1657  lck = (kmp_user_lock_p)crit;
1658  }
1659 #if KMP_USE_FUTEX
1660  else if ((__kmp_user_lock_kind == lk_futex) &&
1661  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1662  lck = (kmp_user_lock_p)crit;
1663  }
1664 #endif
1665  else { // ticket, queuing or drdpa
1666  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1667  }
1668 
1669  KMP_ASSERT(lck != NULL);
1670 
1671  if (__kmp_env_consistency_check)
1672  __kmp_pop_sync(global_tid, ct_critical, loc);
1673 
1674 #if USE_ITT_BUILD
1675  __kmp_itt_critical_releasing(lck);
1676 #endif /* USE_ITT_BUILD */
1677  // Value of 'crit' should be good for using as a critical_id of the critical
1678  // section directive.
1679  __kmp_release_user_lock_with_checks(lck, global_tid);
1680 
1681 #endif // KMP_USE_DYNAMIC_LOCK
1682 
1683 #if OMPT_SUPPORT && OMPT_OPTIONAL
1684  /* OMPT release event triggers after lock is released; place here to trigger
1685  * for all #if branches */
1686  OMPT_STORE_RETURN_ADDRESS(global_tid);
1687  if (ompt_enabled.ompt_callback_mutex_released) {
1688  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1689  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1690  OMPT_LOAD_RETURN_ADDRESS(0));
1691  }
1692 #endif
1693 
1694  KMP_POP_PARTITIONED_TIMER();
1695  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1696 }
1697 
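// Illustrative sketch (not part of this file): lowering of
// "#pragma omp critical (name)". The compiler emits one zero-initialized
// kmp_critical_name per critical name and brackets the region with
// __kmpc_critical()/__kmpc_end_critical(); the runtime lazily installs the
// actual lock into that storage on first use. example_* names are
// hypothetical.
static ident_t example_crit_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                   ";example.c;update;80;3;;"};
static kmp_critical_name example_crit_name = {0}; // storage for "(name)"

static void example_critical_lowering(kmp_int32 gtid, int *shared_sum, int v) {
  __kmpc_critical(&example_crit_loc, gtid, &example_crit_name);
  *shared_sum += v; // mutually exclusive across all threads using this name
  __kmpc_end_critical(&example_crit_loc, gtid, &example_crit_name);
}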
1707 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1708  int status;
1709  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1710  __kmp_assert_valid_gtid(global_tid);
1711 
1712  if (!TCR_4(__kmp_init_parallel))
1713  __kmp_parallel_initialize();
1714 
1715  __kmp_resume_if_soft_paused();
1716 
1717  if (__kmp_env_consistency_check)
1718  __kmp_check_barrier(global_tid, ct_barrier, loc);
1719 
1720 #if OMPT_SUPPORT
1721  ompt_frame_t *ompt_frame;
1722  if (ompt_enabled.enabled) {
1723  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1724  if (ompt_frame->enter_frame.ptr == NULL)
1725  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1726  }
1727  OMPT_STORE_RETURN_ADDRESS(global_tid);
1728 #endif
1729 #if USE_ITT_NOTIFY
1730  __kmp_threads[global_tid]->th.th_ident = loc;
1731 #endif
1732  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1733 #if OMPT_SUPPORT && OMPT_OPTIONAL
1734  if (ompt_enabled.enabled) {
1735  ompt_frame->enter_frame = ompt_data_none;
1736  }
1737 #endif
1738 
1739  return (status != 0) ? 0 : 1;
1740 }
1741 
1751 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1752  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1753  __kmp_assert_valid_gtid(global_tid);
1754  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1755 }
1756 
1767 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1768  kmp_int32 ret;
1769  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1770  __kmp_assert_valid_gtid(global_tid);
1771 
1772  if (!TCR_4(__kmp_init_parallel))
1773  __kmp_parallel_initialize();
1774 
1775  __kmp_resume_if_soft_paused();
1776 
1777  if (__kmp_env_consistency_check) {
1778  if (loc == 0) {
1779  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1780  }
1781  __kmp_check_barrier(global_tid, ct_barrier, loc);
1782  }
1783 
1784 #if OMPT_SUPPORT
1785  ompt_frame_t *ompt_frame;
1786  if (ompt_enabled.enabled) {
1787  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1788  if (ompt_frame->enter_frame.ptr == NULL)
1789  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1790  }
1791  OMPT_STORE_RETURN_ADDRESS(global_tid);
1792 #endif
1793 #if USE_ITT_NOTIFY
1794  __kmp_threads[global_tid]->th.th_ident = loc;
1795 #endif
1796  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1797 #if OMPT_SUPPORT && OMPT_OPTIONAL
1798  if (ompt_enabled.enabled) {
1799  ompt_frame->enter_frame = ompt_data_none;
1800  }
1801 #endif
1802 
1803  ret = __kmpc_master(loc, global_tid);
1804 
1805  if (__kmp_env_consistency_check) {
1806  /* there's no __kmpc_end_master called; so the (stats) */
1807  /* actions of __kmpc_end_master are done here */
1808  if (ret) {
1809  /* only one thread should do the pop since only */
1810  /* one did the push (see __kmpc_master()) */
1811  __kmp_pop_sync(global_tid, ct_master, loc);
1812  }
1813  }
1814 
1815  return (ret);
1816 }
1817 
1818 /* The BARRIER for a SINGLE process section is always explicit */
1830 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1831  __kmp_assert_valid_gtid(global_tid);
1832  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1833 
1834  if (rc) {
1835  // We are going to execute the single statement, so we should count it.
1836  KMP_COUNT_BLOCK(OMP_SINGLE);
1837  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1838  }
1839 
1840 #if OMPT_SUPPORT && OMPT_OPTIONAL
1841  kmp_info_t *this_thr = __kmp_threads[global_tid];
1842  kmp_team_t *team = this_thr->th.th_team;
1843  int tid = __kmp_tid_from_gtid(global_tid);
1844 
1845  if (ompt_enabled.enabled) {
1846  if (rc) {
1847  if (ompt_enabled.ompt_callback_work) {
1848  ompt_callbacks.ompt_callback(ompt_callback_work)(
1849  ompt_work_single_executor, ompt_scope_begin,
1850  &(team->t.ompt_team_info.parallel_data),
1851  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1852  1, OMPT_GET_RETURN_ADDRESS(0));
1853  }
1854  } else {
1855  if (ompt_enabled.ompt_callback_work) {
1856  ompt_callbacks.ompt_callback(ompt_callback_work)(
1857  ompt_work_single_other, ompt_scope_begin,
1858  &(team->t.ompt_team_info.parallel_data),
1859  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1860  1, OMPT_GET_RETURN_ADDRESS(0));
1861  ompt_callbacks.ompt_callback(ompt_callback_work)(
1862  ompt_work_single_other, ompt_scope_end,
1863  &(team->t.ompt_team_info.parallel_data),
1864  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1865  1, OMPT_GET_RETURN_ADDRESS(0));
1866  }
1867  }
1868  }
1869 #endif
1870 
1871  return rc;
1872 }
1873 
1883 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1884  __kmp_assert_valid_gtid(global_tid);
1885  __kmp_exit_single(global_tid);
1886  KMP_POP_PARTITIONED_TIMER();
1887 
1888 #if OMPT_SUPPORT && OMPT_OPTIONAL
1889  kmp_info_t *this_thr = __kmp_threads[global_tid];
1890  kmp_team_t *team = this_thr->th.th_team;
1891  int tid = __kmp_tid_from_gtid(global_tid);
1892 
1893  if (ompt_enabled.ompt_callback_work) {
1894  ompt_callbacks.ompt_callback(ompt_callback_work)(
1895  ompt_work_single_executor, ompt_scope_end,
1896  &(team->t.ompt_team_info.parallel_data),
1897  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1898  OMPT_GET_RETURN_ADDRESS(0));
1899  }
1900 #endif
1901 }
1902 
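// Illustrative sketch (not part of this file): lowering of "#pragma omp
// single". Exactly one thread sees a non-zero return from __kmpc_single(),
// runs the block, and calls __kmpc_end_single(); the team then meets at the
// construct's implicit barrier unless a nowait clause removed it. example_*
// names are hypothetical.
static ident_t example_single_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                     ";example.c;init;90;3;;"};

static void example_single_lowering(kmp_int32 gtid, int *shared_flag) {
  if (__kmpc_single(&example_single_loc, gtid)) {
    *shared_flag = 1; // executed by exactly one thread of the team
    __kmpc_end_single(&example_single_loc, gtid);
  }
  __kmpc_barrier(&example_single_loc, gtid); // implicit barrier (no nowait)
}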
1910 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1911  KMP_POP_PARTITIONED_TIMER();
1912  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1913 
1914 #if OMPT_SUPPORT && OMPT_OPTIONAL
1915  if (ompt_enabled.ompt_callback_work) {
1916  ompt_work_t ompt_work_type = ompt_work_loop;
1917  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1918  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1919  // Determine workshare type
1920  if (loc != NULL) {
1921  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1922  ompt_work_type = ompt_work_loop;
1923  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1924  ompt_work_type = ompt_work_sections;
1925  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1926  ompt_work_type = ompt_work_distribute;
1927  } else {
1928  // use default set above.
1929  // a warning about this case is provided in __kmpc_for_static_init
1930  }
1931  KMP_DEBUG_ASSERT(ompt_work_type);
1932  }
1933  ompt_callbacks.ompt_callback(ompt_callback_work)(
1934  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1935  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1936  }
1937 #endif
1938  if (__kmp_env_consistency_check)
1939  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1940 }
1941 
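// Illustrative sketch (not part of this file): the "init" half that pairs
// with __kmpc_for_static_fini() is __kmpc_for_static_init_4(), declared in
// kmp.h and defined elsewhere in the runtime (not in this excerpt). Roughly,
// for "#pragma omp for schedule(static)" over i = 0..N-1 a compiler could
// emit the following; example_* names are hypothetical and kmp_sch_static
// comes from kmp.h.
static ident_t example_for_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                  ";example.c;loop;100;3;;"};

static void example_static_for_lowering(kmp_int32 gtid, int N, int *a) {
  kmp_int32 lower = 0, upper = N - 1, stride = 1, last = 0;
  __kmpc_for_static_init_4(&example_for_loc, gtid, kmp_sch_static, &last,
                           &lower, &upper, &stride, /*incr=*/1, /*chunk=*/1);
  for (kmp_int32 i = lower; i <= upper; ++i)
    a[i] = i; // each thread touches only its assigned chunk
  __kmpc_for_static_fini(&example_for_loc, gtid);
  // The worksharing construct's implicit barrier is emitted separately.
}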
1942 // User routines which take C-style arguments (call by value),
1943 // unlike the Fortran equivalent routines
1944 
1945 void ompc_set_num_threads(int arg) {
1946  // !!!!! TODO: check the per-task binding
1947  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1948 }
1949 
1950 void ompc_set_dynamic(int flag) {
1951  kmp_info_t *thread;
1952 
1953  /* For the thread-private implementation of the internal controls */
1954  thread = __kmp_entry_thread();
1955 
1956  __kmp_save_internal_controls(thread);
1957 
1958  set__dynamic(thread, flag ? true : false);
1959 }
1960 
1961 void ompc_set_nested(int flag) {
1962  kmp_info_t *thread;
1963 
1964  /* For the thread-private internal controls implementation */
1965  thread = __kmp_entry_thread();
1966 
1967  __kmp_save_internal_controls(thread);
1968 
1969  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1970 }
1971 
1972 void ompc_set_max_active_levels(int max_active_levels) {
1973  /* TO DO */
1974  /* we want per-task implementation of this internal control */
1975 
1976  /* For the per-thread internal controls implementation */
1977  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1978 }
1979 
1980 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1981  // !!!!! TODO: check the per-task binding
1982  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1983 }
1984 
1985 int ompc_get_ancestor_thread_num(int level) {
1986  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1987 }
1988 
1989 int ompc_get_team_size(int level) {
1990  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1991 }
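/* These C-style entry points mirror the omp_* user API routines of the same
   names (illustrative user-level calls shown with the standard omp_* names;
   the exact dispatch from omp.h is configuration-dependent):

     omp_set_dynamic(0);
     omp_set_num_threads(4);
     omp_set_max_active_levels(2);
     #pragma omp parallel
     {
       int initial_tid = omp_get_ancestor_thread_num(0); // always 0
       int team_size   = omp_get_team_size(1);           // size of this team
     }
*/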
1992 
1993 /* OpenMP 5.0 Affinity Format API */
1994 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
1995  if (!__kmp_init_serial) {
1996  __kmp_serial_initialize();
1997  }
1998  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1999  format, KMP_STRLEN(format) + 1);
2000 }
2001 
2002 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2003  size_t format_size;
2004  if (!__kmp_init_serial) {
2005  __kmp_serial_initialize();
2006  }
2007  format_size = KMP_STRLEN(__kmp_affinity_format);
2008  if (buffer && size) {
2009  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2010  format_size + 1);
2011  }
2012  return format_size;
2013 }
2014 
2015 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2016  int gtid;
2017  if (!TCR_4(__kmp_init_middle)) {
2018  __kmp_middle_initialize();
2019  }
2020  __kmp_assign_root_init_mask();
2021  gtid = __kmp_get_gtid();
2022 #if KMP_AFFINITY_SUPPORTED
2023  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2024  __kmp_affinity.flags.reset) {
2025  __kmp_reset_root_init_mask(gtid);
2026  }
2027 #endif
2028  __kmp_aux_display_affinity(gtid, format);
2029 }
2030 
2031 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2032  char const *format) {
2033  int gtid;
2034  size_t num_required;
2035  kmp_str_buf_t capture_buf;
2036  if (!TCR_4(__kmp_init_middle)) {
2037  __kmp_middle_initialize();
2038  }
2039  __kmp_assign_root_init_mask();
2040  gtid = __kmp_get_gtid();
2041 #if KMP_AFFINITY_SUPPORTED
2042  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2043  __kmp_affinity.flags.reset) {
2044  __kmp_reset_root_init_mask(gtid);
2045  }
2046 #endif
2047  __kmp_str_buf_init(&capture_buf);
2048  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2049  if (buffer && buf_size) {
2050  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2051  capture_buf.used + 1);
2052  }
2053  __kmp_str_buf_free(&capture_buf);
2054  return num_required;
2055 }
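/* Illustrative use of the OpenMP 5.0 affinity-format API served by the entry
   points above (standard omp_* names from omp.h; assumes <stdlib.h> for
   malloc):

     omp_set_affinity_format("T#%0.2n bound to OS procs %A");
     #pragma omp parallel
     {
       // Query the required size first, then capture into a buffer.
       size_t need = omp_capture_affinity(NULL, 0, NULL); // NULL format => current
       char *buf = (char *)malloc(need + 1);
       if (buf) {
         omp_capture_affinity(buf, need + 1, NULL);
         free(buf);
       }
       omp_display_affinity(NULL); // print using the current format
     }
*/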
2056 
2057 void kmpc_set_stacksize(int arg) {
2058  // __kmp_aux_set_stacksize initializes the library if needed
2059  __kmp_aux_set_stacksize(arg);
2060 }
2061 
2062 void kmpc_set_stacksize_s(size_t arg) {
2063  // __kmp_aux_set_stacksize initializes the library if needed
2064  __kmp_aux_set_stacksize(arg);
2065 }
2066 
2067 void kmpc_set_blocktime(int arg) {
2068  int gtid, tid;
2069  kmp_info_t *thread;
2070 
2071  gtid = __kmp_entry_gtid();
2072  tid = __kmp_tid_from_gtid(gtid);
2073  thread = __kmp_thread_from_gtid(gtid);
2074 
2075  __kmp_aux_set_blocktime(arg, thread, tid);
2076 }
2077 
2078 void kmpc_set_library(int arg) {
2079  // __kmp_user_set_library initializes the library if needed
2080  __kmp_user_set_library((enum library_type)arg);
2081 }
2082 
2083 void kmpc_set_defaults(char const *str) {
2084  // __kmp_aux_set_defaults initializes the library if needed
2085  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2086 }
2087 
2088 void kmpc_set_disp_num_buffers(int arg) {
2089  // ignore after initialization because some teams have already
2090  // allocated dispatch buffers
2091  if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2092  arg <= KMP_MAX_DISP_NUM_BUFF) {
2093  __kmp_dispatch_num_buffers = arg;
2094  }
2095 }
2096 
2097 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2098 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2099  return -1;
2100 #else
2101  if (!TCR_4(__kmp_init_middle)) {
2102  __kmp_middle_initialize();
2103  }
2104  __kmp_assign_root_init_mask();
2105  return __kmp_aux_set_affinity_mask_proc(proc, mask);
2106 #endif
2107 }
2108 
2109 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2110 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2111  return -1;
2112 #else
2113  if (!TCR_4(__kmp_init_middle)) {
2114  __kmp_middle_initialize();
2115  }
2116  __kmp_assign_root_init_mask();
2117  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2118 #endif
2119 }
2120 
2121 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2122 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2123  return -1;
2124 #else
2125  if (!TCR_4(__kmp_init_middle)) {
2126  __kmp_middle_initialize();
2127  }
2128  __kmp_assign_root_init_mask();
2129  return __kmp_aux_get_affinity_mask_proc(proc, mask);
2130 #endif
2131 }
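/* Illustrative use of the kmp_* affinity-mask extension API that the routines
   above serve (an Intel/LLVM extension declared in omp.h; as above, the calls
   report failure when affinity is unsupported or the library is stubbed):

     kmp_affinity_mask_t mask;
     kmp_create_affinity_mask(&mask);
     if (kmp_set_affinity_mask_proc(3, &mask) == 0 &&
         kmp_set_affinity(&mask) == 0) {
       // the calling thread is now bound to OS proc 3
     }
     kmp_destroy_affinity_mask(&mask);
*/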
2132 
2133 /* -------------------------------------------------------------------------- */
2178 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2179  void *cpy_data, void (*cpy_func)(void *, void *),
2180  kmp_int32 didit) {
2181  void **data_ptr;
2182  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2183  __kmp_assert_valid_gtid(gtid);
2184 
2185  KMP_MB();
2186 
2187  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2188 
2189  if (__kmp_env_consistency_check) {
2190  if (loc == 0) {
2191  KMP_WARNING(ConstructIdentInvalid);
2192  }
2193  }
2194 
2195  // ToDo: Optimize the following two barriers into some kind of split barrier
2196 
2197  if (didit)
2198  *data_ptr = cpy_data;
2199 
2200 #if OMPT_SUPPORT
2201  ompt_frame_t *ompt_frame;
2202  if (ompt_enabled.enabled) {
2203  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2204  if (ompt_frame->enter_frame.ptr == NULL)
2205  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2206  }
2207  OMPT_STORE_RETURN_ADDRESS(gtid);
2208 #endif
2209 /* This barrier is not a barrier region boundary */
2210 #if USE_ITT_NOTIFY
2211  __kmp_threads[gtid]->th.th_ident = loc;
2212 #endif
2213  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2214 
2215  if (!didit)
2216  (*cpy_func)(cpy_data, *data_ptr);
2217 
2218  // Consider next barrier a user-visible barrier for barrier region boundaries
2219  // Nesting checks are already handled by the single construct checks
2220  {
2221 #if OMPT_SUPPORT
2222  OMPT_STORE_RETURN_ADDRESS(gtid);
2223 #endif
2224 #if USE_ITT_NOTIFY
2225  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2226 // tasks can overwrite the location)
2227 #endif
2228  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2229 #if OMPT_SUPPORT && OMPT_OPTIONAL
2230  if (ompt_enabled.enabled) {
2231  ompt_frame->enter_frame = ompt_data_none;
2232  }
2233 #endif
2234  }
2235 }
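/* Illustrative lowering sketch for a copyprivate clause (copy_x and didit are
   hypothetical compiler-generated names): for

     #pragma omp single copyprivate(x)
     { x = compute(); }

   the compiler emits roughly

     kmp_int32 didit = __kmpc_single(&loc, gtid);
     if (didit) { x = compute(); __kmpc_end_single(&loc, gtid); }
     __kmpc_copyprivate(&loc, gtid, sizeof(x), &x, copy_x, didit);

   where copy_x(void *dst, void *src) copies the listed variables. The
   executing thread publishes its &x, the first barrier above makes it visible
   to the team, every other thread then runs copy_x() from the executor's data,
   and the second barrier keeps the executor's data alive until all copies are
   done. */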
2236 
2237 /* --------------------------------------------------------------------------*/
2254 void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2255  void **data_ptr;
2256 
2257  KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2258 
2259  KMP_MB();
2260 
2261  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2262 
2263  if (__kmp_env_consistency_check) {
2264  if (loc == 0) {
2265  KMP_WARNING(ConstructIdentInvalid);
2266  }
2267  }
2268 
2269  // ToDo: Optimize the following barrier
2270 
2271  if (cpy_data)
2272  *data_ptr = cpy_data;
2273 
2274 #if OMPT_SUPPORT
2275  ompt_frame_t *ompt_frame;
2276  if (ompt_enabled.enabled) {
2277  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2278  if (ompt_frame->enter_frame.ptr == NULL)
2279  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2280  OMPT_STORE_RETURN_ADDRESS(gtid);
2281  }
2282 #endif
2283 /* This barrier is not a barrier region boundary */
2284 #if USE_ITT_NOTIFY
2285  __kmp_threads[gtid]->th.th_ident = loc;
2286 #endif
2287  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2288 
2289  return *data_ptr;
2290 }
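/* Sketch of the intended calling pattern, inferred from the code above: the
   thread that executed the preceding "single" passes its data pointer, the
   other threads pass NULL, and after the single barrier every thread receives
   the executor's pointer and performs its own copy:

     void *src = __kmpc_copyprivate_light(&loc, gtid, didit ? &x : NULL);
     if (!didit)
       memcpy(&x, src, sizeof(x));   // caller-side copy (hypothetical)
*/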
2291 
2292 /* -------------------------------------------------------------------------- */
2293 
2294 #define INIT_LOCK __kmp_init_user_lock_with_checks
2295 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2296 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2297 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2298 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2299 #define ACQUIRE_NESTED_LOCK_TIMED \
2300  __kmp_acquire_nested_user_lock_with_checks_timed
2301 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2302 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2303 #define TEST_LOCK __kmp_test_user_lock_with_checks
2304 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2305 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2306 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2307 
2308 // TODO: Make check abort messages use location info & pass it into
2309 // with_checks routines
2310 
2311 #if KMP_USE_DYNAMIC_LOCK
2312 
2313 // internal lock initializer
2314 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2315  kmp_dyna_lockseq_t seq) {
2316  if (KMP_IS_D_LOCK(seq)) {
2317  KMP_INIT_D_LOCK(lock, seq);
2318 #if USE_ITT_BUILD
2319  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2320 #endif
2321  } else {
2322  KMP_INIT_I_LOCK(lock, seq);
2323 #if USE_ITT_BUILD
2324  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2325  __kmp_itt_lock_creating(ilk->lock, loc);
2326 #endif
2327  }
2328 }
2329 
2330 // internal nest lock initializer
2331 static __forceinline void
2332 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2333  kmp_dyna_lockseq_t seq) {
2334 #if KMP_USE_TSX
2335  // Don't have nested lock implementation for speculative locks
2336  if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2337  seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2338  seq = __kmp_user_lock_seq;
2339 #endif
2340  switch (seq) {
2341  case lockseq_tas:
2342  seq = lockseq_nested_tas;
2343  break;
2344 #if KMP_USE_FUTEX
2345  case lockseq_futex:
2346  seq = lockseq_nested_futex;
2347  break;
2348 #endif
2349  case lockseq_ticket:
2350  seq = lockseq_nested_ticket;
2351  break;
2352  case lockseq_queuing:
2353  seq = lockseq_nested_queuing;
2354  break;
2355  case lockseq_drdpa:
2356  seq = lockseq_nested_drdpa;
2357  break;
2358  default:
2359  seq = lockseq_nested_queuing;
2360  }
2361  KMP_INIT_I_LOCK(lock, seq);
2362 #if USE_ITT_BUILD
2363  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2364  __kmp_itt_lock_creating(ilk->lock, loc);
2365 #endif
2366 }
2367 
2368 /* initialize the lock with a hint */
2369 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2370  uintptr_t hint) {
2371  KMP_DEBUG_ASSERT(__kmp_init_serial);
2372  if (__kmp_env_consistency_check && user_lock == NULL) {
2373  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2374  }
2375 
2376  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2377 
2378 #if OMPT_SUPPORT && OMPT_OPTIONAL
2379  // This is the case, if called from omp_init_lock_with_hint:
2380  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2381  if (!codeptr)
2382  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2383  if (ompt_enabled.ompt_callback_lock_init) {
2384  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2385  ompt_mutex_lock, (omp_lock_hint_t)hint,
2386  __ompt_get_mutex_impl_type(user_lock),
2387  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2388  }
2389 #endif
2390 }
2391 
2392 /* initialize the lock with a hint */
2393 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2394  void **user_lock, uintptr_t hint) {
2395  KMP_DEBUG_ASSERT(__kmp_init_serial);
2396  if (__kmp_env_consistency_check && user_lock == NULL) {
2397  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2398  }
2399 
2400  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2401 
2402 #if OMPT_SUPPORT && OMPT_OPTIONAL
2403  // This is the case, if called from omp_init_lock_with_hint:
2404  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2405  if (!codeptr)
2406  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2407  if (ompt_enabled.ompt_callback_lock_init) {
2408  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2409  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2410  __ompt_get_mutex_impl_type(user_lock),
2411  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2412  }
2413 #endif
2414 }
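/* Illustrative user-level usage of the hinted lock API (OpenMP 4.5+) that the
   two entry points above implement; the hint is advisory and may be remapped,
   e.g. speculative hints fall back to the default sequence for nested locks as
   handled above:

     omp_lock_t l;
     omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
     #pragma omp parallel
     {
       omp_set_lock(&l);
       // short critical work
       omp_unset_lock(&l);
     }
     omp_destroy_lock(&l);
*/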
2415 
2416 #endif // KMP_USE_DYNAMIC_LOCK
2417 
2418 /* initialize the lock */
2419 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2420 #if KMP_USE_DYNAMIC_LOCK
2421 
2422  KMP_DEBUG_ASSERT(__kmp_init_serial);
2423  if (__kmp_env_consistency_check && user_lock == NULL) {
2424  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2425  }
2426  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2427 
2428 #if OMPT_SUPPORT && OMPT_OPTIONAL
2429  // This is the case, if called from omp_init_lock_with_hint:
2430  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2431  if (!codeptr)
2432  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2433  if (ompt_enabled.ompt_callback_lock_init) {
2434  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2435  ompt_mutex_lock, omp_lock_hint_none,
2436  __ompt_get_mutex_impl_type(user_lock),
2437  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2438  }
2439 #endif
2440 
2441 #else // KMP_USE_DYNAMIC_LOCK
2442 
2443  static char const *const func = "omp_init_lock";
2444  kmp_user_lock_p lck;
2445  KMP_DEBUG_ASSERT(__kmp_init_serial);
2446 
2447  if (__kmp_env_consistency_check) {
2448  if (user_lock == NULL) {
2449  KMP_FATAL(LockIsUninitialized, func);
2450  }
2451  }
2452 
2453  KMP_CHECK_USER_LOCK_INIT();
2454 
2455  if ((__kmp_user_lock_kind == lk_tas) &&
2456  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2457  lck = (kmp_user_lock_p)user_lock;
2458  }
2459 #if KMP_USE_FUTEX
2460  else if ((__kmp_user_lock_kind == lk_futex) &&
2461  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2462  lck = (kmp_user_lock_p)user_lock;
2463  }
2464 #endif
2465  else {
2466  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2467  }
2468  INIT_LOCK(lck);
2469  __kmp_set_user_lock_location(lck, loc);
2470 
2471 #if OMPT_SUPPORT && OMPT_OPTIONAL
2472  // This is the case, if called from omp_init_lock_with_hint:
2473  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2474  if (!codeptr)
2475  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2476  if (ompt_enabled.ompt_callback_lock_init) {
2477  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2478  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2479  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2480  }
2481 #endif
2482 
2483 #if USE_ITT_BUILD
2484  __kmp_itt_lock_creating(lck);
2485 #endif /* USE_ITT_BUILD */
2486 
2487 #endif // KMP_USE_DYNAMIC_LOCK
2488 } // __kmpc_init_lock
2489 
2490 /* initialize the lock */
2491 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2492 #if KMP_USE_DYNAMIC_LOCK
2493 
2494  KMP_DEBUG_ASSERT(__kmp_init_serial);
2495  if (__kmp_env_consistency_check && user_lock == NULL) {
2496  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2497  }
2498  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2499 
2500 #if OMPT_SUPPORT && OMPT_OPTIONAL
2501  // This is the case, if called from omp_init_lock_with_hint:
2502  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2503  if (!codeptr)
2504  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2505  if (ompt_enabled.ompt_callback_lock_init) {
2506  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2507  ompt_mutex_nest_lock, omp_lock_hint_none,
2508  __ompt_get_mutex_impl_type(user_lock),
2509  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2510  }
2511 #endif
2512 
2513 #else // KMP_USE_DYNAMIC_LOCK
2514 
2515  static char const *const func = "omp_init_nest_lock";
2516  kmp_user_lock_p lck;
2517  KMP_DEBUG_ASSERT(__kmp_init_serial);
2518 
2519  if (__kmp_env_consistency_check) {
2520  if (user_lock == NULL) {
2521  KMP_FATAL(LockIsUninitialized, func);
2522  }
2523  }
2524 
2525  KMP_CHECK_USER_LOCK_INIT();
2526 
2527  if ((__kmp_user_lock_kind == lk_tas) &&
2528  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2529  OMP_NEST_LOCK_T_SIZE)) {
2530  lck = (kmp_user_lock_p)user_lock;
2531  }
2532 #if KMP_USE_FUTEX
2533  else if ((__kmp_user_lock_kind == lk_futex) &&
2534  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2535  OMP_NEST_LOCK_T_SIZE)) {
2536  lck = (kmp_user_lock_p)user_lock;
2537  }
2538 #endif
2539  else {
2540  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2541  }
2542 
2543  INIT_NESTED_LOCK(lck);
2544  __kmp_set_user_lock_location(lck, loc);
2545 
2546 #if OMPT_SUPPORT && OMPT_OPTIONAL
2547  // This is the case, if called from omp_init_lock_with_hint:
2548  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2549  if (!codeptr)
2550  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2551  if (ompt_enabled.ompt_callback_lock_init) {
2552  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2553  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2554  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2555  }
2556 #endif
2557 
2558 #if USE_ITT_BUILD
2559  __kmp_itt_lock_creating(lck);
2560 #endif /* USE_ITT_BUILD */
2561 
2562 #endif // KMP_USE_DYNAMIC_LOCK
2563 } // __kmpc_init_nest_lock
2564 
2565 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2566 #if KMP_USE_DYNAMIC_LOCK
2567 
2568 #if USE_ITT_BUILD
2569  kmp_user_lock_p lck;
2570  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2571  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2572  } else {
2573  lck = (kmp_user_lock_p)user_lock;
2574  }
2575  __kmp_itt_lock_destroyed(lck);
2576 #endif
2577 #if OMPT_SUPPORT && OMPT_OPTIONAL
2578  // This is the case, if called from omp_init_lock_with_hint:
2579  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2580  if (!codeptr)
2581  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2582  if (ompt_enabled.ompt_callback_lock_destroy) {
2583  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2584  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2585  }
2586 #endif
2587  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2588 #else
2589  kmp_user_lock_p lck;
2590 
2591  if ((__kmp_user_lock_kind == lk_tas) &&
2592  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2593  lck = (kmp_user_lock_p)user_lock;
2594  }
2595 #if KMP_USE_FUTEX
2596  else if ((__kmp_user_lock_kind == lk_futex) &&
2597  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2598  lck = (kmp_user_lock_p)user_lock;
2599  }
2600 #endif
2601  else {
2602  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2603  }
2604 
2605 #if OMPT_SUPPORT && OMPT_OPTIONAL
2606  // This is the case, if called from omp_init_lock_with_hint:
2607  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2608  if (!codeptr)
2609  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2610  if (ompt_enabled.ompt_callback_lock_destroy) {
2611  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2612  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2613  }
2614 #endif
2615 
2616 #if USE_ITT_BUILD
2617  __kmp_itt_lock_destroyed(lck);
2618 #endif /* USE_ITT_BUILD */
2619  DESTROY_LOCK(lck);
2620 
2621  if ((__kmp_user_lock_kind == lk_tas) &&
2622  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2623  ;
2624  }
2625 #if KMP_USE_FUTEX
2626  else if ((__kmp_user_lock_kind == lk_futex) &&
2627  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2628  ;
2629  }
2630 #endif
2631  else {
2632  __kmp_user_lock_free(user_lock, gtid, lck);
2633  }
2634 #endif // KMP_USE_DYNAMIC_LOCK
2635 } // __kmpc_destroy_lock
2636 
2637 /* destroy the lock */
2638 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2639 #if KMP_USE_DYNAMIC_LOCK
2640 
2641 #if USE_ITT_BUILD
2642  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2643  __kmp_itt_lock_destroyed(ilk->lock);
2644 #endif
2645 #if OMPT_SUPPORT && OMPT_OPTIONAL
2646  // This is the case, if called from omp_init_lock_with_hint:
2647  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2648  if (!codeptr)
2649  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2650  if (ompt_enabled.ompt_callback_lock_destroy) {
2651  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2652  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2653  }
2654 #endif
2655  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2656 
2657 #else // KMP_USE_DYNAMIC_LOCK
2658 
2659  kmp_user_lock_p lck;
2660 
2661  if ((__kmp_user_lock_kind == lk_tas) &&
2662  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2663  OMP_NEST_LOCK_T_SIZE)) {
2664  lck = (kmp_user_lock_p)user_lock;
2665  }
2666 #if KMP_USE_FUTEX
2667  else if ((__kmp_user_lock_kind == lk_futex) &&
2668  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2669  OMP_NEST_LOCK_T_SIZE)) {
2670  lck = (kmp_user_lock_p)user_lock;
2671  }
2672 #endif
2673  else {
2674  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2675  }
2676 
2677 #if OMPT_SUPPORT && OMPT_OPTIONAL
2678  // This is the case, if called from omp_init_lock_with_hint:
2679  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2680  if (!codeptr)
2681  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2682  if (ompt_enabled.ompt_callback_lock_destroy) {
2683  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2684  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2685  }
2686 #endif
2687 
2688 #if USE_ITT_BUILD
2689  __kmp_itt_lock_destroyed(lck);
2690 #endif /* USE_ITT_BUILD */
2691 
2692  DESTROY_NESTED_LOCK(lck);
2693 
2694  if ((__kmp_user_lock_kind == lk_tas) &&
2695  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2696  OMP_NEST_LOCK_T_SIZE)) {
2697  ;
2698  }
2699 #if KMP_USE_FUTEX
2700  else if ((__kmp_user_lock_kind == lk_futex) &&
2701  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2702  OMP_NEST_LOCK_T_SIZE)) {
2703  ;
2704  }
2705 #endif
2706  else {
2707  __kmp_user_lock_free(user_lock, gtid, lck);
2708  }
2709 #endif // KMP_USE_DYNAMIC_LOCK
2710 } // __kmpc_destroy_nest_lock
2711 
2712 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2713  KMP_COUNT_BLOCK(OMP_set_lock);
2714 #if KMP_USE_DYNAMIC_LOCK
2715  int tag = KMP_EXTRACT_D_TAG(user_lock);
2716 #if USE_ITT_BUILD
2717  __kmp_itt_lock_acquiring(
2718  (kmp_user_lock_p)
2719  user_lock); // itt function will get to the right lock object.
2720 #endif
2721 #if OMPT_SUPPORT && OMPT_OPTIONAL
2722  // This is the case, if called from omp_init_lock_with_hint:
2723  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2724  if (!codeptr)
2725  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2726  if (ompt_enabled.ompt_callback_mutex_acquire) {
2727  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2728  ompt_mutex_lock, omp_lock_hint_none,
2729  __ompt_get_mutex_impl_type(user_lock),
2730  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2731  }
2732 #endif
2733 #if KMP_USE_INLINED_TAS
2734  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2735  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2736  } else
2737 #elif KMP_USE_INLINED_FUTEX
2738  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2739  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2740  } else
2741 #endif
2742  {
2743  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2744  }
2745 #if USE_ITT_BUILD
2746  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2747 #endif
2748 #if OMPT_SUPPORT && OMPT_OPTIONAL
2749  if (ompt_enabled.ompt_callback_mutex_acquired) {
2750  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2751  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2752  }
2753 #endif
2754 
2755 #else // KMP_USE_DYNAMIC_LOCK
2756 
2757  kmp_user_lock_p lck;
2758 
2759  if ((__kmp_user_lock_kind == lk_tas) &&
2760  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2761  lck = (kmp_user_lock_p)user_lock;
2762  }
2763 #if KMP_USE_FUTEX
2764  else if ((__kmp_user_lock_kind == lk_futex) &&
2765  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2766  lck = (kmp_user_lock_p)user_lock;
2767  }
2768 #endif
2769  else {
2770  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2771  }
2772 
2773 #if USE_ITT_BUILD
2774  __kmp_itt_lock_acquiring(lck);
2775 #endif /* USE_ITT_BUILD */
2776 #if OMPT_SUPPORT && OMPT_OPTIONAL
2777  // This is the case, if called from omp_init_lock_with_hint:
2778  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2779  if (!codeptr)
2780  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2781  if (ompt_enabled.ompt_callback_mutex_acquire) {
2782  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2783  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2784  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2785  }
2786 #endif
2787 
2788  ACQUIRE_LOCK(lck, gtid);
2789 
2790 #if USE_ITT_BUILD
2791  __kmp_itt_lock_acquired(lck);
2792 #endif /* USE_ITT_BUILD */
2793 
2794 #if OMPT_SUPPORT && OMPT_OPTIONAL
2795  if (ompt_enabled.ompt_callback_mutex_acquired) {
2796  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2797  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2798  }
2799 #endif
2800 
2801 #endif // KMP_USE_DYNAMIC_LOCK
2802 }
2803 
2804 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2805 #if KMP_USE_DYNAMIC_LOCK
2806 
2807 #if USE_ITT_BUILD
2808  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2809 #endif
2810 #if OMPT_SUPPORT && OMPT_OPTIONAL
2811  // This is the case, if called from omp_init_lock_with_hint:
2812  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2813  if (!codeptr)
2814  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2815  if (ompt_enabled.enabled) {
2816  if (ompt_enabled.ompt_callback_mutex_acquire) {
2817  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2818  ompt_mutex_nest_lock, omp_lock_hint_none,
2819  __ompt_get_mutex_impl_type(user_lock),
2820  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2821  }
2822  }
2823 #endif
2824  int acquire_status =
2825  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2826  (void)acquire_status;
2827 #if USE_ITT_BUILD
2828  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2829 #endif
2830 
2831 #if OMPT_SUPPORT && OMPT_OPTIONAL
2832  if (ompt_enabled.enabled) {
2833  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2834  if (ompt_enabled.ompt_callback_mutex_acquired) {
2835  // lock_first
2836  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2837  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2838  codeptr);
2839  }
2840  } else {
2841  if (ompt_enabled.ompt_callback_nest_lock) {
2842  // lock_next
2843  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2844  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2845  }
2846  }
2847  }
2848 #endif
2849 
2850 #else // KMP_USE_DYNAMIC_LOCK
2851  int acquire_status;
2852  kmp_user_lock_p lck;
2853 
2854  if ((__kmp_user_lock_kind == lk_tas) &&
2855  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2856  OMP_NEST_LOCK_T_SIZE)) {
2857  lck = (kmp_user_lock_p)user_lock;
2858  }
2859 #if KMP_USE_FUTEX
2860  else if ((__kmp_user_lock_kind == lk_futex) &&
2861  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2862  OMP_NEST_LOCK_T_SIZE)) {
2863  lck = (kmp_user_lock_p)user_lock;
2864  }
2865 #endif
2866  else {
2867  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2868  }
2869 
2870 #if USE_ITT_BUILD
2871  __kmp_itt_lock_acquiring(lck);
2872 #endif /* USE_ITT_BUILD */
2873 #if OMPT_SUPPORT && OMPT_OPTIONAL
2874  // This is the case, if called from omp_init_lock_with_hint:
2875  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2876  if (!codeptr)
2877  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2878  if (ompt_enabled.enabled) {
2879  if (ompt_enabled.ompt_callback_mutex_acquire) {
2880  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2881  ompt_mutex_nest_lock, omp_lock_hint_none,
2882  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2883  codeptr);
2884  }
2885  }
2886 #endif
2887 
2888  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2889 
2890 #if USE_ITT_BUILD
2891  __kmp_itt_lock_acquired(lck);
2892 #endif /* USE_ITT_BUILD */
2893 
2894 #if OMPT_SUPPORT && OMPT_OPTIONAL
2895  if (ompt_enabled.enabled) {
2896  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2897  if (ompt_enabled.ompt_callback_mutex_acquired) {
2898  // lock_first
2899  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2900  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2901  }
2902  } else {
2903  if (ompt_enabled.ompt_callback_nest_lock) {
2904  // lock_next
2905  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2906  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2907  }
2908  }
2909  }
2910 #endif
2911 
2912 #endif // KMP_USE_DYNAMIC_LOCK
2913 }
2914 
2915 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2916 #if KMP_USE_DYNAMIC_LOCK
2917 
2918  int tag = KMP_EXTRACT_D_TAG(user_lock);
2919 #if USE_ITT_BUILD
2920  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2921 #endif
2922 #if KMP_USE_INLINED_TAS
2923  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2924  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2925  } else
2926 #elif KMP_USE_INLINED_FUTEX
2927  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2928  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2929  } else
2930 #endif
2931  {
2932  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2933  }
2934 
2935 #if OMPT_SUPPORT && OMPT_OPTIONAL
2936  // This is the case, if called from omp_init_lock_with_hint:
2937  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2938  if (!codeptr)
2939  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2940  if (ompt_enabled.ompt_callback_mutex_released) {
2941  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2942  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2943  }
2944 #endif
2945 
2946 #else // KMP_USE_DYNAMIC_LOCK
2947 
2948  kmp_user_lock_p lck;
2949 
2950  /* Can't use serial interval since not block structured */
2951  /* release the lock */
2952 
2953  if ((__kmp_user_lock_kind == lk_tas) &&
2954  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2955 #if KMP_OS_LINUX && \
2956  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2957 // "fast" path implemented to fix customer performance issue
2958 #if USE_ITT_BUILD
2959  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2960 #endif /* USE_ITT_BUILD */
2961  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2962  KMP_MB();
2963 
2964 #if OMPT_SUPPORT && OMPT_OPTIONAL
2965  // This is the case, if called from omp_init_lock_with_hint:
2966  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2967  if (!codeptr)
2968  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2969  if (ompt_enabled.ompt_callback_mutex_released) {
2970  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2971  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2972  }
2973 #endif
2974 
2975  return;
2976 #else
2977  lck = (kmp_user_lock_p)user_lock;
2978 #endif
2979  }
2980 #if KMP_USE_FUTEX
2981  else if ((__kmp_user_lock_kind == lk_futex) &&
2982  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2983  lck = (kmp_user_lock_p)user_lock;
2984  }
2985 #endif
2986  else {
2987  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2988  }
2989 
2990 #if USE_ITT_BUILD
2991  __kmp_itt_lock_releasing(lck);
2992 #endif /* USE_ITT_BUILD */
2993 
2994  RELEASE_LOCK(lck, gtid);
2995 
2996 #if OMPT_SUPPORT && OMPT_OPTIONAL
2997  // This is the case, if called from omp_init_lock_with_hint:
2998  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2999  if (!codeptr)
3000  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3001  if (ompt_enabled.ompt_callback_mutex_released) {
3002  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3003  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3004  }
3005 #endif
3006 
3007 #endif // KMP_USE_DYNAMIC_LOCK
3008 }
3009 
3010 /* release the lock */
3011 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3012 #if KMP_USE_DYNAMIC_LOCK
3013 
3014 #if USE_ITT_BUILD
3015  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3016 #endif
3017  int release_status =
3018  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3019  (void)release_status;
3020 
3021 #if OMPT_SUPPORT && OMPT_OPTIONAL
3022  // This is the case, if called from omp_init_lock_with_hint:
3023  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3024  if (!codeptr)
3025  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3026  if (ompt_enabled.enabled) {
3027  if (release_status == KMP_LOCK_RELEASED) {
3028  if (ompt_enabled.ompt_callback_mutex_released) {
3029  // release_lock_last
3030  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3031  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3032  codeptr);
3033  }
3034  } else if (ompt_enabled.ompt_callback_nest_lock) {
3035  // release_lock_prev
3036  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3037  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3038  }
3039  }
3040 #endif
3041 
3042 #else // KMP_USE_DYNAMIC_LOCK
3043 
3044  kmp_user_lock_p lck;
3045 
3046  /* Can't use serial interval since not block structured */
3047 
3048  if ((__kmp_user_lock_kind == lk_tas) &&
3049  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3050  OMP_NEST_LOCK_T_SIZE)) {
3051 #if KMP_OS_LINUX && \
3052  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3053  // "fast" path implemented to fix customer performance issue
3054  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3055 #if USE_ITT_BUILD
3056  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3057 #endif /* USE_ITT_BUILD */
3058 
3059 #if OMPT_SUPPORT && OMPT_OPTIONAL
3060  int release_status = KMP_LOCK_STILL_HELD;
3061 #endif
3062 
3063  if (--(tl->lk.depth_locked) == 0) {
3064  TCW_4(tl->lk.poll, 0);
3065 #if OMPT_SUPPORT && OMPT_OPTIONAL
3066  release_status = KMP_LOCK_RELEASED;
3067 #endif
3068  }
3069  KMP_MB();
3070 
3071 #if OMPT_SUPPORT && OMPT_OPTIONAL
3072  // This is the case, if called from omp_init_lock_with_hint:
3073  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3074  if (!codeptr)
3075  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3076  if (ompt_enabled.enabled) {
3077  if (release_status == KMP_LOCK_RELEASED) {
3078  if (ompt_enabled.ompt_callback_mutex_released) {
3079  // release_lock_last
3080  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3081  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3082  }
3083  } else if (ompt_enabled.ompt_callback_nest_lock) {
3084  // release_lock_previous
3085  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3086  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3087  }
3088  }
3089 #endif
3090 
3091  return;
3092 #else
3093  lck = (kmp_user_lock_p)user_lock;
3094 #endif
3095  }
3096 #if KMP_USE_FUTEX
3097  else if ((__kmp_user_lock_kind == lk_futex) &&
3098  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3099  OMP_NEST_LOCK_T_SIZE)) {
3100  lck = (kmp_user_lock_p)user_lock;
3101  }
3102 #endif
3103  else {
3104  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3105  }
3106 
3107 #if USE_ITT_BUILD
3108  __kmp_itt_lock_releasing(lck);
3109 #endif /* USE_ITT_BUILD */
3110 
3111  int release_status;
3112  release_status = RELEASE_NESTED_LOCK(lck, gtid);
3113 #if OMPT_SUPPORT && OMPT_OPTIONAL
3114  // This is the case, if called from omp_init_lock_with_hint:
3115  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3116  if (!codeptr)
3117  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3118  if (ompt_enabled.enabled) {
3119  if (release_status == KMP_LOCK_RELEASED) {
3120  if (ompt_enabled.ompt_callback_mutex_released) {
3121  // release_lock_last
3122  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3123  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3124  }
3125  } else if (ompt_enabled.ompt_callback_nest_lock) {
3126  // release_lock_previous
3127  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3128  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3129  }
3130  }
3131 #endif
3132 
3133 #endif // KMP_USE_DYNAMIC_LOCK
3134 }
3135 
3136 /* try to acquire the lock */
3137 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3138  KMP_COUNT_BLOCK(OMP_test_lock);
3139 
3140 #if KMP_USE_DYNAMIC_LOCK
3141  int rc;
3142  int tag = KMP_EXTRACT_D_TAG(user_lock);
3143 #if USE_ITT_BUILD
3144  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3145 #endif
3146 #if OMPT_SUPPORT && OMPT_OPTIONAL
3147  // This is the case, if called from omp_init_lock_with_hint:
3148  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3149  if (!codeptr)
3150  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3151  if (ompt_enabled.ompt_callback_mutex_acquire) {
3152  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3153  ompt_mutex_test_lock, omp_lock_hint_none,
3154  __ompt_get_mutex_impl_type(user_lock),
3155  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3156  }
3157 #endif
3158 #if KMP_USE_INLINED_TAS
3159  if (tag == locktag_tas && !__kmp_env_consistency_check) {
3160  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3161  } else
3162 #elif KMP_USE_INLINED_FUTEX
3163  if (tag == locktag_futex && !__kmp_env_consistency_check) {
3164  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3165  } else
3166 #endif
3167  {
3168  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3169  }
3170  if (rc) {
3171 #if USE_ITT_BUILD
3172  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3173 #endif
3174 #if OMPT_SUPPORT && OMPT_OPTIONAL
3175  if (ompt_enabled.ompt_callback_mutex_acquired) {
3176  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3177  ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3178  }
3179 #endif
3180  return FTN_TRUE;
3181  } else {
3182 #if USE_ITT_BUILD
3183  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3184 #endif
3185  return FTN_FALSE;
3186  }
3187 
3188 #else // KMP_USE_DYNAMIC_LOCK
3189 
3190  kmp_user_lock_p lck;
3191  int rc;
3192 
3193  if ((__kmp_user_lock_kind == lk_tas) &&
3194  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3195  lck = (kmp_user_lock_p)user_lock;
3196  }
3197 #if KMP_USE_FUTEX
3198  else if ((__kmp_user_lock_kind == lk_futex) &&
3199  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3200  lck = (kmp_user_lock_p)user_lock;
3201  }
3202 #endif
3203  else {
3204  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3205  }
3206 
3207 #if USE_ITT_BUILD
3208  __kmp_itt_lock_acquiring(lck);
3209 #endif /* USE_ITT_BUILD */
3210 #if OMPT_SUPPORT && OMPT_OPTIONAL
3211  // This is the case, if called from omp_init_lock_with_hint:
3212  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3213  if (!codeptr)
3214  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3215  if (ompt_enabled.ompt_callback_mutex_acquire) {
3216  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3217  ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3218  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3219  }
3220 #endif
3221 
3222  rc = TEST_LOCK(lck, gtid);
3223 #if USE_ITT_BUILD
3224  if (rc) {
3225  __kmp_itt_lock_acquired(lck);
3226  } else {
3227  __kmp_itt_lock_cancelled(lck);
3228  }
3229 #endif /* USE_ITT_BUILD */
3230 #if OMPT_SUPPORT && OMPT_OPTIONAL
3231  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3232  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3233  ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3234  }
3235 #endif
3236 
3237  return (rc ? FTN_TRUE : FTN_FALSE);
3238 
3239  /* Can't use serial interval since not block structured */
3240 
3241 #endif // KMP_USE_DYNAMIC_LOCK
3242 }
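/* Illustrative user-level pattern for the non-blocking acquire implemented
   above (the same semantics are exposed to users as omp_test_lock, which
   returns nonzero only when the lock was obtained):

     if (omp_test_lock(&l)) {
       // lock acquired without blocking
       omp_unset_lock(&l);
     } else {
       // do other useful work and retry later
     }
*/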
3243 
3244 /* try to acquire the lock */
3245 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3246 #if KMP_USE_DYNAMIC_LOCK
3247  int rc;
3248 #if USE_ITT_BUILD
3249  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3250 #endif
3251 #if OMPT_SUPPORT && OMPT_OPTIONAL
3252  // This is the case, if called from omp_init_lock_with_hint:
3253  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3254  if (!codeptr)
3255  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3256  if (ompt_enabled.ompt_callback_mutex_acquire) {
3257  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3258  ompt_mutex_test_nest_lock, omp_lock_hint_none,
3259  __ompt_get_mutex_impl_type(user_lock),
3260  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3261  }
3262 #endif
3263  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3264 #if USE_ITT_BUILD
3265  if (rc) {
3266  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3267  } else {
3268  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3269  }
3270 #endif
3271 #if OMPT_SUPPORT && OMPT_OPTIONAL
3272  if (ompt_enabled.enabled && rc) {
3273  if (rc == 1) {
3274  if (ompt_enabled.ompt_callback_mutex_acquired) {
3275  // lock_first
3276  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3277  ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3278  codeptr);
3279  }
3280  } else {
3281  if (ompt_enabled.ompt_callback_nest_lock) {
3282  // lock_next
3283  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3284  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3285  }
3286  }
3287  }
3288 #endif
3289  return rc;
3290 
3291 #else // KMP_USE_DYNAMIC_LOCK
3292 
3293  kmp_user_lock_p lck;
3294  int rc;
3295 
3296  if ((__kmp_user_lock_kind == lk_tas) &&
3297  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3298  OMP_NEST_LOCK_T_SIZE)) {
3299  lck = (kmp_user_lock_p)user_lock;
3300  }
3301 #if KMP_USE_FUTEX
3302  else if ((__kmp_user_lock_kind == lk_futex) &&
3303  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3304  OMP_NEST_LOCK_T_SIZE)) {
3305  lck = (kmp_user_lock_p)user_lock;
3306  }
3307 #endif
3308  else {
3309  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3310  }
3311 
3312 #if USE_ITT_BUILD
3313  __kmp_itt_lock_acquiring(lck);
3314 #endif /* USE_ITT_BUILD */
3315 
3316 #if OMPT_SUPPORT && OMPT_OPTIONAL
3317  // This is the case, if called from omp_init_lock_with_hint:
3318  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3319  if (!codeptr)
3320  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3321  if (ompt_enabled.enabled &&
3322  ompt_enabled.ompt_callback_mutex_acquire) {
3323  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3324  ompt_mutex_test_nest_lock, omp_lock_hint_none,
3325  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3326  codeptr);
3327  }
3328 #endif
3329 
3330  rc = TEST_NESTED_LOCK(lck, gtid);
3331 #if USE_ITT_BUILD
3332  if (rc) {
3333  __kmp_itt_lock_acquired(lck);
3334  } else {
3335  __kmp_itt_lock_cancelled(lck);
3336  }
3337 #endif /* USE_ITT_BUILD */
3338 #if OMPT_SUPPORT && OMPT_OPTIONAL
3339  if (ompt_enabled.enabled && rc) {
3340  if (rc == 1) {
3341  if (ompt_enabled.ompt_callback_mutex_acquired) {
3342  // lock_first
3343  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3344  ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3345  }
3346  } else {
3347  if (ompt_enabled.ompt_callback_nest_lock) {
3348  // lock_next
3349  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3350  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3351  }
3352  }
3353  }
3354 #endif
3355  return rc;
3356 
3357  /* Can't use serial interval since not block structured */
3358 
3359 #endif // KMP_USE_DYNAMIC_LOCK
3360 }
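/* Illustrative user-level nested-lock lifecycle matching the entry points
   above; omp_test_nest_lock returns the new nesting count (0 on failure),
   which is why rc == 1 above is reported as the first acquisition:

     omp_nest_lock_t nl;          // initialized once with omp_init_nest_lock(&nl)

     void recurse(int depth) {
       omp_set_nest_lock(&nl);    // the owning thread may re-acquire
       if (depth > 0)
         recurse(depth - 1);
       omp_unset_nest_lock(&nl);  // fully released when the count drops to 0
     }
*/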
3361 
3362 // Interface to fast scalable reduce methods routines
3363 
3364 // keep the selected method in a thread local structure for cross-function
3365 // usage: will be used in __kmpc_end_reduce* functions;
3366 // another solution: to re-determine the method one more time in
3367 // __kmpc_end_reduce* functions (new prototype required then)
3368 // AT: which solution is better?
3369 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3370  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3371 
3372 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3373  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3374 
3375 // description of the packed_reduction_method variable: look at the macros in
3376 // kmp.h
3377 
3378 // used in a critical section reduce block
3379 static __forceinline void
3380 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3381  kmp_critical_name *crit) {
3382 
3383  // this lock was visible to a customer and to the threading profile tool as a
3384  // serial overhead span (although it's used for an internal purpose only)
3385  // why was it visible in previous implementation?
3386  // should we keep it visible in new reduce block?
3387  kmp_user_lock_p lck;
3388 
3389 #if KMP_USE_DYNAMIC_LOCK
3390 
3391  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3392  // Check if it is initialized.
3393  if (*lk == 0) {
3394  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3395  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3396  KMP_GET_D_TAG(__kmp_user_lock_seq));
3397  } else {
3398  __kmp_init_indirect_csptr(crit, loc, global_tid,
3399  KMP_GET_I_TAG(__kmp_user_lock_seq));
3400  }
3401  }
3402  // Branch for accessing the actual lock object and set operation. This
3403  // branching is inevitable since this lock initialization does not follow the
3404  // normal dispatch path (lock table is not used).
3405  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3406  lck = (kmp_user_lock_p)lk;
3407  KMP_DEBUG_ASSERT(lck != NULL);
3408  if (__kmp_env_consistency_check) {
3409  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3410  }
3411  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3412  } else {
3413  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3414  lck = ilk->lock;
3415  KMP_DEBUG_ASSERT(lck != NULL);
3416  if (__kmp_env_consistency_check) {
3417  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3418  }
3419  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3420  }
3421 
3422 #else // KMP_USE_DYNAMIC_LOCK
3423 
3424  // We know that the fast reduction code is only emitted by Intel compilers
3425  // with 32 byte critical sections. If there isn't enough space, then we
3426  // have to use a pointer.
3427  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3428  lck = (kmp_user_lock_p)crit;
3429  } else {
3430  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3431  }
3432  KMP_DEBUG_ASSERT(lck != NULL);
3433 
3434  if (__kmp_env_consistency_check)
3435  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3436 
3437  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3438 
3439 #endif // KMP_USE_DYNAMIC_LOCK
3440 }
3441 
3442 // used in a critical section reduce block
3443 static __forceinline void
3444 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3445  kmp_critical_name *crit) {
3446 
3447  kmp_user_lock_p lck;
3448 
3449 #if KMP_USE_DYNAMIC_LOCK
3450 
3451  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3452  lck = (kmp_user_lock_p)crit;
3453  if (__kmp_env_consistency_check)
3454  __kmp_pop_sync(global_tid, ct_critical, loc);
3455  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3456  } else {
3457  kmp_indirect_lock_t *ilk =
3458  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3459  if (__kmp_env_consistency_check)
3460  __kmp_pop_sync(global_tid, ct_critical, loc);
3461  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3462  }
3463 
3464 #else // KMP_USE_DYNAMIC_LOCK
3465 
3466  // We know that the fast reduction code is only emitted by Intel compilers
3467  // with 32 byte critical sections. If there isn't enough space, then we have
3468  // to use a pointer.
3469  if (__kmp_base_user_lock_size > 32) {
3470  lck = *((kmp_user_lock_p *)crit);
3471  KMP_ASSERT(lck != NULL);
3472  } else {
3473  lck = (kmp_user_lock_p)crit;
3474  }
3475 
3476  if (__kmp_env_consistency_check)
3477  __kmp_pop_sync(global_tid, ct_critical, loc);
3478 
3479  __kmp_release_user_lock_with_checks(lck, global_tid);
3480 
3481 #endif // KMP_USE_DYNAMIC_LOCK
3482 } // __kmp_end_critical_section_reduce_block
3483 
3484 static __forceinline int
3485 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3486  int *task_state) {
3487  kmp_team_t *team;
3488 
3489  // Check whether we are inside a teams construct.
3490  if (th->th.th_teams_microtask) {
3491  *team_p = team = th->th.th_team;
3492  if (team->t.t_level == th->th.th_teams_level) {
3493  // This is reduction at teams construct.
3494  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3495  // Let's swap teams temporarily for the reduction.
3496  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3497  th->th.th_team = team->t.t_parent;
3498  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3499  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3500  *task_state = th->th.th_task_state;
3501  th->th.th_task_state = 0;
3502 
3503  return 1;
3504  }
3505  }
3506  return 0;
3507 }
3508 
3509 static __forceinline void
3510 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3511  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3512  th->th.th_info.ds.ds_tid = 0;
3513  th->th.th_team = team;
3514  th->th.th_team_nproc = team->t.t_nproc;
3515  th->th.th_task_team = team->t.t_task_team[task_state];
3516  __kmp_type_convert(task_state, &(th->th.th_task_state));
3517 }
3518 
3519 /* 2.a.i. Reduce Block without a terminating barrier */
3535 kmp_int32
3536 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3537  size_t reduce_size, void *reduce_data,
3538  void (*reduce_func)(void *lhs_data, void *rhs_data),
3539  kmp_critical_name *lck) {
3540 
3541  KMP_COUNT_BLOCK(REDUCE_nowait);
3542  int retval = 0;
3543  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3544  kmp_info_t *th;
3545  kmp_team_t *team;
3546  int teams_swapped = 0, task_state;
3547  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3548  __kmp_assert_valid_gtid(global_tid);
3549 
3550  // why do we need this initialization here at all?
3551  // Reduction clause cannot be used as a stand-alone directive.
3552 
3553  // do not call __kmp_serial_initialize(), it will be called by
3554  // __kmp_parallel_initialize() if needed
3555  // possible detection of false-positive race by the threadchecker ???
3556  if (!TCR_4(__kmp_init_parallel))
3557  __kmp_parallel_initialize();
3558 
3559  __kmp_resume_if_soft_paused();
3560 
3561 // check correctness of reduce block nesting
3562 #if KMP_USE_DYNAMIC_LOCK
3563  if (__kmp_env_consistency_check)
3564  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3565 #else
3566  if (__kmp_env_consistency_check)
3567  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3568 #endif
3569 
3570  th = __kmp_thread_from_gtid(global_tid);
3571  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3572 
3573  // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3574  // the value should be kept in a variable
3575  // the variable should be either a construct-specific or thread-specific
3576  // property, not a team specific property
3577  // (a thread can reach the next reduce block on the next construct, reduce
3578  // method may differ on the next construct)
3579  // an ident_t "loc" parameter could be used as a construct-specific property
3580  // (what if loc == 0?)
3581  // (if both construct-specific and team-specific variables were shared,
3582  // then unnecessary extra syncs would be needed)
3583  // a thread-specific variable is better regarding two issues above (next
3584  // construct and extra syncs)
3585  // a thread-specific "th_local.reduction_method" variable is used currently
3586  // each thread executes the 'determine' and 'set' lines (restricting this to
3587  // one thread would only add unnecessary extra syncs)
3588 
3589  packed_reduction_method = __kmp_determine_reduction_method(
3590  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3591  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3592 
3593  OMPT_REDUCTION_DECL(th, global_tid);
3594  if (packed_reduction_method == critical_reduce_block) {
3595 
3596  OMPT_REDUCTION_BEGIN;
3597 
3598  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3599  retval = 1;
3600 
3601  } else if (packed_reduction_method == empty_reduce_block) {
3602 
3603  OMPT_REDUCTION_BEGIN;
3604 
3605  // usage: if team size == 1, no synchronization is required ( Intel
3606  // platforms only )
3607  retval = 1;
3608 
3609  } else if (packed_reduction_method == atomic_reduce_block) {
3610 
3611  retval = 2;
3612 
3613  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3614  // won't be called by the code gen)
3615  // (it's not quite good, because the checking block has been closed by
3616  // this 'pop',
3617  // but atomic operation has not been executed yet, will be executed
3618  // slightly later, literally on next instruction)
3619  if (__kmp_env_consistency_check)
3620  __kmp_pop_sync(global_tid, ct_reduce, loc);
3621 
3622  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3623  tree_reduce_block)) {
3624 
3625 // AT: performance issue: a real barrier here
3626 // AT: (if the primary thread is slow, other threads are blocked here waiting
3627 // for the primary thread to come and release them)
3628 // AT: (this is not what a customer might expect when specifying the NOWAIT clause)
3629 // AT: (specifying NOWAIT won't improve performance here, which may be
3630 // confusing to a customer)
3631 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3632 // might go faster and be more in line with the intent of NOWAIT
3633 // AT: TO DO: run the EPCC benchmark and compare times
3634 
3635 // this barrier should be invisible to a customer and to the threading profile
3636 // tool (it's neither a terminating barrier nor customer's code, it's
3637 // used for an internal purpose)
3638 #if OMPT_SUPPORT
3639  // JP: can this barrier potentially lead to task scheduling?
3640  // JP: as long as there is a barrier in the implementation, OMPT should and
3641  // will provide the barrier events,
3642  // so we set up the necessary frame/return addresses.
3643  ompt_frame_t *ompt_frame;
3644  if (ompt_enabled.enabled) {
3645  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3646  if (ompt_frame->enter_frame.ptr == NULL)
3647  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3648  }
3649  OMPT_STORE_RETURN_ADDRESS(global_tid);
3650 #endif
3651 #if USE_ITT_NOTIFY
3652  __kmp_threads[global_tid]->th.th_ident = loc;
3653 #endif
3654  retval =
3655  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3656  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3657  retval = (retval != 0) ? (0) : (1);
3658 #if OMPT_SUPPORT && OMPT_OPTIONAL
3659  if (ompt_enabled.enabled) {
3660  ompt_frame->enter_frame = ompt_data_none;
3661  }
3662 #endif
3663 
3664  // all other workers except primary thread should do this pop here
3665  // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3666  if (__kmp_env_consistency_check) {
3667  if (retval == 0) {
3668  __kmp_pop_sync(global_tid, ct_reduce, loc);
3669  }
3670  }
3671 
3672  } else {
3673 
3674  // should never reach this block
3675  KMP_ASSERT(0); // "unexpected method"
3676  }
3677  if (teams_swapped) {
3678  __kmp_restore_swapped_teams(th, team, task_state);
3679  }
3680  KA_TRACE(
3681  10,
3682  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3683  global_tid, packed_reduction_method, retval));
3684 
3685  return retval;
3686 }
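/* Illustrative caller-side sketch (compiler-generated; reduce_list, reduce_fn,
   and crit_name are hypothetical names): for
   "#pragma omp for reduction(+:sum) nowait" the generated code roughly does

     switch (__kmpc_reduce_nowait(&loc, gtid, 1, sizeof(local_sum), reduce_list,
                                  reduce_fn, &crit_name)) {
     case 1: // critical / empty / tree-primary path: combine, then end
       global_sum += local_sum;
       __kmpc_end_reduce_nowait(&loc, gtid, &crit_name);
       break;
     case 2: // atomic path: combine with atomics, no end call is generated
       #pragma omp atomic
       global_sum += local_sum;
       break;
     default: // 0: this thread's contribution was already combined in the barrier
       break;
     }
*/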
3687 
3696 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3697  kmp_critical_name *lck) {
3698 
3699  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3700 
3701  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3702  __kmp_assert_valid_gtid(global_tid);
3703 
3704  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3705 
3706  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3707 
3708  if (packed_reduction_method == critical_reduce_block) {
3709 
3710  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3711  OMPT_REDUCTION_END;
3712 
3713  } else if (packed_reduction_method == empty_reduce_block) {
3714 
3715  // usage: if team size == 1, no synchronization is required ( on Intel
3716  // platforms only )
3717 
3718  OMPT_REDUCTION_END;
3719 
3720  } else if (packed_reduction_method == atomic_reduce_block) {
3721 
3722  // neither the primary thread nor the other workers should get here
3723  // (code gen does not generate this call in case 2: atomic reduce block)
3724  // actually it would be better to remove this else-if entirely;
3725  // after removal this value would be caught by the 'else' and would assert
3726 
3727  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3728  tree_reduce_block)) {
3729 
3730  // only primary thread gets here
3731  // OMPT: tree reduction is annotated in the barrier code
3732 
3733  } else {
3734 
3735  // should never reach this block
3736  KMP_ASSERT(0); // "unexpected method"
3737  }
3738 
3739  if (__kmp_env_consistency_check)
3740  __kmp_pop_sync(global_tid, ct_reduce, loc);
3741 
3742  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3743  global_tid, packed_reduction_method));
3744 
3745  return;
3746 }
3747 
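// Illustrative sketch (not part of kmp_csupport.cpp): roughly how a compiler
// might lower "reduction(+ : sum)" with the NOWAIT clause onto the two entry
// points above. The names reduce_combiner, reduce_nowait_sketch and my_lock
// are hypothetical; real codegen builds the ident_t and the kmp_critical_name
// as file-scope statics.
static void reduce_combiner(void *lhs_data, void *rhs_data) {
  *(int *)lhs_data += *(int *)rhs_data; // combine two partial sums
}

static void reduce_nowait_sketch(ident_t *loc, kmp_int32 gtid, int *sum,
                                 int local_sum, kmp_critical_name *my_lock) {
  switch (__kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(int),
                               &local_sum, reduce_combiner, my_lock)) {
  case 1: // this thread owns the combined value (tree) or the critical section
    *sum += local_sum;
    __kmpc_end_reduce_nowait(loc, gtid, my_lock);
    break;
  case 2: // atomic_reduce_block: every thread updates the result atomically
    __sync_fetch_and_add(sum, local_sum); // stand-in for the emitted atomic op
    break;
  default: // 0: a tree reduction already consumed this thread's contribution
    break;
  }
}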
3748 /* 2.a.ii. Reduce Block with a terminating barrier */
3749 
3765 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3766  size_t reduce_size, void *reduce_data,
3767  void (*reduce_func)(void *lhs_data, void *rhs_data),
3768  kmp_critical_name *lck) {
3769  KMP_COUNT_BLOCK(REDUCE_wait);
3770  int retval = 0;
3771  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3772  kmp_info_t *th;
3773  kmp_team_t *team;
3774  int teams_swapped = 0, task_state;
3775 
3776  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3777  __kmp_assert_valid_gtid(global_tid);
3778 
3779  // why do we need this initialization here at all?
3780  // The reduction clause cannot be a stand-alone directive.
3781 
3782  // do not call __kmp_serial_initialize(); it will be called by
3783  // __kmp_parallel_initialize() if needed
3784  // possible detection of a false-positive race by the thread checker ???
3785  if (!TCR_4(__kmp_init_parallel))
3786  __kmp_parallel_initialize();
3787 
3788  __kmp_resume_if_soft_paused();
3789 
3790 // check correctness of reduce block nesting
3791 #if KMP_USE_DYNAMIC_LOCK
3792  if (__kmp_env_consistency_check)
3793  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3794 #else
3795  if (__kmp_env_consistency_check)
3796  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3797 #endif
3798 
3799  th = __kmp_thread_from_gtid(global_tid);
3800  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3801 
3802  packed_reduction_method = __kmp_determine_reduction_method(
3803  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3804  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3805 
3806  OMPT_REDUCTION_DECL(th, global_tid);
3807 
3808  if (packed_reduction_method == critical_reduce_block) {
3809 
3810  OMPT_REDUCTION_BEGIN;
3811  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3812  retval = 1;
3813 
3814  } else if (packed_reduction_method == empty_reduce_block) {
3815 
3816  OMPT_REDUCTION_BEGIN;
3817  // usage: if team size == 1, no synchronization is required ( Intel
3818  // platforms only )
3819  retval = 1;
3820 
3821  } else if (packed_reduction_method == atomic_reduce_block) {
3822 
3823  retval = 2;
3824 
3825  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3826  tree_reduce_block)) {
3827 
3828 // case tree_reduce_block:
3829 // this barrier should be visible to a customer and to the threading profile
3830 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3831 #if OMPT_SUPPORT
3832  ompt_frame_t *ompt_frame;
3833  if (ompt_enabled.enabled) {
3834  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3835  if (ompt_frame->enter_frame.ptr == NULL)
3836  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3837  }
3838  OMPT_STORE_RETURN_ADDRESS(global_tid);
3839 #endif
3840 #if USE_ITT_NOTIFY
3841  __kmp_threads[global_tid]->th.th_ident =
3842  loc; // needed for correct notification of frames
3843 #endif
3844  retval =
3845  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3846  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3847  retval = (retval != 0) ? (0) : (1);
3848 #if OMPT_SUPPORT && OMPT_OPTIONAL
3849  if (ompt_enabled.enabled) {
3850  ompt_frame->enter_frame = ompt_data_none;
3851  }
3852 #endif
3853 
3854  // all workers except the primary thread should do this pop here
3855  // (no worker other than the primary thread will enter __kmpc_end_reduce())
3856  if (__kmp_env_consistency_check) {
3857  if (retval == 0) { // 0: all other workers; 1: primary thread
3858  __kmp_pop_sync(global_tid, ct_reduce, loc);
3859  }
3860  }
3861 
3862  } else {
3863 
3864  // should never reach this block
3865  KMP_ASSERT(0); // "unexpected method"
3866  }
3867  if (teams_swapped) {
3868  __kmp_restore_swapped_teams(th, team, task_state);
3869  }
3870 
3871  KA_TRACE(10,
3872  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3873  global_tid, packed_reduction_method, retval));
3874  return retval;
3875 }
3876 
3887 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3888  kmp_critical_name *lck) {
3889 
3890  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3891  kmp_info_t *th;
3892  kmp_team_t *team;
3893  int teams_swapped = 0, task_state;
3894 
3895  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3896  __kmp_assert_valid_gtid(global_tid);
3897 
3898  th = __kmp_thread_from_gtid(global_tid);
3899  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3900 
3901  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3902 
3903  // this barrier should be visible to a customer and to the threading profile
3904  // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3905  OMPT_REDUCTION_DECL(th, global_tid);
3906 
3907  if (packed_reduction_method == critical_reduce_block) {
3908  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3909 
3910  OMPT_REDUCTION_END;
3911 
3912 // TODO: implicit barrier: should be exposed
3913 #if OMPT_SUPPORT
3914  ompt_frame_t *ompt_frame;
3915  if (ompt_enabled.enabled) {
3916  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3917  if (ompt_frame->enter_frame.ptr == NULL)
3918  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3919  }
3920  OMPT_STORE_RETURN_ADDRESS(global_tid);
3921 #endif
3922 #if USE_ITT_NOTIFY
3923  __kmp_threads[global_tid]->th.th_ident = loc;
3924 #endif
3925  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3926 #if OMPT_SUPPORT && OMPT_OPTIONAL
3927  if (ompt_enabled.enabled) {
3928  ompt_frame->enter_frame = ompt_data_none;
3929  }
3930 #endif
3931 
3932  } else if (packed_reduction_method == empty_reduce_block) {
3933 
3934  OMPT_REDUCTION_END;
3935 
3936 // usage: if team size==1, no synchronization is required (Intel platforms only)
3937 
3938 // TODO: implicit barrier: should be exposed
3939 #if OMPT_SUPPORT
3940  ompt_frame_t *ompt_frame;
3941  if (ompt_enabled.enabled) {
3942  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3943  if (ompt_frame->enter_frame.ptr == NULL)
3944  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3945  }
3946  OMPT_STORE_RETURN_ADDRESS(global_tid);
3947 #endif
3948 #if USE_ITT_NOTIFY
3949  __kmp_threads[global_tid]->th.th_ident = loc;
3950 #endif
3951  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3952 #if OMPT_SUPPORT && OMPT_OPTIONAL
3953  if (ompt_enabled.enabled) {
3954  ompt_frame->enter_frame = ompt_data_none;
3955  }
3956 #endif
3957 
3958  } else if (packed_reduction_method == atomic_reduce_block) {
3959 
3960 #if OMPT_SUPPORT
3961  ompt_frame_t *ompt_frame;
3962  if (ompt_enabled.enabled) {
3963  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3964  if (ompt_frame->enter_frame.ptr == NULL)
3965  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3966  }
3967  OMPT_STORE_RETURN_ADDRESS(global_tid);
3968 #endif
3969 // TODO: implicit barrier: should be exposed
3970 #if USE_ITT_NOTIFY
3971  __kmp_threads[global_tid]->th.th_ident = loc;
3972 #endif
3973  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3974 #if OMPT_SUPPORT && OMPT_OPTIONAL
3975  if (ompt_enabled.enabled) {
3976  ompt_frame->enter_frame = ompt_data_none;
3977  }
3978 #endif
3979 
3980  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3981  tree_reduce_block)) {
3982 
3983  // only primary thread executes here (primary releases all other workers)
3984  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3985  global_tid);
3986 
3987  } else {
3988 
3989  // should never reach this block
3990  KMP_ASSERT(0); // "unexpected method"
3991  }
3992  if (teams_swapped) {
3993  __kmp_restore_swapped_teams(th, team, task_state);
3994  }
3995 
3996  if (__kmp_env_consistency_check)
3997  __kmp_pop_sync(global_tid, ct_reduce, loc);
3998 
3999  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4000  global_tid, packed_reduction_method));
4001 
4002  return;
4003 }
4004 
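// Illustrative sketch (not part of kmp_csupport.cpp): the blocking pair above
// is what a compiler typically targets when the reduction has no NOWAIT
// clause, e.g.
//
//   int sum = 0;
//   #pragma omp parallel for reduction(+ : sum)
//   for (int i = 0; i < n; ++i)
//     sum += a[i];
//
// The dispatch on the return value mirrors the NOWAIT sketch shown after
// __kmpc_end_reduce_nowait(); the difference is that __kmpc_end_reduce()
// also supplies the construct's terminating barrier (and, in the tree case,
// releases the workers still parked in the split barrier of __kmpc_reduce()).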
4005 #undef __KMP_GET_REDUCTION_METHOD
4006 #undef __KMP_SET_REDUCTION_METHOD
4007 
4008 /* end of interface to fast scalable reduce routines */
4009 
4010 kmp_uint64 __kmpc_get_taskid() {
4011 
4012  kmp_int32 gtid;
4013  kmp_info_t *thread;
4014 
4015  gtid = __kmp_get_gtid();
4016  if (gtid < 0) {
4017  return 0;
4018  }
4019  thread = __kmp_thread_from_gtid(gtid);
4020  return thread->th.th_current_task->td_task_id;
4021 
4022 } // __kmpc_get_taskid
4023 
4024 kmp_uint64 __kmpc_get_parent_taskid() {
4025 
4026  kmp_int32 gtid;
4027  kmp_info_t *thread;
4028  kmp_taskdata_t *parent_task;
4029 
4030  gtid = __kmp_get_gtid();
4031  if (gtid < 0) {
4032  return 0;
4033  }
4034  thread = __kmp_thread_from_gtid(gtid);
4035  parent_task = thread->th.th_current_task->td_parent;
4036  return (parent_task == NULL ? 0 : parent_task->td_task_id);
4037 
4038 } // __kmpc_get_parent_taskid
4039 
4051 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4052  const struct kmp_dim *dims) {
4053  __kmp_assert_valid_gtid(gtid);
4054  int j, idx;
4055  kmp_int64 last, trace_count;
4056  kmp_info_t *th = __kmp_threads[gtid];
4057  kmp_team_t *team = th->th.th_team;
4058  kmp_uint32 *flags;
4059  kmp_disp_t *pr_buf = th->th.th_dispatch;
4060  dispatch_shared_info_t *sh_buf;
4061 
4062  KA_TRACE(
4063  20,
4064  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4065  gtid, num_dims, !team->t.t_serialized));
4066  KMP_DEBUG_ASSERT(dims != NULL);
4067  KMP_DEBUG_ASSERT(num_dims > 0);
4068 
4069  if (team->t.t_serialized) {
4070  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4071  return; // no dependencies if team is serialized
4072  }
4073  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4074  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4075  // the next loop
4076  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4077 
4078  // Save bounds info into allocated private buffer
4079  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4080  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4081  th, sizeof(kmp_int64) * (4 * num_dims + 1));
4082  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4083  pr_buf->th_doacross_info[0] =
4084  (kmp_int64)num_dims; // first element is number of dimensions
4085  // Also save the address of num_done so it can be accessed later without
4086  // knowing the buffer index
4087  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4088  pr_buf->th_doacross_info[2] = dims[0].lo;
4089  pr_buf->th_doacross_info[3] = dims[0].up;
4090  pr_buf->th_doacross_info[4] = dims[0].st;
4091  last = 5;
4092  for (j = 1; j < num_dims; ++j) {
4093  kmp_int64
4094  range_length; // keeps the range of each dimension except the first, dims[0]
4095  if (dims[j].st == 1) { // most common case
4096  // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
4097  range_length = dims[j].up - dims[j].lo + 1;
4098  } else {
4099  if (dims[j].st > 0) {
4100  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4101  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4102  } else { // negative increment
4103  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4104  range_length =
4105  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4106  }
4107  }
4108  pr_buf->th_doacross_info[last++] = range_length;
4109  pr_buf->th_doacross_info[last++] = dims[j].lo;
4110  pr_buf->th_doacross_info[last++] = dims[j].up;
4111  pr_buf->th_doacross_info[last++] = dims[j].st;
4112  }
4113 
4114  // Compute total trip count.
4115  // Start with range of dims[0] which we don't need to keep in the buffer.
4116  if (dims[0].st == 1) { // most common case
4117  trace_count = dims[0].up - dims[0].lo + 1;
4118  } else if (dims[0].st > 0) {
4119  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4120  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4121  } else { // negative increment
4122  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4123  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4124  }
4125  for (j = 1; j < num_dims; ++j) {
4126  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4127  }
4128  KMP_DEBUG_ASSERT(trace_count > 0);
4129 
4130  // Check whether the shared buffer is still occupied by another loop
4131  // (i.e. by the loop that used buffer index idx - __kmp_dispatch_num_buffers)
4132  if (idx != sh_buf->doacross_buf_idx) {
4133  // Shared buffer is occupied, wait for it to be free
4134  __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4135  __kmp_eq_4, NULL);
4136  }
4137 #if KMP_32_BIT_ARCH
4138  // Check if we are the first thread. After the CAS the first thread gets 0,
4139  // others get 1 if initialization is in progress, allocated pointer otherwise.
4140  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4141  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4142  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4143 #else
4144  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4145  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4146 #endif
4147  if (flags == NULL) {
4148  // we are the first thread, allocate the array of flags
4149  size_t size =
4150  (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4151  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4152  KMP_MB();
4153  sh_buf->doacross_flags = flags;
4154  } else if (flags == (kmp_uint32 *)1) {
4155 #if KMP_32_BIT_ARCH
4156  // initialization is still in progress, need to wait
4157  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4158 #else
4159  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4160 #endif
4161  KMP_YIELD(TRUE);
4162  KMP_MB();
4163  } else {
4164  KMP_MB();
4165  }
4166  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4167  pr_buf->th_doacross_flags =
4168  sh_buf->doacross_flags; // save private copy in order to not
4169  // touch shared buffer on each iteration
4170  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4171 }
4172 
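// Worked example (not part of kmp_csupport.cpp): layout of th_doacross_info
// as packed above for a two-dimensional doacross nest (num_dims == 2). The
// buffer holds 4 * num_dims + 1 = 9 kmp_int64 entries:
//
//   [0] num_dims                     (= 2)
//   [1] &sh_buf->doacross_num_done   (stored as a kmp_int64)
//   [2] dims[0].lo   [3] dims[0].up   [4] dims[0].st
//   [5] range length of dims[1]
//   [6] dims[1].lo   [7] dims[1].up   [8] dims[1].st
//
// dims[0] gets no stored range length; its range only feeds the total trip
// count (trace_count) used to size the shared flag array.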
4173 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4174  __kmp_assert_valid_gtid(gtid);
4175  kmp_int64 shft;
4176  size_t num_dims, i;
4177  kmp_uint32 flag;
4178  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4179  kmp_info_t *th = __kmp_threads[gtid];
4180  kmp_team_t *team = th->th.th_team;
4181  kmp_disp_t *pr_buf;
4182  kmp_int64 lo, up, st;
4183 
4184  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4185  if (team->t.t_serialized) {
4186  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4187  return; // no dependencies if team is serialized
4188  }
4189 
4190  // calculate sequential iteration number and check out-of-bounds condition
4191  pr_buf = th->th.th_dispatch;
4192  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4193  num_dims = (size_t)pr_buf->th_doacross_info[0];
4194  lo = pr_buf->th_doacross_info[2];
4195  up = pr_buf->th_doacross_info[3];
4196  st = pr_buf->th_doacross_info[4];
4197 #if OMPT_SUPPORT && OMPT_OPTIONAL
4198  ompt_dependence_t deps[num_dims];
4199 #endif
4200  if (st == 1) { // most common case
4201  if (vec[0] < lo || vec[0] > up) {
4202  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4203  "bounds [%lld,%lld]\n",
4204  gtid, vec[0], lo, up));
4205  return;
4206  }
4207  iter_number = vec[0] - lo;
4208  } else if (st > 0) {
4209  if (vec[0] < lo || vec[0] > up) {
4210  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4211  "bounds [%lld,%lld]\n",
4212  gtid, vec[0], lo, up));
4213  return;
4214  }
4215  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4216  } else { // negative increment
4217  if (vec[0] > lo || vec[0] < up) {
4218  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4219  "bounds [%lld,%lld]\n",
4220  gtid, vec[0], lo, up));
4221  return;
4222  }
4223  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4224  }
4225 #if OMPT_SUPPORT && OMPT_OPTIONAL
4226  deps[0].variable.value = iter_number;
4227  deps[0].dependence_type = ompt_dependence_type_sink;
4228 #endif
4229  for (i = 1; i < num_dims; ++i) {
4230  kmp_int64 iter, ln;
4231  size_t j = i * 4;
4232  ln = pr_buf->th_doacross_info[j + 1];
4233  lo = pr_buf->th_doacross_info[j + 2];
4234  up = pr_buf->th_doacross_info[j + 3];
4235  st = pr_buf->th_doacross_info[j + 4];
4236  if (st == 1) {
4237  if (vec[i] < lo || vec[i] > up) {
4238  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4239  "bounds [%lld,%lld]\n",
4240  gtid, vec[i], lo, up));
4241  return;
4242  }
4243  iter = vec[i] - lo;
4244  } else if (st > 0) {
4245  if (vec[i] < lo || vec[i] > up) {
4246  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4247  "bounds [%lld,%lld]\n",
4248  gtid, vec[i], lo, up));
4249  return;
4250  }
4251  iter = (kmp_uint64)(vec[i] - lo) / st;
4252  } else { // st < 0
4253  if (vec[i] > lo || vec[i] < up) {
4254  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4255  "bounds [%lld,%lld]\n",
4256  gtid, vec[i], lo, up));
4257  return;
4258  }
4259  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4260  }
4261  iter_number = iter + ln * iter_number;
4262 #if OMPT_SUPPORT && OMPT_OPTIONAL
4263  deps[i].variable.value = iter;
4264  deps[i].dependence_type = ompt_dependence_type_sink;
4265 #endif
4266  }
4267  shft = iter_number % 32; // use 32-bit granularity
4268  iter_number >>= 5; // divided by 32
4269  flag = 1 << shft;
4270  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4271  KMP_YIELD(TRUE);
4272  }
4273  KMP_MB();
4274 #if OMPT_SUPPORT && OMPT_OPTIONAL
4275  if (ompt_enabled.ompt_callback_dependences) {
4276  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4277  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4278  }
4279 #endif
4280  KA_TRACE(20,
4281  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4282  gtid, (iter_number << 5) + shft));
4283 }
4284 
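// Worked example (not part of kmp_csupport.cpp): for a doacross nest with
// dims[0] = {lo 0, up 9, st 1} and dims[1] = {lo 0, up 4, st 1} (range
// lengths 10 and 5), the sink vector (i, j) = (3, 2) linearizes as
//
//   iter_number = 3                  (dimension 0)
//   iter_number = 2 + 5 * 3 = 17     (dimension 1, ln = 5)
//
// so __kmpc_doacross_wait() spins on bit 17 % 32 = 17 of word 17 >> 5 = 0 in
// the shared flag array, and the __kmpc_doacross_post() for (3, 2) below sets
// exactly that bit.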
4285 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4286  __kmp_assert_valid_gtid(gtid);
4287  kmp_int64 shft;
4288  size_t num_dims, i;
4289  kmp_uint32 flag;
4290  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4291  kmp_info_t *th = __kmp_threads[gtid];
4292  kmp_team_t *team = th->th.th_team;
4293  kmp_disp_t *pr_buf;
4294  kmp_int64 lo, st;
4295 
4296  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4297  if (team->t.t_serialized) {
4298  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4299  return; // no dependencies if team is serialized
4300  }
4301 
4302  // calculate sequential iteration number (same as in "wait" but no
4303  // out-of-bounds checks)
4304  pr_buf = th->th.th_dispatch;
4305  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4306  num_dims = (size_t)pr_buf->th_doacross_info[0];
4307  lo = pr_buf->th_doacross_info[2];
4308  st = pr_buf->th_doacross_info[4];
4309 #if OMPT_SUPPORT && OMPT_OPTIONAL
4310  ompt_dependence_t deps[num_dims];
4311 #endif
4312  if (st == 1) { // most common case
4313  iter_number = vec[0] - lo;
4314  } else if (st > 0) {
4315  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4316  } else { // negative increment
4317  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4318  }
4319 #if OMPT_SUPPORT && OMPT_OPTIONAL
4320  deps[0].variable.value = iter_number;
4321  deps[0].dependence_type = ompt_dependence_type_source;
4322 #endif
4323  for (i = 1; i < num_dims; ++i) {
4324  kmp_int64 iter, ln;
4325  size_t j = i * 4;
4326  ln = pr_buf->th_doacross_info[j + 1];
4327  lo = pr_buf->th_doacross_info[j + 2];
4328  st = pr_buf->th_doacross_info[j + 4];
4329  if (st == 1) {
4330  iter = vec[i] - lo;
4331  } else if (st > 0) {
4332  iter = (kmp_uint64)(vec[i] - lo) / st;
4333  } else { // st < 0
4334  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4335  }
4336  iter_number = iter + ln * iter_number;
4337 #if OMPT_SUPPORT && OMPT_OPTIONAL
4338  deps[i].variable.value = iter;
4339  deps[i].dependence_type = ompt_dependence_type_source;
4340 #endif
4341  }
4342 #if OMPT_SUPPORT && OMPT_OPTIONAL
4343  if (ompt_enabled.ompt_callback_dependences) {
4344  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4345  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4346  }
4347 #endif
4348  shft = iter_number % 32; // use 32-bit granularity
4349  iter_number >>= 5; // divided by 32
4350  flag = 1 << shft;
4351  KMP_MB();
4352  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4353  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4354  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4355  (iter_number << 5) + shft));
4356 }
4357 
4358 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4359  __kmp_assert_valid_gtid(gtid);
4360  kmp_int32 num_done;
4361  kmp_info_t *th = __kmp_threads[gtid];
4362  kmp_team_t *team = th->th.th_team;
4363  kmp_disp_t *pr_buf = th->th.th_dispatch;
4364 
4365  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4366  if (team->t.t_serialized) {
4367  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4368  return; // nothing to do
4369  }
4370  num_done =
4371  KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4372  if (num_done == th->th.th_team_nproc) {
4373  // we are the last thread, need to free shared resources
4374  int idx = pr_buf->th_doacross_buf_idx - 1;
4375  dispatch_shared_info_t *sh_buf =
4376  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4377  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4378  (kmp_int64)&sh_buf->doacross_num_done);
4379  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4380  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4381  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4382  sh_buf->doacross_flags = NULL;
4383  sh_buf->doacross_num_done = 0;
4384  sh_buf->doacross_buf_idx +=
4385  __kmp_dispatch_num_buffers; // free buffer for future re-use
4386  }
4387  // free private resources (need to keep buffer index forever)
4388  pr_buf->th_doacross_flags = NULL;
4389  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4390  pr_buf->th_doacross_info = NULL;
4391  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4392 }
4393 
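// Illustrative sketch (not part of kmp_csupport.cpp): a doacross loop nest
// such as
//
//   #pragma omp for ordered(2)
//   for (int i = 1; i < n; ++i)
//     for (int j = 1; j < m; ++j) {
//       #pragma omp ordered depend(sink : i - 1, j) depend(sink : i, j - 1)
//       a[i][j] = f(a[i - 1][j], a[i][j - 1]); // f() is a placeholder
//       #pragma omp ordered depend(source)
//     }
//
// is typically lowered so that each thread calls __kmpc_doacross_init() once
// with both dimensions, __kmpc_doacross_wait() for every depend(sink) vector,
// __kmpc_doacross_post() at depend(source), and __kmpc_doacross_fini() after
// finishing its share of the iterations.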
4394 /* OpenMP 5.1 Memory Management routines */
4395 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4396  return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4397 }
4398 
4399 void *omp_aligned_alloc(size_t align, size_t size,
4400  omp_allocator_handle_t allocator) {
4401  return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4402 }
4403 
4404 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4405  return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4406 }
4407 
4408 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4409  omp_allocator_handle_t allocator) {
4410  return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4411 }
4412 
4413 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4414  omp_allocator_handle_t free_allocator) {
4415  return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4416  free_allocator);
4417 }
4418 
4419 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4420  ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4421 }
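// Illustrative sketch (not part of kmp_csupport.cpp): typical user-level use
// of the OpenMP 5.1 allocation routines above; alloc_vector_sketch is a
// hypothetical helper and assumes <stddef.h> for size_t.
static void alloc_vector_sketch(size_t n) {
  // 64-byte-aligned allocation from the predefined default allocator
  double *v = (double *)omp_aligned_alloc(64, n * sizeof(double),
                                          omp_default_mem_alloc);
  if (v != NULL) {
    v[0] = 1.0; // ... use the buffer ...
    omp_free(v, omp_default_mem_alloc); // release through the same allocator
  }
}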
4422 /* end of OpenMP 5.1 Memory Management routines */
4423 
4424 int __kmpc_get_target_offload(void) {
4425  if (!__kmp_init_serial) {
4426  __kmp_serial_initialize();
4427  }
4428  return __kmp_target_offload;
4429 }
4430 
4431 int __kmpc_pause_resource(kmp_pause_status_t level) {
4432  if (!__kmp_init_serial) {
4433  return 1; // Can't pause if runtime is not initialized
4434  }
4435  return __kmp_pause_resource(level);
4436 }
4437 
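// Illustrative sketch (not part of kmp_csupport.cpp): at the user level the
// corresponding OpenMP 5.0 routines look like the call below; on the host
// they eventually funnel into __kmpc_pause_resource() (plumbing assumed, see
// kmp_ftn_entry.h):
//
//   if (omp_pause_resource_all(omp_pause_soft) != 0)
//     /* pausing not supported, or the runtime was never initialized */;
//
// A non-zero return mirrors the failure path above, where the runtime refuses
// to pause because it has not been initialized.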
4438 void __kmpc_error(ident_t *loc, int severity, const char *message) {
4439  if (!__kmp_init_serial)
4440  __kmp_serial_initialize();
4441 
4442  KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4443 
4444 #if OMPT_SUPPORT
4445  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4446  ompt_callbacks.ompt_callback(ompt_callback_error)(
4447  (ompt_severity_t)severity, message, KMP_STRLEN(message),
4448  OMPT_GET_RETURN_ADDRESS(0));
4449  }
4450 #endif // OMPT_SUPPORT
4451 
4452  char *src_loc;
4453  if (loc && loc->psource) {
4454  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4455  src_loc =
4456  __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4457  __kmp_str_loc_free(&str_loc);
4458  } else {
4459  src_loc = __kmp_str_format("unknown");
4460  }
4461 
4462  if (severity == severity_warning)
4463  KMP_WARNING(UserDirectedWarning, src_loc, message);
4464  else
4465  KMP_FATAL(UserDirectedError, src_loc, message);
4466 
4467  __kmp_str_free(&src_loc);
4468 }
4469 
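// Illustrative sketch (not part of kmp_csupport.cpp): __kmpc_error() backs the
// OpenMP 5.1 'error' directive when it is deferred to run time, e.g.
//
//   #pragma omp error at(execution) severity(warning) message("hint ignored")
//
// which a compiler typically lowers to something equivalent to
// __kmpc_error(&loc, severity_warning, "hint ignored"), yielding a runtime
// warning tagged with the source location; severity(fatal) takes the
// KMP_FATAL path instead and terminates the program.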
4470 // Mark begin of scope directive.
4471 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4472 // reserved is for extension of scope directive and not used.
4473 #if OMPT_SUPPORT && OMPT_OPTIONAL
4474  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4475  kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4476  int tid = __kmp_tid_from_gtid(gtid);
4477  ompt_callbacks.ompt_callback(ompt_callback_work)(
4478  ompt_work_scope, ompt_scope_begin,
4479  &(team->t.ompt_team_info.parallel_data),
4480  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4481  OMPT_GET_RETURN_ADDRESS(0));
4482  }
4483 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4484 }
4485 
4486 // Mark end of scope directive
4487 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4488 // reserved is for extension of scope directive and not used.
4489 #if OMPT_SUPPORT && OMPT_OPTIONAL
4490  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4491  kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4492  int tid = __kmp_tid_from_gtid(gtid);
4493  ompt_callbacks.ompt_callback(ompt_callback_work)(
4494  ompt_work_scope, ompt_scope_end,
4495  &(team->t.ompt_team_info.parallel_data),
4496  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4497  OMPT_GET_RETURN_ADDRESS(0));
4498  }
4499 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4500 }
4501 
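// Illustrative sketch (not part of kmp_csupport.cpp): these two entry points
// exist only to report OMPT work events for the OpenMP 5.1 'scope' construct,
// e.g.
//
//   #pragma omp scope reduction(+ : s)
//   { s += partial_result(); } // partial_result() is a placeholder
//
// might be lowered so that every encountering thread calls __kmpc_scope() on
// entry to the structured block and __kmpc_end_scope() on exit, while the
// reduction and the implicit barrier go through the usual reduce/barrier
// entry points.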
4502 #ifdef KMP_USE_VERSION_SYMBOLS
4503 // For GOMP compatibility there are two versions of each omp_* API.
4504 // One is the plain C symbol and one is the Fortran symbol with an appended
4505 // underscore. When we implement a specific ompc_* version of an omp_*
4506 // function, we want the plain GOMP versioned symbol to alias the ompc_* version
4507 // instead of the Fortran versions in kmp_ftn_entry.h
4508 extern "C" {
4509 // Have to undef these from omp.h so they aren't translated into
4510 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4511 #ifdef omp_set_affinity_format
4512 #undef omp_set_affinity_format
4513 #endif
4514 #ifdef omp_get_affinity_format
4515 #undef omp_get_affinity_format
4516 #endif
4517 #ifdef omp_display_affinity
4518 #undef omp_display_affinity
4519 #endif
4520 #ifdef omp_capture_affinity
4521 #undef omp_capture_affinity
4522 #endif
4523 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4524  "OMP_5.0");
4525 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4526  "OMP_5.0");
4527 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4528  "OMP_5.0");
4529 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4530  "OMP_5.0");
4531 } // extern "C"
4532 #endif