/*
 * kmp_lock.cpp -- lock-related functions
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <stddef.h>
#include <atomic>

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#if KMP_USE_FUTEX
#include <sys/syscall.h>
#include <unistd.h>
// We should really include <futex.h>, but that causes compatibility problems on
// different Linux* OS distributions that either require that you include (or
// break when you try to include) <pci/types.h>. Since all we need is the two
// macros below (which are part of the kernel ABI, so can't change) we just
// define the constants here and don't include <futex.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
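// Semantics of the two futex operations used below (see futex(2)):
// FUTEX_WAIT(addr, val) blocks the caller only while *addr still equals val
// at the time of the check (returning immediately with EWOULDBLOCK if the
// word has already changed), and FUTEX_WAKE(addr, n) wakes up to n threads
// blocked in FUTEX_WAIT on addr.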
#endif

/* Implement spin locks for internal library use. */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void __kmp_validate_locks(void) {
  int i;
  kmp_uint32 x, y;

  /* Check to make sure unsigned arithmetic wraps around properly */
  x = ~((kmp_uint32)0) - 2;
  y = x - 2;

  for (i = 0; i < 8; ++i, ++x, ++y) {
    kmp_uint32 z = (x - y);
    KMP_ASSERT(z == 2);
  }

  KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
}
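// Worked example of the check above: x starts at 0xFFFFFFFD and y at
// 0xFFFFFFFB; the eight increments walk x across the 2^32 wraparound
// (0xFFFFFFFE, 0xFFFFFFFF, 0x00000000, 0x00000001, ...) with y trailing two
// behind, and modular arithmetic keeps x - y == 2 at every step. The ticket
// (bakery) locks below depend on exactly this behavior once next_ticket
// wraps around past now_serving.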

/* ------------------------------------------------------------------------ */
/* test and set locks */

// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture. On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.

static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
  return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
}
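// Encoding of lk.poll: KMP_LOCK_FREE(tas) marks the lock free, and
// KMP_LOCK_BUSY(gtid + 1, tas) marks it held by global thread id gtid. The
// +1 shift keeps gtid 0 distinguishable from "free", which is why the owner
// query above strips the tag and subtracts 1 (yielding -1 for an unheld
// lock).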

static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

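// Acquire strategy (test-and-test-and-set): spin on a relaxed load of
// lk.poll so contending threads share the cache line read-only, and attempt
// the acquire-ordered compare-and-store only when the lock is observed free.
// Between attempts the thread backs off (per __kmp_spin_backoff_params,
// skipped when tpause is available and enabled) and yields. The contended
// loop below has this shape:
//
//   while (load_relaxed(&poll) != FREE || !cas_acquire(&poll, FREE, BUSY))
//     backoff_and_yield();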
__forceinline static int
__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
  if ((curr != 0) && (curr != gtid + 1))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);

  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
    KMP_FSYNC_ACQUIRED(lck);
    return KMP_LOCK_ACQUIRED_FIRST;
  }

  kmp_uint32 spins;
  kmp_uint64 time;
  KMP_FSYNC_PREPARE(lck);
  KMP_INIT_YIELD(spins);
  KMP_INIT_BACKOFF(time);
  kmp_backoff_t backoff = __kmp_spin_backoff_params;
  do {
#if !KMP_HAVE_UMWAIT
    __kmp_spin_backoff(&backoff);
#else
    if (!__kmp_tpause_enabled)
      __kmp_spin_backoff(&backoff);
#endif
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
  } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
           !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
  KMP_FSYNC_ACQUIRED(lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
  return retval;
}

static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  return __kmp_acquire_tas_lock(lck, gtid);
}

int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}

static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                           kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  return __kmp_test_tas_lock(lck, gtid);
}

int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KMP_FSYNC_RELEASING(lck);
  KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KMP_YIELD_OVERSUB();
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_tas_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
      (__kmp_get_tas_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_tas_lock(lck, gtid);
}

void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
  lck->lk.poll = KMP_LOCK_FREE(tas);
}

void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }

static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_tas_lock(lck);
}

// nested test and set locks

int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_tas_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_tas_lock_timed_template(lck, gtid);
    lck->lk.depth_locked = 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_tas_lock(lck, gtid);
}

int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_tas_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_tas_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
  }
  return retval;
}

static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                  kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_tas_lock(lck, gtid);
}

int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    __kmp_release_tas_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_tas_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_tas_lock(lck, gtid);
}

void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_destroy_tas_lock(lck);
  lck->lk.depth_locked = 0;
}

static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_tas_lock(lck);
}

#if KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention. They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).

static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
  return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
}
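// Bit layout of lk.poll for futex locks: the owner is encoded as
// (gtid + 1) << 1 and bit 0 is a "waiters present" flag. For example, gtid 5
// holding an uncontended lock stores 12 (6 << 1); once a waiter has parked
// in the kernel the value becomes 13, telling the releasing thread that it
// must issue a FUTEX_WAKE.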

static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  kmp_int32 gtid_code = (gtid + 1) << 1;

  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
  if ((curr != 0) && (curr != gtid_code))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  KMP_FSYNC_PREPARE(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  kmp_int32 poll_val;

  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
              &(lck->lk.poll), KMP_LOCK_FREE(futex),
              KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {

    kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
         lck, gtid, poll_val, cond));

    // NOTE: don't write this branch condition as
    //
    //   if ( poll_val & 1 == 0 )
    //
    // In C, == binds tighter than &, so that parses as poll_val & (1 == 0),
    // and the block below would always be skipped regardless of the value of
    // the LSB of poll_val.
    if (!cond) {
      // Try to set the lsb in the poll to indicate to the owner
      // thread that they need to wake this thread up.
      if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
                                       poll_val | KMP_LOCK_BUSY(1, futex))) {
        KA_TRACE(
            1000,
            ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
             lck, lck->lk.poll, gtid));
        continue;
      }
      poll_val |= KMP_LOCK_BUSY(1, futex);

      KA_TRACE(1000,
               ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
                lck->lk.poll, gtid));
    }

    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
         lck, gtid, poll_val));

    long rc;
    if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
                      NULL, 0)) != 0) {
      KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
                      "failed (rc=%ld errno=%d)\n",
                      lck, gtid, poll_val, rc, errno));
      continue;
    }

    KA_TRACE(1000,
             ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
              lck, gtid, poll_val));
    // This thread has now done a successful futex wait call and was entered on
    // the OS futex queue. We must now perform a futex wake call when releasing
    // the lock, as we have no idea how many other threads are in the queue.
    gtid_code |= 1;
  }

  KMP_FSYNC_ACQUIRED(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
  return retval;
}

static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  return __kmp_acquire_futex_lock(lck, gtid);
}

int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
                                  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}

static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                             kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  return __kmp_test_futex_lock(lck, gtid);
}

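// Release protocol: unconditionally swap the lock word back to "free" and
// inspect the old value; if a waiter set bit 0 while the lock was held, wake
// exactly one thread parked in FUTEX_WAIT. The woken thread re-runs the
// acquire loop and, because it had to wait, keeps bit 0 set when it finally
// takes the lock, so the next release wakes the next waiter in turn.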
int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  KMP_FSYNC_RELEASING(lck);

  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));

  KA_TRACE(1000,
           ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
            lck, gtid, poll_val));

  if (KMP_LOCK_STRIP(poll_val) & 1) {
    KA_TRACE(1000,
             ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
              lck, gtid));
    syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
            NULL, NULL, 0);
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));

  KMP_YIELD_OVERSUB();
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_futex_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
      (__kmp_get_futex_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_futex_lock(lck, gtid);
}

void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
  TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
}

void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }

static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_futex_lock(lck);
}

// nested futex locks

int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_futex_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_futex_lock_timed_template(lck, gtid);
    lck->lk.depth_locked = 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_futex_lock(lck, gtid);
}

int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_futex_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_futex_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
  }
  return retval;
}

static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                    kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_futex_lock(lck, gtid);
}

int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    __kmp_release_futex_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_futex_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_futex_lock(lck, gtid);
}

void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_destroy_futex_lock(lck);
  lck->lk.depth_locked = 0;
}

static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_futex_lock(lck);
}

#endif // KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */

static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.owner_id,
                                   std::memory_order_relaxed) -
         1;
}

static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.depth_locked,
                                   std::memory_order_relaxed) != -1;
}

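// Ticket protocol: an acquiring thread atomically draws a ticket from
// next_ticket and spins until now_serving catches up; release bumps
// now_serving, so the lock is granted in FIFO ticket order. For example, if
// next_ticket is 7 and now_serving is 5, the holder owns ticket 5 and one
// waiter holds ticket 6; a new arrival draws 7 and enters after two more
// releases.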
static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
  return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
                                   std::memory_order_acquire) == my_ticket;
}

__forceinline static int
__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
                                         kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
      &lck->lk.next_ticket, 1U, std::memory_order_relaxed);

#ifdef USE_LOCK_PROFILE
  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) != my_ticket)
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_acquire) == my_ticket) {
    return KMP_LOCK_ACQUIRED_FIRST;
  }
  KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
  return retval;
}

static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {
  char const *const func = "omp_set_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_ticket_lock(lck, gtid);

  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                             std::memory_order_relaxed);
  return KMP_LOCK_ACQUIRED_FIRST;
}

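// The try-acquire below succeeds only when the queue is empty: it reads
// next_ticket, checks that now_serving already equals it (no holder and no
// waiters), and claims that ticket with a compare-exchange. Any race with
// another acquirer fails the CAS, and the call returns FALSE without ever
// waiting.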
int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                   std::memory_order_relaxed);

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) == my_ticket) {
    kmp_uint32 next_ticket = my_ticket + 1;
    if (std::atomic_compare_exchange_strong_explicit(
            &lck->lk.next_ticket, &my_ticket, next_ticket,
            std::memory_order_acquire, std::memory_order_acquire)) {
      return TRUE;
    }
  }
  return FALSE;
}

static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_test_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }

  int retval = __kmp_test_ticket_lock(lck, gtid);

  if (retval) {
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
  }
  return retval;
}

int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                  std::memory_order_relaxed) -
                        std::atomic_load_explicit(&lck->lk.now_serving,
                                                  std::memory_order_relaxed);

  std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
                                 std::memory_order_release);

  KMP_YIELD(distance >
            (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
      (__kmp_get_ticket_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
  return __kmp_release_ticket_lock(lck, gtid);
}

void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
  lck->lk.location = NULL;
  lck->lk.self = lck;
  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(
      &lck->lk.owner_id, 0,
      std::memory_order_relaxed); // no thread owns the lock.
  std::atomic_store_explicit(
      &lck->lk.depth_locked, -1,
      std::memory_order_relaxed); // -1 => not a nested lock.
  std::atomic_store_explicit(&lck->lk.initialized, true,
                             std::memory_order_release);
}

void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
  std::atomic_store_explicit(&lck->lk.initialized, false,
                             std::memory_order_release);
  lck->lk.self = NULL;
  lck->lk.location = NULL;
  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.depth_locked, -1,
                             std::memory_order_relaxed);
}

static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  char const *const func = "omp_destroy_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_ticket_lock(lck);
}

// nested ticket locks

int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
    std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
                                   std::memory_order_relaxed);
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_ticket_lock_timed_template(lck, gtid);
    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
                               std::memory_order_relaxed);
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                        kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_ticket_lock(lck, gtid);
}

int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
    retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
                                            std::memory_order_relaxed) +
             1;
  } else if (!__kmp_test_ticket_lock(lck, gtid)) {
    retval = 0;
  } else {
    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
                               std::memory_order_relaxed);
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
    retval = 1;
  }
  return retval;
}

static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_ticket_lock(lck, gtid);
}

int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
                                      std::memory_order_relaxed) -
       1) == 0) {
    std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
    __kmp_release_ticket_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                        kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_ticket_lock(lck, gtid);
}

void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
                             std::memory_order_relaxed);
  // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
  __kmp_destroy_ticket_lock(lck);
  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
                             std::memory_order_relaxed);
}

static void
__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_ticket_lock(lck);
}

// access functions to fields which don't exist for all lock kinds.

static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
  return lck->lk.location;
}

static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
                                           const ident_t *loc) {
  lck->lk.location = loc;
}

static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
  return lck->lk.flags;
}

static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
                                        kmp_lock_flags_t flags) {
  lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/* First the states
   (head,tail) = 0, 0  means lock is unheld, nobody on queue
                 UINT_MAX or -1, 0  means lock is held, nobody on queue
                 h, h  means lock held or about to transition,
                       1 element on queue
                 h, t  h <> t, means lock is held or about to
                       transition, >1 elements on queue

   Now the transitions
      Acquire(0,0)  = -1 ,0
      Release(0,0)  = Error
      Acquire(-1,0) =  h ,h    h > 0
      Release(-1,0) =  0 ,0
      Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
      Release(h,h)  = -1 ,0    h > 0
      Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
      Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t

   And pictorially

           +-----+
           | 0, 0|------- release -------> Error
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           |-1, 0|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, h|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, t|----- acquire, release loopback ---+
           +-----+                                   |
                ^                                    |
                |                                    |
                +------------------------------------+
 */
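
/* Worked example of the transitions above (ids are stored as gtid+1): with
   the lock free, T1 acquires: (0,0) -> (-1,0), held with an empty queue.
   T2 (gtid 2) enqueues itself: (-1,0) -> (3,3). T3 (gtid 3) appends:
   (3,3) -> (3,4). T1's release dequeues T2, which now holds the lock with T3
   still queued: (4,4). T2's release hands the lock to T3: (-1,0), and T3's
   release restores (0,0). */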

#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE 1024
static char traces[TRACE_BUF_ELE][128] = {0};
static int tc = 0;
#define TRACE_LOCK(X, Y)                                                       \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
#define TRACE_LOCK_T(X, Y, Z)                                                  \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
#define TRACE_LOCK_HT(X, Y, Z, Q)                                              \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y,   \
               Z, Q);

static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
                                    kmp_queuing_lock_t *lck, kmp_int32 head_id,
                                    kmp_int32 tail_id) {
  kmp_int32 t, i;

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");

  i = tc % TRACE_BUF_ELE;
  __kmp_printf_no_lock("%s\n", traces[i]);
  i = (i + 1) % TRACE_BUF_ELE;
  while (i != (tc % TRACE_BUF_ELE)) {
    __kmp_printf_no_lock("%s", traces[i]);
    i = (i + 1) % TRACE_BUF_ELE;
  }
  __kmp_printf_no_lock("\n");

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
                       "next_wait:%d, head_id:%d, tail_id:%d\n",
                       gtid + 1, this_thr->th.th_spin_here,
                       this_thr->th.th_next_waiting, head_id, tail_id);

  __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);

  if (lck->lk.head_id >= 1) {
    t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
    while (t > 0) {
      __kmp_printf_no_lock("-> %d ", t);
      t = __kmp_threads[t - 1]->th.th_next_waiting;
    }
  }
  __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);
  __kmp_printf_no_lock("\n\n");
}

#endif /* DEBUG_QUEUING_LOCKS */

static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
  return TCR_4(lck->lk.owner_id) - 1;
}

static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes
   is a potential compiler problem with __forceinline. */
1083 __forceinline static int
1084 __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
1085  kmp_int32 gtid) {
1086  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1087  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1088  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1089  volatile kmp_uint32 *spin_here_p;
1090 
1091 #if OMPT_SUPPORT
1092  ompt_state_t prev_state = ompt_state_undefined;
1093 #endif
1094 
1095  KA_TRACE(1000,
1096  ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1097 
1098  KMP_FSYNC_PREPARE(lck);
1099  KMP_DEBUG_ASSERT(this_thr != NULL);
1100  spin_here_p = &this_thr->th.th_spin_here;
1101 
1102 #ifdef DEBUG_QUEUING_LOCKS
1103  TRACE_LOCK(gtid + 1, "acq ent");
1104  if (*spin_here_p)
1105  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1106  if (this_thr->th.th_next_waiting != 0)
1107  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1108 #endif
1109  KMP_DEBUG_ASSERT(!*spin_here_p);
1110  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1111 
1112  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
1113  head_id_p that may follow, not just in execution order, but also in
1114  visibility order. This way, when a releasing thread observes the changes to
1115  the queue by this thread, it can rightly assume that spin_here_p has
1116  already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
1117  not premature. If the releasing thread sets spin_here_p to FALSE before
1118  this thread sets it to TRUE, this thread will hang. */
1119  *spin_here_p = TRUE; /* before enqueuing to prevent race */
1120 
1121  while (1) {
1122  kmp_int32 enqueued;
1123  kmp_int32 head;
1124  kmp_int32 tail;
1125 
1126  head = *head_id_p;
1127 
1128  switch (head) {
1129 
1130  case -1: {
1131 #ifdef DEBUG_QUEUING_LOCKS
1132  tail = *tail_id_p;
1133  TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1134 #endif
1135  tail = 0; /* to make sure next link asynchronously read is not set
1136  accidentally; this assignment prevents us from entering the
1137  if ( t > 0 ) condition in the enqueued case below, which is not
1138  necessary for this state transition */
1139 
1140  /* try (-1,0)->(tid,tid) */
1141  enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
1142  KMP_PACK_64(-1, 0),
1143  KMP_PACK_64(gtid + 1, gtid + 1));
1144 #ifdef DEBUG_QUEUING_LOCKS
1145  if (enqueued)
1146  TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
1147 #endif
1148  } break;
1149 
1150  default: {
1151  tail = *tail_id_p;
1152  KMP_DEBUG_ASSERT(tail != gtid + 1);
1153 
1154 #ifdef DEBUG_QUEUING_LOCKS
1155  TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1156 #endif
1157 
1158  if (tail == 0) {
1159  enqueued = FALSE;
1160  } else {
1161  /* try (h,t) or (h,h)->(h,tid) */
1162  enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);
1163 
1164 #ifdef DEBUG_QUEUING_LOCKS
1165  if (enqueued)
1166  TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
1167 #endif
1168  }
1169  } break;
1170 
1171  case 0: /* empty queue */
1172  {
1173  kmp_int32 grabbed_lock;
1174 
1175 #ifdef DEBUG_QUEUING_LOCKS
1176  tail = *tail_id_p;
1177  TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1178 #endif
1179  /* try (0,0)->(-1,0) */
1180 
1181  /* only legal transition out of head = 0 is head = -1 with no change to
1182  * tail */
1183  grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);
1184 
1185  if (grabbed_lock) {
1186 
1187  *spin_here_p = FALSE;
1188 
1189  KA_TRACE(
1190  1000,
1191  ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1192  lck, gtid));
1193 #ifdef DEBUG_QUEUING_LOCKS
1194  TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
1195 #endif
1196 
1197 #if OMPT_SUPPORT
1198  if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
1199  /* change the state before clearing wait_id */
1200  this_thr->th.ompt_thread_info.state = prev_state;
1201  this_thr->th.ompt_thread_info.wait_id = 0;
1202  }
1203 #endif
1204 
1205  KMP_FSYNC_ACQUIRED(lck);
1206  return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1207  }
1208  enqueued = FALSE;
1209  } break;
1210  }
1211 
1212 #if OMPT_SUPPORT
1213  if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
1214  /* this thread will spin; set wait_id before entering wait state */
1215  prev_state = this_thr->th.ompt_thread_info.state;
1216  this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
1217  this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1218  }
1219 #endif
1220 
1221  if (enqueued) {
1222  if (tail > 0) {
1223  kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
1224  KMP_ASSERT(tail_thr != NULL);
1225  tail_thr->th.th_next_waiting = gtid + 1;
1226  /* corresponding wait for this write in release code */
1227  }
1228  KA_TRACE(1000,
1229  ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
1230  lck, gtid));
1231 
1232  KMP_MB();
1233  // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
1234  KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
1235  // Synchronize writes to both runtime thread structures
1236  // and writes in user code.
1237  KMP_MB();
1238 
1239 #ifdef DEBUG_QUEUING_LOCKS
1240  TRACE_LOCK(gtid + 1, "acq spin");
1241 
1242  if (this_thr->th.th_next_waiting != 0)
1243  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1244 #endif
1245  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1246  KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
1247  "waiting on queue\n",
1248  lck, gtid));
1249 
1250 #ifdef DEBUG_QUEUING_LOCKS
1251  TRACE_LOCK(gtid + 1, "acq exit 2");
1252 #endif
1253 
1254 #if OMPT_SUPPORT
1255  /* change the state before clearing wait_id */
1256  this_thr->th.ompt_thread_info.state = prev_state;
1257  this_thr->th.ompt_thread_info.wait_id = 0;
1258 #endif
1259 
1260  /* got lock, we were dequeued by the thread that released lock */
1261  return KMP_LOCK_ACQUIRED_FIRST;
1262  }
1263 
1264  /* Yield if number of threads > number of logical processors */
1265  /* ToDo: Not sure why this should only be in oversubscription case,
1266  maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1267  KMP_YIELD_OVERSUB();
1268 
1269 #ifdef DEBUG_QUEUING_LOCKS
1270  TRACE_LOCK(gtid + 1, "acq retry");
1271 #endif
1272  }
1273  KMP_ASSERT2(0, "should not get here");
1274  return KMP_LOCK_ACQUIRED_FIRST;
1275 }
1276 
1277 int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1278  KMP_DEBUG_ASSERT(gtid >= 0);
1279 
1280  int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1281  return retval;
1282 }
1283 
1284 static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1285  kmp_int32 gtid) {
1286  char const *const func = "omp_set_lock";
1287  if (lck->lk.initialized != lck) {
1288  KMP_FATAL(LockIsUninitialized, func);
1289  }
1290  if (__kmp_is_queuing_lock_nestable(lck)) {
1291  KMP_FATAL(LockNestableUsedAsSimple, func);
1292  }
1293  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1294  KMP_FATAL(LockIsAlreadyOwned, func);
1295  }
1296 
1297  __kmp_acquire_queuing_lock(lck, gtid);
1298 
1299  lck->lk.owner_id = gtid + 1;
1300  return KMP_LOCK_ACQUIRED_FIRST;
1301 }
1302 
1303 int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1304  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1305  kmp_int32 head;
1306 #ifdef KMP_DEBUG
1307  kmp_info_t *this_thr;
1308 #endif
1309 
1310  KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
1311  KMP_DEBUG_ASSERT(gtid >= 0);
1312 #ifdef KMP_DEBUG
1313  this_thr = __kmp_thread_from_gtid(gtid);
1314  KMP_DEBUG_ASSERT(this_thr != NULL);
1315  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1316 #endif
1317 
1318  head = *head_id_p;
1319 
1320  if (head == 0) { /* nobody on queue, nobody holding */
1321  /* try (0,0)->(-1,0) */
1322  if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
1323  KA_TRACE(1000,
1324  ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
1325  KMP_FSYNC_ACQUIRED(lck);
1326  return TRUE;
1327  }
1328  }
1329 
1330  KA_TRACE(1000,
1331  ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
1332  return FALSE;
1333 }
1334 
1335 static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1336  kmp_int32 gtid) {
1337  char const *const func = "omp_test_lock";
1338  if (lck->lk.initialized != lck) {
1339  KMP_FATAL(LockIsUninitialized, func);
1340  }
1341  if (__kmp_is_queuing_lock_nestable(lck)) {
1342  KMP_FATAL(LockNestableUsedAsSimple, func);
1343  }
1344 
1345  int retval = __kmp_test_queuing_lock(lck, gtid);
1346 
1347  if (retval) {
1348  lck->lk.owner_id = gtid + 1;
1349  }
1350  return retval;
1351 }
1352 
1353 int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1354  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1355  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1356 
1357  KA_TRACE(1000,
1358  ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1359  KMP_DEBUG_ASSERT(gtid >= 0);
1360 #if KMP_DEBUG || DEBUG_QUEUING_LOCKS
1361  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1362 #endif
1363  KMP_DEBUG_ASSERT(this_thr != NULL);
1364 #ifdef DEBUG_QUEUING_LOCKS
1365  TRACE_LOCK(gtid + 1, "rel ent");
1366 
1367  if (this_thr->th.th_spin_here)
1368  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1369  if (this_thr->th.th_next_waiting != 0)
1370  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1371 #endif
1372  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1373  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1374 
1375  KMP_FSYNC_RELEASING(lck);
1376 
1377  while (1) {
1378  kmp_int32 dequeued;
1379  kmp_int32 head;
1380  kmp_int32 tail;
1381 
1382  head = *head_id_p;
1383 
1384 #ifdef DEBUG_QUEUING_LOCKS
1385  tail = *tail_id_p;
1386  TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
1387  if (head == 0)
1388  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1389 #endif
1390  KMP_DEBUG_ASSERT(head !=
1391  0); /* holding the lock, head must be -1 or queue head */
1392 
1393  if (head == -1) { /* nobody on queue */
1394  /* try (-1,0)->(0,0) */
1395  if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
1396  KA_TRACE(
1397  1000,
1398  ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1399  lck, gtid));
1400 #ifdef DEBUG_QUEUING_LOCKS
1401  TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
1402 #endif
1403 
1404 #if OMPT_SUPPORT
1405 /* nothing to do - no other thread is trying to shift blame */
1406 #endif
1407  return KMP_LOCK_RELEASED;
1408  }
1409  dequeued = FALSE;
1410  } else {
1411  KMP_MB();
1412  tail = *tail_id_p;
1413  if (head == tail) { /* only one thread on the queue */
1414 #ifdef DEBUG_QUEUING_LOCKS
1415  if (head <= 0)
1416  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1417 #endif
1418  KMP_DEBUG_ASSERT(head > 0);
1419 
1420  /* try (h,h)->(-1,0) */
1421  dequeued = KMP_COMPARE_AND_STORE_REL64(
1422  RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
1423  KMP_PACK_64(-1, 0));
1424 #ifdef DEBUG_QUEUING_LOCKS
1425  TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
1426 #endif
1427 
1428  } else {
1429  volatile kmp_int32 *waiting_id_p;
1430  kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1431  KMP_DEBUG_ASSERT(head_thr != NULL);
1432  waiting_id_p = &head_thr->th.th_next_waiting;
1433 
1434 /* Does this require synchronous reads? */
1435 #ifdef DEBUG_QUEUING_LOCKS
1436  if (head <= 0 || tail <= 0)
1437  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1438 #endif
1439  KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1440 
1441  /* try (h,t)->(h',t) or (t,t) */
1442  KMP_MB();
1443  /* make sure enqueuing thread has time to update next waiting thread
1444  * field */
1445  *head_id_p =
1446  KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
1447 #ifdef DEBUG_QUEUING_LOCKS
1448  TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
1449 #endif
1450  dequeued = TRUE;
1451  }
1452  }
1453 
1454  if (dequeued) {
1455  kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1456  KMP_DEBUG_ASSERT(head_thr != NULL);
1457 
1458 /* Does this require synchronous reads? */
1459 #ifdef DEBUG_QUEUING_LOCKS
1460  if (head <= 0 || tail <= 0)
1461  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1462 #endif
1463  KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1464 
1465  /* For clean code only. Thread not released until next statement prevents
1466  race with acquire code. */
1467  head_thr->th.th_next_waiting = 0;
1468 #ifdef DEBUG_QUEUING_LOCKS
1469  TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
1470 #endif
1471 
1472  KMP_MB();
1473  /* reset spin value */
1474  head_thr->th.th_spin_here = FALSE;
1475 
1476  KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
1477  "dequeuing\n",
1478  lck, gtid));
1479 #ifdef DEBUG_QUEUING_LOCKS
1480  TRACE_LOCK(gtid + 1, "rel exit 2");
1481 #endif
1482  return KMP_LOCK_RELEASED;
1483  }
1484  /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
1485  threads */
1486 
1487 #ifdef DEBUG_QUEUING_LOCKS
1488  TRACE_LOCK(gtid + 1, "rel retry");
1489 #endif
1490 
1491  } /* while */
1492  KMP_ASSERT2(0, "should not get here");
1493  return KMP_LOCK_RELEASED;
1494 }
1495 
1496 static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1497  kmp_int32 gtid) {
1498  char const *const func = "omp_unset_lock";
1499  KMP_MB(); /* in case another processor initialized lock */
1500  if (lck->lk.initialized != lck) {
1501  KMP_FATAL(LockIsUninitialized, func);
1502  }
1503  if (__kmp_is_queuing_lock_nestable(lck)) {
1504  KMP_FATAL(LockNestableUsedAsSimple, func);
1505  }
1506  if (__kmp_get_queuing_lock_owner(lck) == -1) {
1507  KMP_FATAL(LockUnsettingFree, func);
1508  }
1509  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1510  KMP_FATAL(LockUnsettingSetByAnother, func);
1511  }
1512  lck->lk.owner_id = 0;
1513  return __kmp_release_queuing_lock(lck, gtid);
1514 }
1515 
1516 void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
1517  lck->lk.location = NULL;
1518  lck->lk.head_id = 0;
1519  lck->lk.tail_id = 0;
1520  lck->lk.next_ticket = 0;
1521  lck->lk.now_serving = 0;
1522  lck->lk.owner_id = 0; // no thread owns the lock.
1523  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1524  lck->lk.initialized = lck;
1525 
1526  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1527 }
1528 
1529 void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
1530  lck->lk.initialized = NULL;
1531  lck->lk.location = NULL;
1532  lck->lk.head_id = 0;
1533  lck->lk.tail_id = 0;
1534  lck->lk.next_ticket = 0;
1535  lck->lk.now_serving = 0;
1536  lck->lk.owner_id = 0;
1537  lck->lk.depth_locked = -1;
1538 }
1539 
1540 static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1541  char const *const func = "omp_destroy_lock";
1542  if (lck->lk.initialized != lck) {
1543  KMP_FATAL(LockIsUninitialized, func);
1544  }
1545  if (__kmp_is_queuing_lock_nestable(lck)) {
1546  KMP_FATAL(LockNestableUsedAsSimple, func);
1547  }
1548  if (__kmp_get_queuing_lock_owner(lck) != -1) {
1549  KMP_FATAL(LockStillOwned, func);
1550  }
1551  __kmp_destroy_queuing_lock(lck);
1552 }
1553 
1554 // nested queuing locks
1555 
1556 int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1557  KMP_DEBUG_ASSERT(gtid >= 0);
1558 
1559  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1560  lck->lk.depth_locked += 1;
1561  return KMP_LOCK_ACQUIRED_NEXT;
1562  } else {
1563  __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1564  KMP_MB();
1565  lck->lk.depth_locked = 1;
1566  KMP_MB();
1567  lck->lk.owner_id = gtid + 1;
1568  return KMP_LOCK_ACQUIRED_FIRST;
1569  }
1570 }
1571 
1572 static int
1573 __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1574  kmp_int32 gtid) {
1575  char const *const func = "omp_set_nest_lock";
1576  if (lck->lk.initialized != lck) {
1577  KMP_FATAL(LockIsUninitialized, func);
1578  }
1579  if (!__kmp_is_queuing_lock_nestable(lck)) {
1580  KMP_FATAL(LockSimpleUsedAsNestable, func);
1581  }
1582  return __kmp_acquire_nested_queuing_lock(lck, gtid);
1583 }
1584 
1585 int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1586  int retval;
1587 
1588  KMP_DEBUG_ASSERT(gtid >= 0);
1589 
1590  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1591  retval = ++lck->lk.depth_locked;
1592  } else if (!__kmp_test_queuing_lock(lck, gtid)) {
1593  retval = 0;
1594  } else {
1595  KMP_MB();
1596  retval = lck->lk.depth_locked = 1;
1597  KMP_MB();
1598  lck->lk.owner_id = gtid + 1;
1599  }
1600  return retval;
1601 }
1602 
1603 static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1604  kmp_int32 gtid) {
1605  char const *const func = "omp_test_nest_lock";
1606  if (lck->lk.initialized != lck) {
1607  KMP_FATAL(LockIsUninitialized, func);
1608  }
1609  if (!__kmp_is_queuing_lock_nestable(lck)) {
1610  KMP_FATAL(LockSimpleUsedAsNestable, func);
1611  }
1612  return __kmp_test_nested_queuing_lock(lck, gtid);
1613 }
1614 
1615 int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1616  KMP_DEBUG_ASSERT(gtid >= 0);
1617 
1618  KMP_MB();
1619  if (--(lck->lk.depth_locked) == 0) {
1620  KMP_MB();
1621  lck->lk.owner_id = 0;
1622  __kmp_release_queuing_lock(lck, gtid);
1623  return KMP_LOCK_RELEASED;
1624  }
1625  return KMP_LOCK_STILL_HELD;
1626 }
1627 
1628 static int
1629 __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1630  kmp_int32 gtid) {
1631  char const *const func = "omp_unset_nest_lock";
1632  KMP_MB(); /* in case another processor initialized lock */
1633  if (lck->lk.initialized != lck) {
1634  KMP_FATAL(LockIsUninitialized, func);
1635  }
1636  if (!__kmp_is_queuing_lock_nestable(lck)) {
1637  KMP_FATAL(LockSimpleUsedAsNestable, func);
1638  }
1639  if (__kmp_get_queuing_lock_owner(lck) == -1) {
1640  KMP_FATAL(LockUnsettingFree, func);
1641  }
1642  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1643  KMP_FATAL(LockUnsettingSetByAnother, func);
1644  }
1645  return __kmp_release_nested_queuing_lock(lck, gtid);
1646 }
1647 
1648 void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1649  __kmp_init_queuing_lock(lck);
1650  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1651 }
1652 
1653 void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1654  __kmp_destroy_queuing_lock(lck);
1655  lck->lk.depth_locked = 0;
1656 }
1657 
1658 static void
1659 __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1660  char const *const func = "omp_destroy_nest_lock";
1661  if (lck->lk.initialized != lck) {
1662  KMP_FATAL(LockIsUninitialized, func);
1663  }
1664  if (!__kmp_is_queuing_lock_nestable(lck)) {
1665  KMP_FATAL(LockSimpleUsedAsNestable, func);
1666  }
1667  if (__kmp_get_queuing_lock_owner(lck) != -1) {
1668  KMP_FATAL(LockStillOwned, func);
1669  }
1670  __kmp_destroy_nested_queuing_lock(lck);
1671 }
1672 
1673 // access functions to fields which don't exist for all lock kinds.
1674 
1675 static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
1676  return lck->lk.location;
1677 }
1678 
1679 static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
1680  const ident_t *loc) {
1681  lck->lk.location = loc;
1682 }
1683 
1684 static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
1685  return lck->lk.flags;
1686 }
1687 
1688 static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
1689  kmp_lock_flags_t flags) {
1690  lck->lk.flags = flags;
1691 }
1692 
1693 #if KMP_USE_ADAPTIVE_LOCKS
1694 
1695 /* RTM Adaptive locks */
1696 
1697 #if KMP_HAVE_RTM_INTRINSICS
1698 #include <immintrin.h>
1699 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1700 
1701 #else
1702 
1703 // Values from the status register after failed speculation.
1704 #define _XBEGIN_STARTED (~0u)
1705 #define _XABORT_EXPLICIT (1 << 0)
1706 #define _XABORT_RETRY (1 << 1)
1707 #define _XABORT_CONFLICT (1 << 2)
1708 #define _XABORT_CAPACITY (1 << 3)
1709 #define _XABORT_DEBUG (1 << 4)
1710 #define _XABORT_NESTED (1 << 5)
1711 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1712 
1713 // Aborts for which it's worth trying again immediately
1714 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1715 
1716 #define STRINGIZE_INTERNAL(arg) #arg
1717 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1718 
1719 // Access to RTM instructions
/* A version of XBegin which returns -1 on successfully starting speculation,
   and the value of EAX on an abort. This is the same definition as the
   compiler intrinsic that will be supported at some point. */
1723 static __inline int _xbegin() {
1724  int res = -1;
1725 
1726 #if KMP_OS_WINDOWS
1727 #if KMP_ARCH_X86_64
1728  _asm {
1729  _emit 0xC7
1730  _emit 0xF8
1731  _emit 2
1732  _emit 0
1733  _emit 0
1734  _emit 0
1735  jmp L2
1736  mov res, eax
1737  L2:
1738  }
1739 #else /* IA32 */
1740  _asm {
1741  _emit 0xC7
1742  _emit 0xF8
1743  _emit 2
1744  _emit 0
1745  _emit 0
1746  _emit 0
1747  jmp L2
1748  mov res, eax
1749  L2:
1750  }
1751 #endif // KMP_ARCH_X86_64
1752 #else
1753  /* Note that %eax must be noted as killed (clobbered), because the XSR is
1754  returned in %eax(%rax) on abort. Other register values are restored, so
1755  don't need to be killed.
1756 
1757  We must also mark 'res' as an input and an output, since otherwise
1758  'res=-1' may be dropped as being dead, whereas we do need the assignment on
1759  the successful (i.e., non-abort) path. */
1760  __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n"
1761  " .long 1f-1b-6\n"
1762  " jmp 2f\n"
1763  "1: movl %%eax,%0\n"
1764  "2:"
1765  : "+r"(res)::"memory", "%eax");
1766 #endif // KMP_OS_WINDOWS
1767  return res;
1768 }
1769 
1770 /* Transaction end */
1771 static __inline void _xend() {
1772 #if KMP_OS_WINDOWS
1773  __asm {
1774  _emit 0x0f
1775  _emit 0x01
1776  _emit 0xd5
1777  }
1778 #else
1779  __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
1780 #endif
1781 }
1782 
/* This is a macro; the argument must be a single-byte constant which can be
   evaluated by the inline assembler, since it is emitted as a byte into the
   assembly code. */
1786 // clang-format off
1787 #if KMP_OS_WINDOWS
1788 #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
1789 #else
1790 #define _xabort(ARG) \
1791  __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
1792 #endif
1793 // clang-format on
#endif // KMP_HAVE_RTM_INTRINSICS
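
// Illustrative sketch (comments only, not compiled): the canonical RTM usage
// pattern that the intrinsics above support. The retry policy shown is an
// assumption for illustration, not the runtime's actual policy.
//
//   unsigned status = _xbegin();
//   if (status == _XBEGIN_STARTED) {
//     // ... speculative critical section ...
//     _xend(); // commit the transaction
//   } else if (status & SOFT_ABORT_MASK) {
//     // Transient abort (retry/conflict/explicit): worth retrying soon.
//   } else {
//     // Hard abort (e.g. capacity): fall back to a real lock.
//   }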
1795 
// Statistics are collected for testing purposes
1797 #if KMP_DEBUG_ADAPTIVE_LOCKS
1798 
1799 // We accumulate speculative lock statistics when the lock is destroyed. We
1800 // keep locks that haven't been destroyed in the liveLocks list so that we can
1801 // grab their statistics too.
1802 static kmp_adaptive_lock_statistics_t destroyedStats;
1803 
1804 // To hold the list of live locks.
1805 static kmp_adaptive_lock_info_t liveLocks;
1806 
1807 // A lock so we can safely update the list of locks.
1808 static kmp_bootstrap_lock_t chain_lock =
1809  KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);
1810 
1811 // Initialize the list of stats.
1812 void __kmp_init_speculative_stats() {
1813  kmp_adaptive_lock_info_t *lck = &liveLocks;
1814 
1815  memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
1816  sizeof(lck->stats));
1817  lck->stats.next = lck;
1818  lck->stats.prev = lck;
1819 
1820  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1821  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1822 
1823  __kmp_init_bootstrap_lock(&chain_lock);
1824 }
1825 
1826 // Insert the lock into the circular list
1827 static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
1828  __kmp_acquire_bootstrap_lock(&chain_lock);
1829 
1830  lck->stats.next = liveLocks.stats.next;
1831  lck->stats.prev = &liveLocks;
1832 
1833  liveLocks.stats.next = lck;
1834  lck->stats.next->stats.prev = lck;
1835 
1836  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1837  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1838 
1839  __kmp_release_bootstrap_lock(&chain_lock);
1840 }
1841 
1842 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
1843  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1844  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1845 
1846  kmp_adaptive_lock_info_t *n = lck->stats.next;
1847  kmp_adaptive_lock_info_t *p = lck->stats.prev;
1848 
1849  n->stats.prev = p;
1850  p->stats.next = n;
1851 }
1852 
1853 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1854  memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
1855  sizeof(lck->stats));
1856  __kmp_remember_lock(lck);
1857 }
1858 
1859 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
1860  kmp_adaptive_lock_info_t *lck) {
1861  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
1862 
1863  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
1864  t->successfulSpeculations += s->successfulSpeculations;
1865  t->hardFailedSpeculations += s->hardFailedSpeculations;
1866  t->softFailedSpeculations += s->softFailedSpeculations;
1867  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
1868  t->lemmingYields += s->lemmingYields;
1869 }
1870 
1871 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1872  __kmp_acquire_bootstrap_lock(&chain_lock);
1873 
1874  __kmp_add_stats(&destroyedStats, lck);
1875  __kmp_forget_lock(lck);
1876 
1877  __kmp_release_bootstrap_lock(&chain_lock);
1878 }
1879 
1880 static float percent(kmp_uint32 count, kmp_uint32 total) {
1881  return (total == 0) ? 0.0 : (100.0 * count) / total;
1882 }
1883 
1884 void __kmp_print_speculative_stats() {
1885  kmp_adaptive_lock_statistics_t total = destroyedStats;
1886  kmp_adaptive_lock_info_t *lck;
1887 
1888  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
1889  __kmp_add_stats(&total, lck);
1890  }
1891  kmp_adaptive_lock_statistics_t *t = &total;
1892  kmp_uint32 totalSections =
1893  t->nonSpeculativeAcquires + t->successfulSpeculations;
1894  kmp_uint32 totalSpeculations = t->successfulSpeculations +
1895  t->hardFailedSpeculations +
1896  t->softFailedSpeculations;
1897  if (totalSections <= 0)
1898  return;
1899 
1900  kmp_safe_raii_file_t statsFile;
1901  if (strcmp(__kmp_speculative_statsfile, "-") == 0) {
1902  statsFile.set_stdout();
1903  } else {
1904  size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
1905  char buffer[buffLen];
1906  KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
1907  (kmp_int32)getpid());
1908  statsFile.open(buffer, "w");
1909  }
1910 
1911  fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
1912  fprintf(statsFile,
1913  " Lock parameters: \n"
1914  " max_soft_retries : %10d\n"
1915  " max_badness : %10d\n",
1916  __kmp_adaptive_backoff_params.max_soft_retries,
1917  __kmp_adaptive_backoff_params.max_badness);
1918  fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
1919  t->nonSpeculativeAcquireAttempts);
1920  fprintf(statsFile, " Total critical sections : %10d\n",
1921  totalSections);
1922  fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n",
1923  t->successfulSpeculations,
1924  percent(t->successfulSpeculations, totalSections));
1925  fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
1926  t->nonSpeculativeAcquires,
1927  percent(t->nonSpeculativeAcquires, totalSections));
1928  fprintf(statsFile, " Lemming yields : %10d\n\n",
1929  t->lemmingYields);
1930 
1931  fprintf(statsFile, " Speculative acquire attempts : %10d\n",
1932  totalSpeculations);
1933  fprintf(statsFile, " Successes : %10d (%5.1f%%)\n",
1934  t->successfulSpeculations,
1935  percent(t->successfulSpeculations, totalSpeculations));
1936  fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n",
1937  t->softFailedSpeculations,
1938  percent(t->softFailedSpeculations, totalSpeculations));
1939  fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n",
1940  t->hardFailedSpeculations,
1941  percent(t->hardFailedSpeculations, totalSpeculations));
1942 }
1943 
1944 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
1945 #else
1946 #define KMP_INC_STAT(lck, stat)
1947 
1948 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
1949 
1950 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
  // It is enough to check that the head_id is zero.
  // We don't need to check the tail as well.
1953  bool res = lck->lk.head_id == 0;
1954 
1955 // We need a fence here, since we must ensure that no memory operations
1956 // from later in this thread float above that read.
1957 #if KMP_COMPILER_ICC
1958  _mm_mfence();
1959 #else
1960  __sync_synchronize();
1961 #endif
1962 
1963  return res;
1964 }
1965 
1966 // Functions for manipulating the badness
1967 static __inline void
1968 __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
1969  // Reset the badness to zero so we eagerly try to speculate again
1970  lck->lk.adaptive.badness = 0;
1971  KMP_INC_STAT(lck, successfulSpeculations);
1972 }
1973 
1974 // Create a bit mask with one more set bit.
1975 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
1976  kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
1977  if (newBadness > lck->lk.adaptive.max_badness) {
1978  return;
1979  } else {
1980  lck->lk.adaptive.badness = newBadness;
1981  }
1982 }
1983 
1984 // Check whether speculation should be attempted.
1985 KMP_ATTRIBUTE_TARGET_RTM
1986 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
1987  kmp_int32 gtid) {
1988  kmp_uint32 badness = lck->lk.adaptive.badness;
1989  kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
1990  int res = (attempts & badness) == 0;
1991  return res;
1992 }
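
// Worked example (illustrative): badness acts as a bit mask over the attempt
// counter. After three consecutive speculation failures badness is 0b111, so
// (attempts & badness) == 0 only when the low three bits of acquire_attempts
// are zero, i.e. speculation is retried on every 8th acquire attempt. A
// single success resets badness to 0, and every attempt speculates again.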
1993 
1994 // Attempt to acquire only the speculative lock.
1995 // Does not back off to the non-speculative lock.
1996 KMP_ATTRIBUTE_TARGET_RTM
1997 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
1998  kmp_int32 gtid) {
1999  int retries = lck->lk.adaptive.max_soft_retries;
2000 
2001  // We don't explicitly count the start of speculation, rather we record the
2002  // results (success, hard fail, soft fail). The sum of all of those is the
2003  // total number of times we started speculation since all speculations must
2004  // end one of those ways.
2005  do {
2006  kmp_uint32 status = _xbegin();
2007  // Switch this in to disable actual speculation but exercise at least some
2008  // of the rest of the code. Useful for debugging...
2009  // kmp_uint32 status = _XABORT_NESTED;
2010 
2011  if (status == _XBEGIN_STARTED) {
2012  /* We have successfully started speculation. Check that no-one acquired
2013  the lock for real between when we last looked and now. This also gets
2014  the lock cache line into our read-set, which we need so that we'll
2015  abort if anyone later claims it for real. */
2016  if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2017  // Lock is now visibly acquired, so someone beat us to it. Abort the
2018  // transaction so we'll restart from _xbegin with the failure status.
2019  _xabort(0x01);
2020  KMP_ASSERT2(0, "should not get here");
2021  }
2022  return 1; // Lock has been acquired (speculatively)
2023  } else {
2024  // We have aborted, update the statistics
2025  if (status & SOFT_ABORT_MASK) {
2026  KMP_INC_STAT(lck, softFailedSpeculations);
2027  // and loop round to retry.
2028  } else {
2029  KMP_INC_STAT(lck, hardFailedSpeculations);
2030  // Give up if we had a hard failure.
2031  break;
2032  }
2033  }
2034  } while (retries--); // Loop while we have retries, and didn't fail hard.
2035 
2036  // Either we had a hard failure or we didn't succeed softly after
2037  // the full set of attempts, so back off the badness.
2038  __kmp_step_badness(lck);
2039  return 0;
2040 }
2041 
2042 // Attempt to acquire the speculative lock, or back off to the non-speculative
2043 // one if the speculative lock cannot be acquired.
2044 // We can succeed speculatively, non-speculatively, or fail.
2045 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
2046  // First try to acquire the lock speculatively
2047  if (__kmp_should_speculate(lck, gtid) &&
2048  __kmp_test_adaptive_lock_only(lck, gtid))
2049  return 1;
2050 
2051  // Speculative acquisition failed, so try to acquire it non-speculatively.
2052  // Count the non-speculative acquire attempt
2053  lck->lk.adaptive.acquire_attempts++;
2054 
2055  // Use base, non-speculative lock.
2056  if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
2057  KMP_INC_STAT(lck, nonSpeculativeAcquires);
2058  return 1; // Lock is acquired (non-speculatively)
2059  } else {
2060  return 0; // Failed to acquire the lock, it's already visibly locked.
2061  }
2062 }
2063 
2064 static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2065  kmp_int32 gtid) {
2066  char const *const func = "omp_test_lock";
2067  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2068  KMP_FATAL(LockIsUninitialized, func);
2069  }
2070 
2071  int retval = __kmp_test_adaptive_lock(lck, gtid);
2072 
2073  if (retval) {
2074  lck->lk.qlk.owner_id = gtid + 1;
2075  }
2076  return retval;
2077 }
2078 
2079 // Block until we can acquire a speculative, adaptive lock. We check whether we
2080 // should be trying to speculate. If we should be, we check the real lock to see
2081 // if it is free, and, if not, pause without attempting to acquire it until it
2082 // is. Then we try the speculative acquire. This means that although we suffer
// from lemmings a little (because we can't acquire the lock speculatively
// until the queue of threads waiting has cleared), we don't get into a state
// where we can never acquire the lock speculatively (because we force the
// queue to clear by preventing new arrivals from entering the queue). This
// does mean that when we're trying to break lemmings, the lock is no longer
// fair. However, OpenMP makes no guarantee that its locks are fair, so this
// isn't a real
2089 // problem.
2090 static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
2091  kmp_int32 gtid) {
2092  if (__kmp_should_speculate(lck, gtid)) {
2093  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2094  if (__kmp_test_adaptive_lock_only(lck, gtid))
2095  return;
2096  // We tried speculation and failed, so give up.
2097  } else {
      // We can't try speculation until the lock is free, so we pause here
      // (without suspending on the queuing lock) to allow it to drain, then
      // try again. All other threads will also see the same result for
2101  // shouldSpeculate, so will be doing the same if they try to claim the
2102  // lock from now on.
2103  while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2104  KMP_INC_STAT(lck, lemmingYields);
2105  KMP_YIELD(TRUE);
2106  }
2107 
2108  if (__kmp_test_adaptive_lock_only(lck, gtid))
2109  return;
2110  }
2111  }
2112 
2113  // Speculative acquisition failed, so acquire it non-speculatively.
2114  // Count the non-speculative acquire attempt
2115  lck->lk.adaptive.acquire_attempts++;
2116 
2117  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
2118  // We have acquired the base lock, so count that.
2119  KMP_INC_STAT(lck, nonSpeculativeAcquires);
2120 }
2121 
2122 static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2123  kmp_int32 gtid) {
2124  char const *const func = "omp_set_lock";
2125  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2126  KMP_FATAL(LockIsUninitialized, func);
2127  }
2128  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2129  KMP_FATAL(LockIsAlreadyOwned, func);
2130  }
2131 
2132  __kmp_acquire_adaptive_lock(lck, gtid);
2133 
2134  lck->lk.qlk.owner_id = gtid + 1;
2135 }
2136 
2137 KMP_ATTRIBUTE_TARGET_RTM
2138 static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
2139  kmp_int32 gtid) {
2140  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
2141  lck))) { // If the lock doesn't look claimed we must be speculating.
2142  // (Or the user's code is buggy and they're releasing without locking;
2143  // if we had XTEST we'd be able to check that case...)
2144  _xend(); // Exit speculation
2145  __kmp_update_badness_after_success(lck);
2146  } else { // Since the lock *is* visibly locked we're not speculating,
2147  // so should use the underlying lock's release scheme.
2148  __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
2149  }
2150  return KMP_LOCK_RELEASED;
2151 }
2152 
2153 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2154  kmp_int32 gtid) {
2155  char const *const func = "omp_unset_lock";
2156  KMP_MB(); /* in case another processor initialized lock */
2157  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2158  KMP_FATAL(LockIsUninitialized, func);
2159  }
2160  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
2161  KMP_FATAL(LockUnsettingFree, func);
2162  }
2163  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
2164  KMP_FATAL(LockUnsettingSetByAnother, func);
2165  }
2166  lck->lk.qlk.owner_id = 0;
2167  __kmp_release_adaptive_lock(lck, gtid);
2168  return KMP_LOCK_RELEASED;
2169 }
2170 
2171 static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
2172  __kmp_init_queuing_lock(GET_QLK_PTR(lck));
2173  lck->lk.adaptive.badness = 0;
2174  lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
2175  lck->lk.adaptive.max_soft_retries =
2176  __kmp_adaptive_backoff_params.max_soft_retries;
2177  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2178 #if KMP_DEBUG_ADAPTIVE_LOCKS
2179  __kmp_zero_speculative_stats(&lck->lk.adaptive);
2180 #endif
2181  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2182 }
2183 
2184 static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
2185 #if KMP_DEBUG_ADAPTIVE_LOCKS
2186  __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
2187 #endif
2188  __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
2189  // Nothing needed for the speculative part.
2190 }
2191 
2192 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2193  char const *const func = "omp_destroy_lock";
2194  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2195  KMP_FATAL(LockIsUninitialized, func);
2196  }
2197  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
2198  KMP_FATAL(LockStillOwned, func);
2199  }
2200  __kmp_destroy_adaptive_lock(lck);
2201 }
2202 
2203 #endif // KMP_USE_ADAPTIVE_LOCKS
2204 
2205 /* ------------------------------------------------------------------------ */
2206 /* DRDPA ticket locks */
2207 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2208 
2209 static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
2210  return lck->lk.owner_id - 1;
2211 }
2212 
2213 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
2214  return lck->lk.depth_locked != -1;
2215 }
2216 
2217 __forceinline static int
2218 __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2219  kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
2220  kmp_uint64 mask = lck->lk.mask; // atomic load
2221  std::atomic<kmp_uint64> *polls = lck->lk.polls;
2222 
2223 #ifdef USE_LOCK_PROFILE
2224  if (polls[ticket & mask] != ticket)
2225  __kmp_printf("LOCK CONTENTION: %p\n", lck);
2226 /* else __kmp_printf( "." );*/
2227 #endif /* USE_LOCK_PROFILE */
2228 
2229  // Now spin-wait, but reload the polls pointer and mask, in case the
2230  // polling area has been reconfigured. Unless it is reconfigured, the
2231  // reloads stay in L1 cache and are cheap.
2232  //
2233  // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
2234  // The current implementation of KMP_WAIT doesn't allow for mask
2235  // and poll to be re-read every spin iteration.
2236  kmp_uint32 spins;
2237  kmp_uint64 time;
2238  KMP_FSYNC_PREPARE(lck);
2239  KMP_INIT_YIELD(spins);
2240  KMP_INIT_BACKOFF(time);
2241  while (polls[ticket & mask] < ticket) { // atomic load
2242  KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
2243  // Re-read the mask and the poll pointer from the lock structure.
2244  //
2245  // Make certain that "mask" is read before "polls" !!!
2246  //
    // If another thread reconfigures the polling area and updates the values,
    // and we get the new value of mask and the old polls pointer, we
2249  // could access memory beyond the end of the old polling area.
2250  mask = lck->lk.mask; // atomic load
2251  polls = lck->lk.polls; // atomic load
2252  }
2253 
2254  // Critical section starts here
2255  KMP_FSYNC_ACQUIRED(lck);
2256  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2257  ticket, lck));
2258  lck->lk.now_serving = ticket; // non-volatile store
2259 
2260  // Deallocate a garbage polling area if we know that we are the last
2261  // thread that could possibly access it.
2262  //
2263  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2264  // ticket.
2265  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2266  __kmp_free(lck->lk.old_polls);
2267  lck->lk.old_polls = NULL;
2268  lck->lk.cleanup_ticket = 0;
2269  }
2270 
2271  // Check to see if we should reconfigure the polling area.
2272  // If there is still a garbage polling area to be deallocated from a
2273  // previous reconfiguration, let a later thread reconfigure it.
2274  if (lck->lk.old_polls == NULL) {
2275  bool reconfigure = false;
2276  std::atomic<kmp_uint64> *old_polls = polls;
2277  kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2278 
2279  if (TCR_4(__kmp_nth) >
2280  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2281  // We are in oversubscription mode. Contract the polling area
2282  // down to a single location, if that hasn't been done already.
2283  if (num_polls > 1) {
2284  reconfigure = true;
        mask = 0;
        num_polls = 1;
2288  polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2289  sizeof(*polls));
2290  polls[0] = ticket;
2291  }
2292  } else {
2293  // We are in under/fully subscribed mode. Check the number of
2294  // threads waiting on the lock. The size of the polling area
2295  // should be at least the number of threads waiting.
2296  kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2297  if (num_waiting > num_polls) {
2298  kmp_uint32 old_num_polls = num_polls;
2299  reconfigure = true;
2300  do {
2301  mask = (mask << 1) | 1;
2302  num_polls *= 2;
2303  } while (num_polls <= num_waiting);
2304 
2305  // Allocate the new polling area, and copy the relevant portion
2306  // of the old polling area to the new area. __kmp_allocate()
2307  // zeroes the memory it allocates, and most of the old area is
2308  // just zero padding, so we only copy the release counters.
2309  polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2310  sizeof(*polls));
2311  kmp_uint32 i;
2312  for (i = 0; i < old_num_polls; i++) {
2313  polls[i].store(old_polls[i]);
2314  }
2315  }
2316  }
2317 
2318  if (reconfigure) {
2319  // Now write the updated fields back to the lock structure.
2320  //
2321  // Make certain that "polls" is written before "mask" !!!
2322  //
2323  // If another thread picks up the new value of mask and the old polls
      // pointer, it could access memory beyond the end of the old polling
2325  // area.
2326  //
2327  // On x86, we need memory fences.
2328  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
2329  "lock %p to %d polls\n",
2330  ticket, lck, num_polls));
2331 
2332  lck->lk.old_polls = old_polls;
2333  lck->lk.polls = polls; // atomic store
2334 
2335  KMP_MB();
2336 
2337  lck->lk.num_polls = num_polls;
2338  lck->lk.mask = mask; // atomic store
2339 
2340  KMP_MB();
2341 
2342  // Only after the new polling area and mask have been flushed
2343  // to main memory can we update the cleanup ticket field.
2344  //
2345  // volatile load / non-volatile store
2346  lck->lk.cleanup_ticket = lck->lk.next_ticket;
2347  }
2348  }
2349  return KMP_LOCK_ACQUIRED_FIRST;
2350 }
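
// Worked example (illustrative): with num_polls = 4 and mask = 3, the thread
// holding ticket 5 spins on polls[5 & 3] == polls[1] until the previous owner
// (ticket 4) releases by storing 5 there. Waiters thus poll distinct
// locations (up to the table size), which is the "distributed polling area"
// that gives the lock its name.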
2351 
2352 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2353  int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2354  return retval;
2355 }
2356 
2357 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2358  kmp_int32 gtid) {
2359  char const *const func = "omp_set_lock";
2360  if (lck->lk.initialized != lck) {
2361  KMP_FATAL(LockIsUninitialized, func);
2362  }
2363  if (__kmp_is_drdpa_lock_nestable(lck)) {
2364  KMP_FATAL(LockNestableUsedAsSimple, func);
2365  }
2366  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
2367  KMP_FATAL(LockIsAlreadyOwned, func);
2368  }
2369 
2370  __kmp_acquire_drdpa_lock(lck, gtid);
2371 
2372  lck->lk.owner_id = gtid + 1;
2373  return KMP_LOCK_ACQUIRED_FIRST;
2374 }
2375 
2376 int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2377  // First get a ticket, then read the polls pointer and the mask.
2378  // The polls pointer must be read before the mask!!! (See above)
2379  kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
2380  std::atomic<kmp_uint64> *polls = lck->lk.polls;
2381  kmp_uint64 mask = lck->lk.mask; // atomic load
2382  if (polls[ticket & mask] == ticket) {
2383  kmp_uint64 next_ticket = ticket + 1;
2384  if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
2385  next_ticket)) {
2386  KMP_FSYNC_ACQUIRED(lck);
2387  KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2388  ticket, lck));
2389  lck->lk.now_serving = ticket; // non-volatile store
2390 
2391  // Since no threads are waiting, there is no possibility that we would
2392  // want to reconfigure the polling area. We might have the cleanup ticket
2393  // value (which says that it is now safe to deallocate old_polls), but
2394  // we'll let a later thread which calls __kmp_acquire_lock do that - this
2395  // routine isn't supposed to block, and we would risk blocks if we called
2396  // __kmp_free() to do the deallocation.
2397  return TRUE;
2398  }
2399  }
2400  return FALSE;
2401 }
2402 
2403 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2404  kmp_int32 gtid) {
2405  char const *const func = "omp_test_lock";
2406  if (lck->lk.initialized != lck) {
2407  KMP_FATAL(LockIsUninitialized, func);
2408  }
2409  if (__kmp_is_drdpa_lock_nestable(lck)) {
2410  KMP_FATAL(LockNestableUsedAsSimple, func);
2411  }
2412 
2413  int retval = __kmp_test_drdpa_lock(lck, gtid);
2414 
2415  if (retval) {
2416  lck->lk.owner_id = gtid + 1;
2417  }
2418  return retval;
2419 }
2420 
2421 int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2422  // Read the ticket value from the lock data struct, then the polls pointer and
2423  // the mask. The polls pointer must be read before the mask!!! (See above)
2424  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
2425  std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
2426  kmp_uint64 mask = lck->lk.mask; // atomic load
2427  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2428  ticket - 1, lck));
2429  KMP_FSYNC_RELEASING(lck);
2430  polls[ticket & mask] = ticket; // atomic store
2431  return KMP_LOCK_RELEASED;
2432 }
2433 
2434 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2435  kmp_int32 gtid) {
2436  char const *const func = "omp_unset_lock";
2437  KMP_MB(); /* in case another processor initialized lock */
2438  if (lck->lk.initialized != lck) {
2439  KMP_FATAL(LockIsUninitialized, func);
2440  }
2441  if (__kmp_is_drdpa_lock_nestable(lck)) {
2442  KMP_FATAL(LockNestableUsedAsSimple, func);
2443  }
2444  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2445  KMP_FATAL(LockUnsettingFree, func);
2446  }
2447  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
2448  (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
2449  KMP_FATAL(LockUnsettingSetByAnother, func);
2450  }
2451  lck->lk.owner_id = 0;
2452  return __kmp_release_drdpa_lock(lck, gtid);
2453 }
2454 
2455 void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
2456  lck->lk.location = NULL;
2457  lck->lk.mask = 0;
2458  lck->lk.num_polls = 1;
2459  lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
2460  lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2461  lck->lk.cleanup_ticket = 0;
2462  lck->lk.old_polls = NULL;
2463  lck->lk.next_ticket = 0;
2464  lck->lk.now_serving = 0;
2465  lck->lk.owner_id = 0; // no thread owns the lock.
2466  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2467  lck->lk.initialized = lck;
2468 
2469  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2470 }
2471 
2472 void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
2473  lck->lk.initialized = NULL;
2474  lck->lk.location = NULL;
2475  if (lck->lk.polls.load() != NULL) {
2476  __kmp_free(lck->lk.polls.load());
2477  lck->lk.polls = NULL;
2478  }
2479  if (lck->lk.old_polls != NULL) {
2480  __kmp_free(lck->lk.old_polls);
2481  lck->lk.old_polls = NULL;
2482  }
2483  lck->lk.mask = 0;
2484  lck->lk.num_polls = 0;
2485  lck->lk.cleanup_ticket = 0;
2486  lck->lk.next_ticket = 0;
2487  lck->lk.now_serving = 0;
2488  lck->lk.owner_id = 0;
2489  lck->lk.depth_locked = -1;
2490 }
2491 
2492 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2493  char const *const func = "omp_destroy_lock";
2494  if (lck->lk.initialized != lck) {
2495  KMP_FATAL(LockIsUninitialized, func);
2496  }
2497  if (__kmp_is_drdpa_lock_nestable(lck)) {
2498  KMP_FATAL(LockNestableUsedAsSimple, func);
2499  }
2500  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2501  KMP_FATAL(LockStillOwned, func);
2502  }
2503  __kmp_destroy_drdpa_lock(lck);
2504 }
2505 
2506 // nested drdpa ticket locks
2507 
2508 int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2509  KMP_DEBUG_ASSERT(gtid >= 0);
2510 
2511  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2512  lck->lk.depth_locked += 1;
2513  return KMP_LOCK_ACQUIRED_NEXT;
2514  } else {
2515  __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2516  KMP_MB();
2517  lck->lk.depth_locked = 1;
2518  KMP_MB();
2519  lck->lk.owner_id = gtid + 1;
2520  return KMP_LOCK_ACQUIRED_FIRST;
2521  }
2522 }
2523 
2524 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2525  kmp_int32 gtid) {
2526  char const *const func = "omp_set_nest_lock";
2527  if (lck->lk.initialized != lck) {
2528  KMP_FATAL(LockIsUninitialized, func);
2529  }
2530  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2531  KMP_FATAL(LockSimpleUsedAsNestable, func);
2532  }
2533  __kmp_acquire_nested_drdpa_lock(lck, gtid);
2534 }
2535 
2536 int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2537  int retval;
2538 
2539  KMP_DEBUG_ASSERT(gtid >= 0);
2540 
2541  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2542  retval = ++lck->lk.depth_locked;
2543  } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
2544  retval = 0;
2545  } else {
2546  KMP_MB();
2547  retval = lck->lk.depth_locked = 1;
2548  KMP_MB();
2549  lck->lk.owner_id = gtid + 1;
2550  }
2551  return retval;
2552 }
2553 
2554 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2555  kmp_int32 gtid) {
2556  char const *const func = "omp_test_nest_lock";
2557  if (lck->lk.initialized != lck) {
2558  KMP_FATAL(LockIsUninitialized, func);
2559  }
2560  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2561  KMP_FATAL(LockSimpleUsedAsNestable, func);
2562  }
2563  return __kmp_test_nested_drdpa_lock(lck, gtid);
2564 }
2565 
2566 int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2567  KMP_DEBUG_ASSERT(gtid >= 0);
2568 
2569  KMP_MB();
2570  if (--(lck->lk.depth_locked) == 0) {
2571  KMP_MB();
2572  lck->lk.owner_id = 0;
2573  __kmp_release_drdpa_lock(lck, gtid);
2574  return KMP_LOCK_RELEASED;
2575  }
2576  return KMP_LOCK_STILL_HELD;
2577 }
2578 
2579 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2580  kmp_int32 gtid) {
2581  char const *const func = "omp_unset_nest_lock";
2582  KMP_MB(); /* in case another processor initialized lock */
2583  if (lck->lk.initialized != lck) {
2584  KMP_FATAL(LockIsUninitialized, func);
2585  }
2586  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2587  KMP_FATAL(LockSimpleUsedAsNestable, func);
2588  }
2589  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2590  KMP_FATAL(LockUnsettingFree, func);
2591  }
2592  if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
2593  KMP_FATAL(LockUnsettingSetByAnother, func);
2594  }
2595  return __kmp_release_nested_drdpa_lock(lck, gtid);
2596 }
2597 
2598 void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2599  __kmp_init_drdpa_lock(lck);
2600  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2601 }
2602 
2603 void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2604  __kmp_destroy_drdpa_lock(lck);
2605  lck->lk.depth_locked = 0;
2606 }
2607 
2608 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2609  char const *const func = "omp_destroy_nest_lock";
2610  if (lck->lk.initialized != lck) {
2611  KMP_FATAL(LockIsUninitialized, func);
2612  }
2613  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2614  KMP_FATAL(LockSimpleUsedAsNestable, func);
2615  }
2616  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2617  KMP_FATAL(LockStillOwned, func);
2618  }
2619  __kmp_destroy_nested_drdpa_lock(lck);
2620 }
2621 
2622 // access functions to fields which don't exist for all lock kinds.
2623 
2624 static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {
2625  return lck->lk.location;
2626 }
2627 
2628 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
2629  const ident_t *loc) {
2630  lck->lk.location = loc;
2631 }
2632 
2633 static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {
2634  return lck->lk.flags;
2635 }
2636 
2637 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
2638  kmp_lock_flags_t flags) {
2639  lck->lk.flags = flags;
2640 }
2641 
2642 // Time stamp counter
2643 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2644 #define __kmp_tsc() __kmp_hardware_timestamp()
2645 // Runtime's default backoff parameters
2646 kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};
2647 #else
2648 // Use nanoseconds for other platforms
2649 extern kmp_uint64 __kmp_now_nsec();
2650 kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
2651 #define __kmp_tsc() __kmp_now_nsec()
2652 #endif
2653 
2654 // A useful predicate for dealing with timestamps that may wrap.
2655 // Is a before b? Since the timestamps may wrap, this is asking whether it's
2656 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2657 // Times where going clockwise is less distance than going anti-clockwise
// are in the future, others are in the past. E.g., with a = MAX-1 and
// b = MAX+1 (= 0), a > b as unsigned is true but does not mean a reached b;
// whereas signed(a) = -2 and signed(b) = 0 capture the actual difference.
2661 static inline bool before(kmp_uint64 a, kmp_uint64 b) {
2662  return ((kmp_int64)b - (kmp_int64)a) > 0;
2663 }
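
// Worked example (illustrative): with a = MAX-1 and b = MAX+1 (= 0 after
// wrap), (kmp_int64)b - (kmp_int64)a == 0 - (-2) == 2 > 0, so before(a, b)
// is true even though a > b as unsigned.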
2664 
2665 // Truncated binary exponential backoff function
2666 void __kmp_spin_backoff(kmp_backoff_t *boff) {
  // We could flatten this loop, but making it a nested loop gives better
  // results.
2668  kmp_uint32 i;
2669  for (i = boff->step; i > 0; i--) {
2670  kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
2671 #if KMP_HAVE_UMWAIT
2672  if (__kmp_umwait_enabled) {
2673  __kmp_tpause(0, boff->min_tick);
2674  } else {
2675 #endif
2676  do {
2677  KMP_CPU_PAUSE();
2678  } while (before(__kmp_tsc(), goal));
2679 #if KMP_HAVE_UMWAIT
2680  }
2681 #endif
2682  }
2683  boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2684 }
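
// Worked example (illustrative): with the default x86 parameters
// {1, 4096, 100} (read here as step, max_backoff, min_tick), successive
// contended calls spin for 1, 3, 7, 15, ... pause loops of ~min_tick ticks
// each; the final & (max_backoff - 1) caps the step at 4095, which is the
// "truncated" part of truncated binary exponential backoff.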
2685 
2686 #if KMP_USE_DYNAMIC_LOCK
2687 
// Direct lock initializer. It simply writes a tag to the low 8 bits of the
// lock word.
2690 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
2691  kmp_dyna_lockseq_t seq) {
2692  TCW_4(*lck, KMP_GET_D_TAG(seq));
2693  KA_TRACE(
2694  20,
2695  ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
2696 }
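
// Illustrative note (an assumption about KMP_GET_D_TAG, defined elsewhere):
// the tag is taken to be (seq << 1) | 1, so a direct lock word is always odd,
// while an indirect lock word stores (index << 1) and is always even. The low
// bit is what distinguishes the two kinds, and it is why the direct jump
// tables below fill in only the odd entries.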
2697 
2698 #if KMP_USE_TSX
2699 
2700 // HLE lock functions - imported from the testbed runtime.
2701 #define HLE_ACQUIRE ".byte 0xf2;"
2702 #define HLE_RELEASE ".byte 0xf3;"
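
// 0xf2 and 0xf3 are the XACQUIRE and XRELEASE prefixes. They reuse the
// REPNE/REP encodings, so CPUs without HLE simply ignore them and execute the
// plain xchg/mov, which is what makes these locks backward compatible.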
2703 
2704 static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
2705  __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
2706  return v;
2707 }
2708 
2709 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
2710 
2711 static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
2712  TCW_4(*lck, 0);
2713 }
2714 
2715 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2716  // Use gtid for KMP_LOCK_BUSY if necessary
2717  if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
2718  int delay = 1;
2719  do {
2720  while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
2721  for (int i = delay; i != 0; --i)
2722  KMP_CPU_PAUSE();
2723  delay = ((delay << 1) | 1) & 7;
2724  }
2725  } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
2726  }
2727 }
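
// Worked example (illustrative): the delay above follows 1, 3, 7, 7, 7, ...
// since ((delay << 1) | 1) & 7 caps at 7, i.e. a small truncated exponential
// backoff between HLE retry attempts.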
2728 
2729 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2730  kmp_int32 gtid) {
2731  __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
2732 }
2733 
2734 static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2735  __asm__ volatile(HLE_RELEASE "movl %1,%0"
2736  : "=m"(*lck)
2737  : "r"(KMP_LOCK_FREE(hle))
2738  : "memory");
2739  return KMP_LOCK_RELEASED;
2740 }
2741 
2742 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2743  kmp_int32 gtid) {
2744  return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
2745 }
2746 
2747 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2748  return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
2749 }
2750 
2751 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2752  kmp_int32 gtid) {
2753  return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
2754 }
2755 
2756 static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2757  __kmp_init_queuing_lock(lck);
2758 }
2759 
2760 static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2761  __kmp_destroy_queuing_lock(lck);
2762 }
2763 
2764 static void
2765 __kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
2766  __kmp_destroy_queuing_lock_with_checks(lck);
2767 }
2768 
2769 KMP_ATTRIBUTE_TARGET_RTM
2770 static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2771  kmp_int32 gtid) {
2772  unsigned retries = 3, status;
2773  do {
2774  status = _xbegin();
2775  if (status == _XBEGIN_STARTED) {
2776  if (__kmp_is_unlocked_queuing_lock(lck))
2777  return;
2778  _xabort(0xff);
2779  }
2780  if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2781  // Wait until lock becomes free
2782  while (!__kmp_is_unlocked_queuing_lock(lck)) {
2783  KMP_YIELD(TRUE);
2784  }
2785  } else if (!(status & _XABORT_RETRY))
2786  break;
2787  } while (retries--);
2788 
2789  // Fall-back non-speculative lock (xchg)
2790  __kmp_acquire_queuing_lock(lck, gtid);
2791 }
2792 
2793 static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2794  kmp_int32 gtid) {
2795  __kmp_acquire_rtm_queuing_lock(lck, gtid);
2796 }
2797 
2798 KMP_ATTRIBUTE_TARGET_RTM
2799 static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2800  kmp_int32 gtid) {
2801  if (__kmp_is_unlocked_queuing_lock(lck)) {
2802  // Releasing from speculation
2803  _xend();
2804  } else {
2805  // Releasing from a real lock
2806  __kmp_release_queuing_lock(lck, gtid);
2807  }
2808  return KMP_LOCK_RELEASED;
2809 }
2810 
2811 static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2812  kmp_int32 gtid) {
2813  return __kmp_release_rtm_queuing_lock(lck, gtid);
2814 }
2815 
2816 KMP_ATTRIBUTE_TARGET_RTM
2817 static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2818  kmp_int32 gtid) {
2819  unsigned retries = 3, status;
2820  do {
2821  status = _xbegin();
2822  if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
2823  return 1;
2824  }
2825  if (!(status & _XABORT_RETRY))
2826  break;
2827  } while (retries--);
2828 
2829  return __kmp_test_queuing_lock(lck, gtid);
2830 }
2831 
2832 static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2833  kmp_int32 gtid) {
2834  return __kmp_test_rtm_queuing_lock(lck, gtid);
2835 }
2836 
// Reuse kmp_tas_lock_t for the TSX lock, which uses RTM with a fall-back spin
// lock.
2838 typedef kmp_tas_lock_t kmp_rtm_spin_lock_t;
2839 
2840 static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) {
2841  KMP_ATOMIC_ST_REL(&lck->lk.poll, 0);
2842 }
2843 
2844 static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) {
2845  __kmp_destroy_rtm_spin_lock(lck);
2846 }
2847 
2848 KMP_ATTRIBUTE_TARGET_RTM
2849 static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2850  kmp_int32 gtid) {
2851  unsigned retries = 3, status;
2852  kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2853  kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2854  do {
2855  status = _xbegin();
2856  if (status == _XBEGIN_STARTED) {
2857  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free)
2858  return KMP_LOCK_ACQUIRED_FIRST;
2859  _xabort(0xff);
2860  }
2861  if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2862  // Wait until lock becomes free
2863  while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free) {
2864  KMP_YIELD(TRUE);
2865  }
2866  } else if (!(status & _XABORT_RETRY))
2867  break;
2868  } while (retries--);
2869 
2870  // Fall-back spin lock
2871  KMP_FSYNC_PREPARE(lck);
2872  kmp_backoff_t backoff = __kmp_spin_backoff_params;
2873  while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free ||
2874  !__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2875  __kmp_spin_backoff(&backoff);
2876  }
2877  KMP_FSYNC_ACQUIRED(lck);
2878  return KMP_LOCK_ACQUIRED_FIRST;
2879 }
2880 
2881 static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2882  kmp_int32 gtid) {
2883  return __kmp_acquire_rtm_spin_lock(lck, gtid);
2884 }
2885 
2886 KMP_ATTRIBUTE_TARGET_RTM
2887 static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2888  kmp_int32 gtid) {
2889  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == KMP_LOCK_FREE(rtm_spin)) {
2890  // Releasing from speculation
2891  _xend();
2892  } else {
2893  // Releasing from a real lock
2894  KMP_FSYNC_RELEASING(lck);
2895  KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(rtm_spin));
2896  }
2897  return KMP_LOCK_RELEASED;
2898 }
2899 
2900 static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2901  kmp_int32 gtid) {
2902  return __kmp_release_rtm_spin_lock(lck, gtid);
2903 }
2904 
2905 KMP_ATTRIBUTE_TARGET_RTM
2906 static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) {
2907  unsigned retries = 3, status;
2908  kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2909  kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2910  do {
2911  status = _xbegin();
2912  if (status == _XBEGIN_STARTED &&
2913  KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) {
2914  return TRUE;
2915  }
2916  if (!(status & _XABORT_RETRY))
2917  break;
2918  } while (retries--);
2919 
2920  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free &&
2921  __kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2922  KMP_FSYNC_ACQUIRED(lck);
2923  return TRUE;
2924  }
2925  return FALSE;
2926 }
2927 
2928 static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2929  kmp_int32 gtid) {
2930  return __kmp_test_rtm_spin_lock(lck, gtid);
2931 }
2932 
2933 #endif // KMP_USE_TSX
2934 
2935 // Entry functions for indirect locks (first element of direct lock jump tables)
2936 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
2937  kmp_dyna_lockseq_t tag);
2938 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
2939 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2940 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2941 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2942 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2943  kmp_int32);
2944 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2945  kmp_int32);
2946 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2947  kmp_int32);
2948 
2949 // Lock function definitions for the union parameter type
2950 #define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)
2951 
2952 #define expand1(lk, op) \
2953  static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) { \
2954  __kmp_##op##_##lk##_##lock(&lock->lk); \
2955  }
2956 #define expand2(lk, op) \
2957  static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock, \
2958  kmp_int32 gtid) { \
2959  return __kmp_##op##_##lk##_##lock(&lock->lk, gtid); \
2960  }
2961 #define expand3(lk, op) \
2962  static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock, \
2963  kmp_lock_flags_t flags) { \
2964  __kmp_set_##lk##_lock_flags(&lock->lk, flags); \
2965  }
2966 #define expand4(lk, op) \
2967  static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock, \
2968  const ident_t *loc) { \
2969  __kmp_set_##lk##_lock_location(&lock->lk, loc); \
2970  }
2971 
2972 KMP_FOREACH_LOCK_KIND(expand1, init)
2973 KMP_FOREACH_LOCK_KIND(expand1, init_nested)
2974 KMP_FOREACH_LOCK_KIND(expand1, destroy)
2975 KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
2976 KMP_FOREACH_LOCK_KIND(expand2, acquire)
2977 KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
2978 KMP_FOREACH_LOCK_KIND(expand2, release)
2979 KMP_FOREACH_LOCK_KIND(expand2, release_nested)
2980 KMP_FOREACH_LOCK_KIND(expand2, test)
2981 KMP_FOREACH_LOCK_KIND(expand2, test_nested)
2982 KMP_FOREACH_LOCK_KIND(expand3, )
2983 KMP_FOREACH_LOCK_KIND(expand4, )
2984 
2985 #undef expand1
2986 #undef expand2
2987 #undef expand3
2988 #undef expand4
2989 
// Jump tables for the direct lock functions.
// Only fill in the odd entries; that avoids the need to shift out the low bit.
2992 
2993 // init functions
2994 #define expand(l, op) 0, __kmp_init_direct_lock,
2995 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
2996  __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
2997 #undef expand
2998 
2999 // destroy functions
3000 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
3001 static void (*direct_destroy[])(kmp_dyna_lock_t *) = {
3002  __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
3003 #undef expand
3004 #define expand(l, op) \
3005  0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
3006 static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
3007  __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
3008 #undef expand
3009 
3010 // set/acquire functions
3011 #define expand(l, op) \
3012  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3013 static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
3014  __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
3015 #undef expand
3016 #define expand(l, op) \
3017  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3018 static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3019  __kmp_set_indirect_lock_with_checks, 0,
3020  KMP_FOREACH_D_LOCK(expand, acquire)};
3021 #undef expand
3022 
3023 // unset/release and test functions
3024 #define expand(l, op) \
3025  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3026 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
3027  __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
3028 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
3029  __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
3030 #undef expand
3031 #define expand(l, op) \
3032  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3033 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3034  __kmp_unset_indirect_lock_with_checks, 0,
3035  KMP_FOREACH_D_LOCK(expand, release)};
3036 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3037  __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
3038 #undef expand
3039 
3040 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3041 void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
3042 int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
3043 int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
3044 int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
3045 
3046 // Jump tables for the indirect lock functions
3047 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3048 void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
3049  KMP_FOREACH_I_LOCK(expand, init)};
3050 #undef expand
3051 
3052 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3053 static void (*indirect_destroy[])(kmp_user_lock_p) = {
3054  KMP_FOREACH_I_LOCK(expand, destroy)};
3055 #undef expand
3056 #define expand(l, op) \
3057  (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
3058 static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
3059  KMP_FOREACH_I_LOCK(expand, destroy)};
3060 #undef expand
3061 
3062 // set/acquire functions
3063 #define expand(l, op) \
3064  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3065 static int (*indirect_set[])(kmp_user_lock_p,
3066  kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
3067 #undef expand
3068 #define expand(l, op) \
3069  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3070 static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
3071  KMP_FOREACH_I_LOCK(expand, acquire)};
3072 #undef expand
3073 
3074 // unset/release and test functions
3075 #define expand(l, op) \
3076  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3077 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
3078  KMP_FOREACH_I_LOCK(expand, release)};
3079 static int (*indirect_test[])(kmp_user_lock_p,
3080  kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
3081 #undef expand
3082 #define expand(l, op) \
3083  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3084 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
3085  KMP_FOREACH_I_LOCK(expand, release)};
3086 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
3087  KMP_FOREACH_I_LOCK(expand, test)};
3088 #undef expand
3089 
// Exposes only one set of jump tables (*lock or *lock_with_checks).
3091 void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
3092 int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
3093 int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
3094 int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
3095 
3096 // Lock index table.
3097 kmp_indirect_lock_table_t __kmp_i_lock_table;
3098 
3099 // Size of indirect locks.
3100 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
3101 
3102 // Jump tables for lock accessor/modifier.
3103 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3104  const ident_t *) = {0};
3105 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3106  kmp_lock_flags_t) = {0};
3107 const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
3108  kmp_user_lock_p) = {0};
3109 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
3110  kmp_user_lock_p) = {0};
3111 
3112 // Use different lock pools for different lock types.
3113 static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
3114 
3115 // User lock allocator for dynamically dispatched indirect locks. Every entry of
3116 // the indirect lock table holds the address and type of the allocated indirect
3117 // lock (kmp_indirect_lock_t), and the size of the table doubles when it is
3118 // full. A destroyed indirect lock object is returned to the reusable pool of
3119 // locks, unique to each lock type.
3120 kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
3121  kmp_int32 gtid,
3122  kmp_indirect_locktag_t tag) {
3123  kmp_indirect_lock_t *lck;
3124  kmp_lock_index_t idx, table_idx;
3125 
3126  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3127 
3128  if (__kmp_indirect_lock_pool[tag] != NULL) {
3129  // Reuse the allocated and destroyed lock object
3130  lck = __kmp_indirect_lock_pool[tag];
3131  if (OMP_LOCK_T_SIZE < sizeof(void *))
3132  idx = lck->lock->pool.index;
3133  __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3134  KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3135  lck));
3136  } else {
3137  kmp_uint32 row, col;
3138  kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
3139  idx = 0;
3140  // Find location in list of lock tables to put new lock
3141  while (1) {
3142  table_idx = lock_table->next; // index within this table
3143  idx += lock_table->next; // global index within list of tables
3144  if (table_idx < lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK) {
3145  row = table_idx / KMP_I_LOCK_CHUNK;
3146  col = table_idx % KMP_I_LOCK_CHUNK;
3147  // Allocate a new row of locks if necessary
3148  if (!lock_table->table[row]) {
3149  lock_table->table[row] = (kmp_indirect_lock_t *)__kmp_allocate(
3150  sizeof(kmp_indirect_lock_t) * KMP_I_LOCK_CHUNK);
3151  }
3152  break;
3153  }
3154  // Allocate a new lock table if necessary with double the capacity
3155  if (!lock_table->next_table) {
3156  kmp_indirect_lock_table_t *next_table =
3157  (kmp_indirect_lock_table_t *)__kmp_allocate(
3158  sizeof(kmp_indirect_lock_table_t));
3159  next_table->table = (kmp_indirect_lock_t **)__kmp_allocate(
3160  sizeof(kmp_indirect_lock_t *) * 2 * lock_table->nrow_ptrs);
3161  next_table->nrow_ptrs = 2 * lock_table->nrow_ptrs;
3162  next_table->next = 0;
3163  next_table->next_table = nullptr;
3164  lock_table->next_table = next_table;
3165  }
3166  lock_table = lock_table->next_table;
3167  KMP_ASSERT(lock_table);
3168  }
3169  lock_table->next++;
3170 
3171  lck = &lock_table->table[row][col];
3172  // Allocate a new base lock object
3173  lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3174  KA_TRACE(20,
3175  ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3176  }
3177 
3178  __kmp_release_lock(&__kmp_global_lock, gtid);
3179 
3180  lck->type = tag;
3181 
3182  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3183  *((kmp_lock_index_t *)user_lock) = idx
3184  << 1; // indirect lock word must be even
3185  } else {
3186  *((kmp_indirect_lock_t **)user_lock) = lck;
3187  }
3188 
3189  return lck;
3190 }
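
// Worked example (illustrative, assuming KMP_I_LOCK_CHUNK is 1024): a table
// with nrow_ptrs = 2 covers table indices 0..2047; table_idx 1500 maps to
// row 1, col 476. Index 2048 no longer fits, so a next_table with
// nrow_ptrs = 4 is chained on, and the global index idx accumulates the
// 'next' counts of the tables walked through.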
3191 
3192 // User lock lookup for dynamically dispatched locks.
3193 static __forceinline kmp_indirect_lock_t *
3194 __kmp_lookup_indirect_lock(void **user_lock, const char *func) {
3195  if (__kmp_env_consistency_check) {
3196  kmp_indirect_lock_t *lck = NULL;
3197  if (user_lock == NULL) {
3198  KMP_FATAL(LockIsUninitialized, func);
3199  }
3200  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3201  kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3202  lck = __kmp_get_i_lock(idx);
3203  } else {
3204  lck = *((kmp_indirect_lock_t **)user_lock);
3205  }
3206  if (lck == NULL) {
3207  KMP_FATAL(LockIsUninitialized, func);
3208  }
3209  return lck;
3210  } else {
3211  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3212  return __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock));
3213  } else {
3214  return *((kmp_indirect_lock_t **)user_lock);
3215  }
3216  }
3217 }
3218 
3219 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
3220  kmp_dyna_lockseq_t seq) {
3221 #if KMP_USE_ADAPTIVE_LOCKS
3222  if (seq == lockseq_adaptive && !__kmp_cpuinfo.flags.rtm) {
3223  KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3224  seq = lockseq_queuing;
3225  }
3226 #endif
3227 #if KMP_USE_TSX
3228  if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.flags.rtm) {
3229  seq = lockseq_queuing;
3230  }
3231 #endif
3232  kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3233  kmp_indirect_lock_t *l =
3234  __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3235  KMP_I_LOCK_FUNC(l, init)(l->lock);
3236  KA_TRACE(
3237  20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
3238  seq));
3239 }

static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
  kmp_uint32 gtid = __kmp_entry_gtid();
  kmp_indirect_lock_t *l =
      __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
  kmp_indirect_locktag_t tag = l->type;

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  // Use the base lock's space to keep the pool chain.
  l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
  }
  __kmp_indirect_lock_pool[tag] = l;

  __kmp_release_lock(&__kmp_global_lock, gtid);
}
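
// [Illustrative sketch, not part of the original source] Because the destroyed
// lock's own storage doubles as the free-list node, reuse is a simple pop.
// This hypothetical helper mirrors the reuse branch at the top of
// __kmp_allocate_indirect_lock(); it is disabled and for illustration only.
#if 0
static kmp_indirect_lock_t *example_pop_from_pool(kmp_indirect_locktag_t tag) {
  // Caller must hold __kmp_global_lock, just like the push above.
  kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[tag];
  if (l != NULL)
    __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)l->lock->pool.next;
  return l; // caller reads l->lock->pool.index before reinitializing l->lock
}
#endif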

static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
}

static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
}

static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
}

static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                               kmp_int32 gtid) {
  kmp_indirect_lock_t *l =
      __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
}

static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                                 kmp_int32 gtid) {
  kmp_indirect_lock_t *l =
      __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
}

static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                                kmp_int32 gtid) {
  kmp_indirect_lock_t *l =
      __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
}

kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;

// This is used only in kmp_error.cpp when consistency checking is on.
kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {
  switch (seq) {
  case lockseq_tas:
  case lockseq_nested_tas:
    return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
#if KMP_USE_FUTEX
  case lockseq_futex:
  case lockseq_nested_futex:
    return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
#endif
  case lockseq_ticket:
  case lockseq_nested_ticket:
    return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
  case lockseq_queuing:
  case lockseq_nested_queuing:
#if KMP_USE_ADAPTIVE_LOCKS
  case lockseq_adaptive:
#endif
    return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
  case lockseq_drdpa:
  case lockseq_nested_drdpa:
    return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
  default:
    return 0;
  }
}

// Initializes data for dynamic user locks.
void __kmp_init_dynamic_user_locks() {
  // Initialize jump table for the lock functions
  if (__kmp_env_consistency_check) {
    __kmp_direct_set = direct_set_check;
    __kmp_direct_unset = direct_unset_check;
    __kmp_direct_test = direct_test_check;
    __kmp_direct_destroy = direct_destroy_check;
    __kmp_indirect_set = indirect_set_check;
    __kmp_indirect_unset = indirect_unset_check;
    __kmp_indirect_test = indirect_test_check;
    __kmp_indirect_destroy = indirect_destroy_check;
  } else {
    __kmp_direct_set = direct_set;
    __kmp_direct_unset = direct_unset;
    __kmp_direct_test = direct_test;
    __kmp_direct_destroy = direct_destroy;
    __kmp_indirect_set = indirect_set;
    __kmp_indirect_unset = indirect_unset;
    __kmp_indirect_test = indirect_test;
    __kmp_indirect_destroy = indirect_destroy;
  }
  // If the user locks have already been initialized, then return. Allow the
  // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
  // new lock tables if they have already been allocated.
  if (__kmp_init_user_locks)
    return;

  // Initialize lock index table
  __kmp_i_lock_table.nrow_ptrs = KMP_I_LOCK_TABLE_INIT_NROW_PTRS;
  __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(
      sizeof(kmp_indirect_lock_t *) * KMP_I_LOCK_TABLE_INIT_NROW_PTRS);
  *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
      KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
  __kmp_i_lock_table.next = 0;
  __kmp_i_lock_table.next_table = nullptr;

  // Indirect lock size
  __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
  __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
#if KMP_USE_ADAPTIVE_LOCKS
  __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
#if KMP_USE_TSX
  __kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
#if KMP_USE_FUTEX
  __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
  __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
  __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);

// Initialize lock accessor/modifier
#define fill_jumps(table, expand, sep)                                         \
  {                                                                            \
    table[locktag##sep##ticket] = expand(ticket);                              \
    table[locktag##sep##queuing] = expand(queuing);                            \
    table[locktag##sep##drdpa] = expand(drdpa);                                \
  }

#if KMP_USE_ADAPTIVE_LOCKS
#define fill_table(table, expand)                                              \
  {                                                                            \
    fill_jumps(table, expand, _);                                              \
    table[locktag_adaptive] = expand(queuing);                                 \
    fill_jumps(table, expand, _nested_);                                       \
  }
#else
#define fill_table(table, expand)                                              \
  {                                                                            \
    fill_jumps(table, expand, _);                                              \
    fill_jumps(table, expand, _nested_);                                       \
  }
#endif // KMP_USE_ADAPTIVE_LOCKS

#define expand(l)                                                              \
  (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
  fill_table(__kmp_indirect_set_location, expand);
#undef expand
#define expand(l)                                                              \
  (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
  fill_table(__kmp_indirect_set_flags, expand);
#undef expand
#define expand(l)                                                              \
  (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
  fill_table(__kmp_indirect_get_location, expand);
#undef expand
#define expand(l)                                                              \
  (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
  fill_table(__kmp_indirect_get_flags, expand);
#undef expand

  __kmp_init_user_locks = TRUE;
}
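
// [Illustrative note, not part of the original source] For reference, one of
// the fill_table() invocations above, hand-expanded with KMP_USE_ADAPTIVE_LOCKS
// off and the first expand() definition in effect, is roughly:
#if 0
__kmp_indirect_set_location[locktag_ticket] =
    (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location;
__kmp_indirect_set_location[locktag_queuing] =
    (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_queuing_lock_location;
__kmp_indirect_set_location[locktag_drdpa] =
    (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_drdpa_lock_location;
// ... and the same three assignments again for the locktag_nested_* tags.
#endif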

// Clean up the lock table.
void __kmp_cleanup_indirect_user_locks() {
  int k;

  // Clean up locks in the pools first (they were already destroyed before going
  // into the pools).
  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
    kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
    while (l != NULL) {
      kmp_indirect_lock_t *ll = l;
      l = (kmp_indirect_lock_t *)l->lock->pool.next;
      KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
                    ll));
      __kmp_free(ll->lock);
      ll->lock = NULL;
    }
    __kmp_indirect_lock_pool[k] = NULL;
  }
  // Clean up the remaining undestroyed locks.
  kmp_indirect_lock_table_t *ptr = &__kmp_i_lock_table;
  while (ptr) {
    for (kmp_uint32 row = 0; row < ptr->nrow_ptrs; ++row) {
      if (!ptr->table[row])
        continue;
      for (kmp_uint32 col = 0; col < KMP_I_LOCK_CHUNK; ++col) {
        kmp_indirect_lock_t *l = &ptr->table[row][col];
        if (l->lock) {
          // Locks not destroyed explicitly need to be destroyed here.
          KMP_I_LOCK_FUNC(l, destroy)(l->lock);
          KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p "
                        "from table\n",
                        l));
          __kmp_free(l->lock);
        }
      }
      __kmp_free(ptr->table[row]);
    }
    kmp_indirect_lock_table_t *next_table = ptr->next_table;
    if (ptr != &__kmp_i_lock_table)
      __kmp_free(ptr);
    ptr = next_table;
  }

  __kmp_init_user_locks = FALSE;
}

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

#else // KMP_USE_DYNAMIC_LOCK

static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
}

static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_nested_tas_lock(lck);
}

#if KMP_USE_FUTEX
static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
}

static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_nested_futex_lock(lck);
}
#endif

static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
  return lck == lck->lk.self;
}

static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
}

static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_nested_ticket_lock(lck);
}

static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
  return lck == lck->lk.initialized;
}

static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_queuing_lock(lck);
}

static void
__kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_nested_queuing_lock(lck);
}

#if KMP_USE_ADAPTIVE_LOCKS
static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
  __kmp_init_adaptive_lock(lck);
}
#endif

static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
  return lck == lck->lk.initialized;
}

static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_drdpa_lock(lck);
}

static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_nested_drdpa_lock(lck);
}
/* user locks
 * They are implemented as a table of function pointers which are set to the
 * lock functions of the appropriate kind, once that has been determined. */

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;

int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
                                         kmp_int32 gtid) = NULL;
int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;
void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;

int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                kmp_int32 gtid) = NULL;
int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;
void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;

int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
                                      const ident_t *loc) = NULL;
kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
                                   kmp_lock_flags_t flags) = NULL;

void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
  switch (user_lock_kind) {
  case lk_default:
  default:
    KMP_ASSERT(0);

  case lk_tas: {
    __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
    __kmp_user_lock_size = sizeof(kmp_tas_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
    } else {
      KMP_BIND_USER_LOCK(tas);
      KMP_BIND_NESTED_USER_LOCK(tas);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#if KMP_USE_FUTEX

  case lk_futex: {
    __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
    __kmp_user_lock_size = sizeof(kmp_futex_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
    } else {
      KMP_BIND_USER_LOCK(futex);
      KMP_BIND_NESTED_USER_LOCK(futex);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#endif // KMP_USE_FUTEX

  case lk_ticket: {
    __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
    __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
    } else {
      KMP_BIND_USER_LOCK(ticket);
      KMP_BIND_NESTED_USER_LOCK(ticket);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_ticket_lock_flags);
  } break;

  case lk_queuing: {
    __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
    __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
    } else {
      KMP_BIND_USER_LOCK(queuing);
      KMP_BIND_NESTED_USER_LOCK(queuing);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);
  } break;

#if KMP_USE_ADAPTIVE_LOCKS
  case lk_adaptive: {
    __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
    __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
    } else {
      KMP_BIND_USER_LOCK(adaptive);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);

  } break;
#endif // KMP_USE_ADAPTIVE_LOCKS

  case lk_drdpa: {
    __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
    __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
    } else {
      KMP_BIND_USER_LOCK(drdpa);
      KMP_BIND_NESTED_USER_LOCK(drdpa);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_drdpa_lock_flags);
  } break;
  }
}
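
// [Illustrative sketch, not part of the original source] After
// __kmp_set_user_lock_vptrs() runs, generic code can operate on any user lock
// through the pointers above without knowing the concrete kind. Note the NULL
// guard: for "tas" (and "futex") locks __kmp_is_user_lock_initialized_ stays
// NULL. This hypothetical caller is disabled and for illustration only.
#if 0
static void example_destroy_any_user_lock(kmp_user_lock_p lck) {
  if (__kmp_is_user_lock_initialized_ != NULL &&
      (*__kmp_is_user_lock_initialized_)(lck)) {
    (*__kmp_destroy_user_lock_)(lck); // bound to the selected kind's destroy
  }
}
#endif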

// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks *__kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
  // Assume that kmp_global_lock is held upon entry/exit.
  kmp_lock_index_t index;
  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
    kmp_lock_index_t size;
    kmp_user_lock_p *table;
    // Reallocate lock table.
    if (__kmp_user_lock_table.allocated == 0) {
      size = 1024;
    } else {
      size = __kmp_user_lock_table.allocated * 2;
    }
    table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
    KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
               sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
    table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
    // We cannot free the previous table now, since it may be in use by other
    // threads. So save the pointer to the previous table in the first element
    // of the new table. All the tables will be organized into a list, and can
    // be freed when the library is shutting down.
    __kmp_user_lock_table.table = table;
    __kmp_user_lock_table.allocated = size;
  }
  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
                   __kmp_user_lock_table.allocated);
  index = __kmp_user_lock_table.used;
  __kmp_user_lock_table.table[index] = lck;
  ++__kmp_user_lock_table.used;
  return index;
}
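
// [Illustrative note, not part of the original source] After two doublings,
// element [0] of the live table links back through the retired tables, e.g.
//
//   table (4096 slots)[0] -> old table (2048)[0] -> first table (1024)[0] -> NULL
//
// and __kmp_cleanup_user_locks() below frees the chain with a walk like this
// (disabled sketch):
#if 0
kmp_user_lock_p *t = __kmp_user_lock_table.table;
while (t != NULL) {
  kmp_user_lock_p *prev = (kmp_user_lock_p *)t[0]; // saved previous table
  __kmp_free(t); // live locks sit in t[1..used-1]; slot 0 is the link
  t = prev;
}
#endif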

static kmp_user_lock_p __kmp_lock_block_allocate() {
  // Assume that kmp_global_lock is held upon entry/exit.
  static int last_index = 0;
  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
    // Restart the index.
    last_index = 0;
    // Need to allocate a new block.
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
    char *buffer =
        (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
    // Set up the new block.
    kmp_block_of_locks *new_block =
        (kmp_block_of_locks *)(&buffer[space_for_locks]);
    new_block->next_block = __kmp_lock_blocks;
    new_block->locks = (void *)buffer;
    // Publish the new block.
    KMP_MB();
    __kmp_lock_blocks = new_block;
  }
  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
      ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
  last_index++;
  return ret;
}
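
// [Illustrative note, not part of the original source] Layout of the single
// allocation made above: the block descriptor lives at the tail of the same
// buffer that holds the lock storage, so freeing new_block->locks releases
// both.
//
//   buffer: [lock 0][lock 1]...[lock N-1][kmp_block_of_locks]
//           ^ new_block->locks            ^ new_block
//
// (__kmp_cleanup_user_locks() below frees block_ptr->locks accordingly.)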

// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
                                         kmp_lock_flags_t flags) {
  kmp_user_lock_p lck;
  kmp_lock_index_t index;
  KMP_DEBUG_ASSERT(user_lock);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_lock_pool == NULL) {
    // Lock pool is empty. Allocate new memory.

    if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
      lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
    } else {
      lck = __kmp_lock_block_allocate();
    }

    // Insert lock in the table so that it can be freed in __kmp_cleanup,
    // and the debugger has info on all allocated locks.
    index = __kmp_lock_table_insert(lck);
  } else {
    // Pick up lock from pool.
    lck = __kmp_lock_pool;
    index = __kmp_lock_pool->pool.index;
    __kmp_lock_pool = __kmp_lock_pool->pool.next;
  }

  // We could potentially differentiate between nested and regular locks
  // here, and do the lock table lookup for regular locks only.
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    *((kmp_lock_index_t *)user_lock) = index;
  } else {
    *((kmp_user_lock_p *)user_lock) = lck;
  }

  // Mark the lock if it is a critical section lock.
  __kmp_set_user_lock_flags(lck, flags);

  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO: move this line up

  return lck;
}
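
// [Illustrative sketch, not part of the original source] Typical shape of a
// caller: a lock-init entry point hands the user's lock word to
// __kmp_user_lock_allocate() and then initializes the returned object through
// the bound function pointer. The function below is hypothetical; the real
// entry points live elsewhere in the runtime. Disabled for illustration only.
#if 0
static void example_init_user_lock(void **user_lock, kmp_int32 gtid) {
  kmp_user_lock_p lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  (*__kmp_init_user_lock_with_checks_)(lck); // set by __kmp_set_user_lock_vptrs
}
#endif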

// Return the lock's memory to the pool for reuse.
void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
                          kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(user_lock != NULL);
  KMP_DEBUG_ASSERT(lck != NULL);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  lck->pool.next = __kmp_lock_pool;
  __kmp_lock_pool = lck;
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
    lck->pool.index = index;
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
}

kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
  kmp_user_lock_p lck = NULL;

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    if (__kmp_env_consistency_check) {
      if (!(0 < index && index < __kmp_user_lock_table.used)) {
        KMP_FATAL(LockIsUninitialized, func);
      }
    }
    KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    lck = __kmp_user_lock_table.table[index];
  } else {
    lck = *((kmp_user_lock_p *)user_lock);
  }

  if (__kmp_env_consistency_check) {
    if (lck == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  return lck;
}

void __kmp_cleanup_user_locks(void) {
  // Reset lock pool. Don't worry about locks in the pool--we will free them
  // when iterating through the lock table (it includes all the locks, dead or
  // alive).
  __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck)                                                       \
  ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
   ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))

  // Loop through lock table, free all locks.
  // Do not free item [0]; it is reserved for the lock tables list.
  //
  // FIXME - we are iterating through a list of (pointers to) objects of type
  // union kmp_user_lock, but we have no way of knowing whether the base type is
  // currently "pool" or whatever the global user lock type is.
  //
  // We are relying on the fact that for all of the user lock types
  // (except "tas"), the first field in the lock struct is the "initialized"
  // field, which is set to the address of the lock object itself when
  // the lock is initialized. When the union is of type "pool", the
  // first field is a pointer to the next object in the free list, which
  // will not be the same address as the object itself.
  //
  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
  // for "pool" objects on the free list. This must happen as the "location"
  // field of real user locks overlaps the "index" field of "pool" objects.
  //
  // It would be better to run through the free list, and remove all "pool"
  // objects from the lock table before executing this loop. However,
  // "pool" objects do not always have their index field set (only on
  // lin_32e), and I don't want to search the lock table for the address
  // of every "pool" object on the free list.
  while (__kmp_user_lock_table.used > 1) {
    const ident_t *loc;

    // Reduce __kmp_user_lock_table.used before freeing the lock,
    // so that the state of locks is consistent.
    kmp_user_lock_p lck =
        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];

    if ((__kmp_is_user_lock_initialized_ != NULL) &&
        (*__kmp_is_user_lock_initialized_)(lck)) {
      // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
      // it is NOT a critical section (user is not responsible for destroying
      // criticals) AND we know the source location to report.
      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
          (loc->psource != NULL)) {
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
        __kmp_str_loc_free(&str_loc);
      }

#ifdef KMP_DEBUG
      if (IS_CRITICAL(lck)) {
        KA_TRACE(
            20,
            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
             lck, *(void **)lck));
      } else {
        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
                      *(void **)lck));
      }
#endif // KMP_DEBUG

      // Cleanup internal lock dynamic resources (for drdpa locks particularly).
      __kmp_destroy_user_lock(lck);
    }

    // Free the lock if block allocation of locks is not used.
    if (__kmp_lock_blocks == NULL) {
      __kmp_free(lck);
    }
  }

#undef IS_CRITICAL

  // Delete lock table(s).
  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
  __kmp_user_lock_table.table = NULL;
  __kmp_user_lock_table.allocated = 0;

  while (table_ptr != NULL) {
    // In the first element we saved the pointer to the previous
    // (smaller) lock table.
    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
    __kmp_free(table_ptr);
    table_ptr = next;
  }

  // Free buffers allocated for blocks of locks.
  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
  __kmp_lock_blocks = NULL;

  while (block_ptr != NULL) {
    kmp_block_of_locks_t *next = block_ptr->next_block;
    __kmp_free(block_ptr->locks);
    // *block_ptr itself was allocated at the end of the locks vector.
    block_ptr = next;
  }

  TCW_4(__kmp_init_user_locks, FALSE);
}
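
// [Illustrative sketch, not part of the original source] Why the
// "initialized" test in the loop above filters out pooled entries: for a live
// lock (other than "tas"/"futex") the first word points at the lock itself,
// while for a "pool" entry it is the free-list link, which will not be the
// object's own address. A disabled sketch of the same idea:
#if 0
static int example_looks_initialized(kmp_user_lock_p lck) {
  // Mirrors e.g. __kmp_is_ticket_lock_initialized(); not valid for "tas".
  return *(void **)lck == (void *)lck;
}
#endif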

#endif // KMP_USE_DYNAMIC_LOCK