/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"
#include <limits>
#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      long retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "hwloc_get_cpubind()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      long retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#if KMP_OS_WINDOWS
    int set_process_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set process affinity operation when not capable");
      int error = 0;
      const hwloc_topology_support *support =
          hwloc_topology_get_support(__kmp_hwloc_topology);
      if (support->cpubind->set_proc_cpubind) {
        int retval;
        retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
                                   HWLOC_CPUBIND_PROCESS);
        if (retval >= 0)
          return 0;
        error = errno;
        if (abort_on_error)
          __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"),
                      KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#endif
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity.flags.verbose) {
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
        }
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity.flags.verbose) {
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
        }
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
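
// Illustrative sketch (editor's addition, not part of the original header):
// the Mask iteration protocol above is begin()/next()/end(). The hwloc
// implementation returns -1 from end() because hwloc_bitmap_next() yields -1
// past the last set bit, while the native implementations below return the
// total bit capacity instead; either way, a caller holding a
// KMPAffinity::Mask *m walks the set bits the same way (the runtime's
// KMP_CPU_SET_ITERATE macro in kmp.h wraps this pattern):
//
//   for (int i = m->begin(); i != m->end(); i = m->next(i)) {
//     // i is the OS index of a processor present in the mask
//   }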

#if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot change:
   they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_RISCV64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_LOONGARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned long mask_t;
    typedef decltype(__kmp_affin_mask_size) mask_size_type;
    static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    static const mask_t ONE = 1;
    mask_size_type get_num_mask_types() const {
      return __kmp_affin_mask_size / sizeof(mask_t);
    }

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = (mask_t)0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override {
      int e;
      __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
      return e;
    }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "pthread_getaffinity_np()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "pthread_setaffinity_np()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
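
// Worked example (editor's addition, not part of the original header): with a
// 64-bit mask_t, BITS_PER_MASK_T is 64, so logical CPU 70 lives in word
// 70 / 64 = 1 at bit 70 % 64 = 6. Mask::set(70) therefore performs
// mask[1] |= (1UL << 6), and is_set(70) tests the same bit. Note that end()
// is the total bit capacity, get_num_mask_types() * BITS_PER_MASK_T, not the
// index of the last set bit, which is why begin() and next() scan forward
// until they hit a set bit or the capacity.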

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_process_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups <= 1) {
        if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
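
// Illustrative note (editor's addition, not part of the original header): on
// Windows, a thread's affinity can only name processors within a single
// processor group, so get_proc_group() returns the index of the one group
// with nonzero bits, or -1 when the bits span two or more groups. For
// example, on a 64-bit build with 2 groups of 64 logical processors each:
//
//   m->set(3);            // bit in group 0
//   m->get_proc_group();  // returns 0
//   m->set(64 + 5);       // bit in group 1 as well
//   m->get_proc_group();  // returns -1: no single GROUP_AFFINITY can
//                         // represent this mask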

// Describe an attribute for a level in the machine topology
struct kmp_hw_attr_t {
  int core_type : 8;
  int core_eff : 8;
  unsigned valid : 1;
  unsigned reserved : 15;

  static const int UNKNOWN_CORE_EFF = -1;

  kmp_hw_attr_t()
      : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
        valid(0), reserved(0) {}
  void set_core_type(kmp_hw_core_type_t type) {
    valid = 1;
    core_type = type;
  }
  void set_core_eff(int eff) {
    valid = 1;
    core_eff = eff;
  }
  kmp_hw_core_type_t get_core_type() const {
    return (kmp_hw_core_type_t)core_type;
  }
  int get_core_eff() const { return core_eff; }
  bool is_core_type_valid() const {
    return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
  }
  bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
  operator bool() const { return valid; }
  void clear() {
    core_type = KMP_HW_CORE_TYPE_UNKNOWN;
    core_eff = UNKNOWN_CORE_EFF;
    valid = 0;
  }
  bool contains(const kmp_hw_attr_t &other) const {
    if (!valid && !other.valid)
      return true;
    if (valid && other.valid) {
      if (other.is_core_type_valid()) {
        if (!is_core_type_valid() ||
            (get_core_type() != other.get_core_type()))
          return false;
      }
      if (other.is_core_eff_valid()) {
        if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
          return false;
      }
      return true;
    }
    return false;
  }
  bool operator==(const kmp_hw_attr_t &rhs) const {
    return (rhs.valid == valid && rhs.core_eff == core_eff &&
            rhs.core_type == core_type);
  }
  bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
};
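
// Illustrative sketch (editor's addition, not part of the original header):
// contains() answers "does this attribute satisfy every constraint that
// 'other' specifies?". Using only core efficiencies (core types behave the
// same way):
//
//   kmp_hw_attr_t core, want;
//   core.set_core_eff(1);          // a core enumerated at efficiency 1
//   want.set_core_eff(1);          // request efficiency-1 cores
//   bool ok = core.contains(want); // true: the requested eff matches
//   want.clear();
//   want.set_core_eff(0);
//   ok = core.contains(want);      // false: efficiencies differ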

#if KMP_AFFINITY_SUPPORTED
KMP_BUILD_ASSERT(sizeof(kmp_hw_attr_t) == sizeof(kmp_affinity_attrs_t));
#endif

class kmp_hw_thread_t {
public:
  static const int UNKNOWN_ID = -1;
  static const int MULTIPLE_ID = -2;
  static int compare_ids(const void *a, const void *b);
  static int compare_compact(const void *a, const void *b);
  int ids[KMP_HW_LAST];
  int sub_ids[KMP_HW_LAST];
  bool leader;
  int os_id;
  kmp_hw_attr_t attrs;

  void print() const;
  void clear() {
    for (int i = 0; i < (int)KMP_HW_LAST; ++i)
      ids[i] = UNKNOWN_ID;
    leader = false;
    attrs.clear();
  }
};
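
// Illustrative note (editor's addition, not part of the original header):
// ids[] holds one physical id per detected topology level, indexed in
// topology order. On a machine enumerated as 2 packages x 4 cores x 2
// threads, the second hardware thread of core 3 on package 0 could carry
// ids = {0, 3, 1} at the levels {KMP_HW_SOCKET, KMP_HW_CORE, KMP_HW_THREAD},
// while os_id is the OS-assigned processor number and sub_ids[] are the
// 0-based relative positions computed by kmp_topology_t::_set_sub_ids().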

class kmp_topology_t {

  struct flags_t {
    int uniform : 1;
    int reserved : 31;
  };

  int depth;

  // The following arrays are all 'depth' long and have been
  // allocated to hold up to KMP_HW_LAST number of objects if
  // needed, so layers can be added without reallocating any array.

  // Ordered array of the types in the topology
  kmp_hw_t *types;

  // Keep quick topology ratios. For non-uniform topologies,
  // this ratio holds the max number of itemAs per itemB,
  // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
  int *ratio;

  // Storage containing the absolute number of each topology layer
  int *count;

  // The number of core efficiencies. This is only useful for hybrid
  // topologies. Core efficiencies will range from 0 to num efficiencies - 1
  int num_core_efficiencies;
  int num_core_types;
  kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];

  // The hardware threads array
  // hw_threads is num_hw_threads long
  // Each hw_thread's ids and sub_ids are depth deep
  int num_hw_threads;
  kmp_hw_thread_t *hw_threads;

  // Equivalence hash where the key is the hardware topology item
  // and the value is the equivalent hardware topology type in the
  // types[] array; if the value is KMP_HW_UNKNOWN, then there is no
  // known equivalence for the topology type
  kmp_hw_t equivalent[KMP_HW_LAST];

  // Flags describing the topology
  flags_t flags;

  // Compact value used during sort_compact()
  int compact;

  // Insert a new topology layer after allocation
  void _insert_layer(kmp_hw_t type, const int *ids);

#if KMP_GROUP_AFFINITY
  // Insert topology information about Windows Processor groups
  void _insert_windows_proc_groups();
#endif

  // Count each item & get the num x's per y
  // e.g., get the number of cores and the number of threads per core
  // for each (x, y) in (KMP_HW_* , KMP_HW_*)
  void _gather_enumeration_information();

  // Remove layers that don't add information to the topology.
  // This is done by having the layer take on the id = UNKNOWN_ID (-1)
  void _remove_radix1_layers();

  // Find out if the topology is uniform
  void _discover_uniformity();

  // Set all the sub_ids for each hardware thread
  void _set_sub_ids();

  // Set global affinity variables describing the number of threads per
  // core, the number of packages, the number of cores per package, and
  // the number of cores.
  void _set_globals();

  // Set the last level cache equivalent type
  void _set_last_level_cache();

  // Return the number of cores with a particular attribute, 'attr'.
  // If 'find_all' is true, then find all cores on the machine, otherwise find
  // all cores per the layer 'above'.
  int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
                            bool find_all = false) const;

public:
  // Force use of allocate()/deallocate()
  kmp_topology_t() = delete;
  kmp_topology_t(const kmp_topology_t &t) = delete;
  kmp_topology_t(kmp_topology_t &&t) = delete;
  kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
  kmp_topology_t &operator=(kmp_topology_t &&t) = delete;

  static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
  static void deallocate(kmp_topology_t *);

  // Functions used in create_map() routines
  kmp_hw_thread_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  const kmp_hw_thread_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  int get_num_hw_threads() const { return num_hw_threads; }
  void sort_ids() {
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_ids);
  }
  // Check if the hardware ids are unique. If they are,
  // return true; otherwise return false.
  bool check_ids() const;

  // Function to call after the create_map() routine
  void canonicalize();
  void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);

// Functions used after canonicalize() is called

#if KMP_AFFINITY_SUPPORTED
  // Set the granularity for affinity settings
  void set_granularity(kmp_affinity_t &stgs) const;
#endif
  bool filter_hw_subset();
  bool is_close(int hwt1, int hwt2, int level) const;
  bool is_uniform() const { return flags.uniform; }
  // Tell whether a type is a valid type in the topology;
  // returns KMP_HW_UNKNOWN when there is no equivalent type
  kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
  // Set type1 = type2
  void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
    kmp_hw_t real_type2 = equivalent[type2];
    if (real_type2 == KMP_HW_UNKNOWN)
      real_type2 = type2;
    equivalent[type1] = real_type2;
    // This loop is required since any of the types may have been set to
    // be equivalent to type1. They all must be checked and reset to type2.
    KMP_FOREACH_HW_TYPE(type) {
      if (equivalent[type] == type1) {
        equivalent[type] = real_type2;
      }
    }
  }
  // Calculate number of types corresponding to level1
  // per types corresponding to level2 (e.g., number of threads per core)
  int calculate_ratio(int level1, int level2) const {
    KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
    KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
    int r = 1;
    for (int level = level1; level > level2; --level)
      r *= ratio[level];
    return r;
  }
  int get_ratio(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return ratio[level];
  }
  int get_depth() const { return depth; }
  kmp_hw_t get_type(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return types[level];
  }
  int get_level(kmp_hw_t type) const {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
    int eq_type = equivalent[type];
    if (eq_type == KMP_HW_UNKNOWN)
      return -1;
    for (int i = 0; i < depth; ++i)
      if (types[i] == eq_type)
        return i;
    return -1;
  }
  int get_count(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return count[level];
  }
  // Return the total number of cores with attribute 'attr'
  int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
    return _get_ncores_with_attr(attr, -1, true);
  }
  // Return the number of cores with attribute
  // 'attr' per topology level 'above'
  int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
    return _get_ncores_with_attr(attr, above, false);
  }

#if KMP_AFFINITY_SUPPORTED
  friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b);
  void sort_compact(kmp_affinity_t &affinity) {
    compact = affinity.compact;
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_compact);
  }
#endif
  void print(const char *env_var = "KMP_AFFINITY") const;
  void dump() const;
};
extern kmp_topology_t *__kmp_topology;
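
// Worked example (editor's addition, not part of the original header): for
// the uniform topology [ 4 packages | 6 cores / package | 2 threads / core ],
// depth is 3 with types = {KMP_HW_SOCKET, KMP_HW_CORE, KMP_HW_THREAD} and
// ratio = {4, 6, 2}. Then:
//
//   int thread_lvl = __kmp_topology->get_level(KMP_HW_THREAD); // 2
//   int pkg_lvl = __kmp_topology->get_level(KMP_HW_SOCKET);    // 0
//   __kmp_topology->calculate_ratio(thread_lvl, pkg_lvl);      // 2 * 6 = 12
//
// i.e. up to 12 threads per package, while count[] holds the absolute totals
// {4, 24, 48}.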

class kmp_hw_subset_t {
  const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;

public:
  // Describe a machine topology item in KMP_HW_SUBSET
  struct item_t {
    kmp_hw_t type;
    int num_attrs;
    int num[MAX_ATTRS];
    int offset[MAX_ATTRS];
    kmp_hw_attr_t attr[MAX_ATTRS];
  };
  // Put parentheses around max to avoid accidental use of the Windows max
  // macro.
  const static int USE_ALL = (std::numeric_limits<int>::max)();

private:
  int depth;
  int capacity;
  item_t *items;
  kmp_uint64 set;
  bool absolute;
  // The set must be able to handle up to KMP_HW_LAST number of layers
  KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
  // Sorting the KMP_HW_SUBSET items to follow topology order.
  // All unknown topology types will be at the beginning of the subset.
  static int hw_subset_compare(const void *i1, const void *i2) {
    kmp_hw_t type1 = ((const item_t *)i1)->type;
    kmp_hw_t type2 = ((const item_t *)i2)->type;
    int level1 = __kmp_topology->get_level(type1);
    int level2 = __kmp_topology->get_level(type2);
    return level1 - level2;
  }

public:
  // Force use of allocate()/deallocate()
  kmp_hw_subset_t() = delete;
  kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
  kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;

  static kmp_hw_subset_t *allocate() {
    int initial_capacity = 5;
    kmp_hw_subset_t *retval =
        (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
    retval->depth = 0;
    retval->capacity = initial_capacity;
    retval->set = 0ull;
    retval->absolute = false;
    retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
    return retval;
  }
  static void deallocate(kmp_hw_subset_t *subset) {
    __kmp_free(subset->items);
    __kmp_free(subset);
  }
  void set_absolute() { absolute = true; }
  bool is_absolute() const { return absolute; }
  void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
    for (int i = 0; i < depth; ++i) {
      // Found an existing item for this layer type
      // Add the num, offset, and attr to this item
      if (items[i].type == type) {
        int idx = items[i].num_attrs++;
        if ((size_t)idx >= MAX_ATTRS)
          return;
        items[i].num[idx] = num;
        items[i].offset[idx] = offset;
        items[i].attr[idx] = attr;
        return;
      }
    }
    if (depth == capacity - 1) {
      capacity *= 2;
      item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
      for (int i = 0; i < depth; ++i)
        new_items[i] = items[i];
      __kmp_free(items);
      items = new_items;
    }
    items[depth].num_attrs = 1;
    items[depth].type = type;
    items[depth].num[0] = num;
    items[depth].offset[0] = offset;
    items[depth].attr[0] = attr;
    depth++;
    set |= (1ull << type);
  }
  int get_depth() const { return depth; }
  const item_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  item_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  void remove(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    set &= ~(1ull << items[index].type);
    for (int j = index + 1; j < depth; ++j) {
      items[j - 1] = items[j];
    }
    depth--;
  }
  void sort() {
    KMP_DEBUG_ASSERT(__kmp_topology);
    qsort(items, depth, sizeof(item_t), hw_subset_compare);
  }
  bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
  void dump() const {
    printf("**********************\n");
    printf("*** kmp_hw_subset: ***\n");
    printf("* depth: %d\n", depth);
    printf("* items:\n");
    for (int i = 0; i < depth; ++i) {
      printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type));
      for (int j = 0; j < items[i].num_attrs; ++j) {
        printf("  num: %d, offset: %d, attr: ", items[i].num[j],
               items[i].offset[j]);
        if (!items[i].attr[j]) {
          printf(" (none)\n");
        } else {
          printf(
              " core_type = %s, core_eff = %d\n",
              __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
              items[i].attr[j].get_core_eff());
        }
      }
    }
    printf("* set: 0x%llx\n", set);
    printf("* absolute: %d\n", absolute);
    printf("**********************\n");
  }
};
extern kmp_hw_subset_t *__kmp_hw_subset;
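
// Illustrative sketch (editor's addition, not part of the original header): a
// parser handling KMP_HW_SUBSET=2s,4c,2t could populate the singleton roughly
// like this (kmp_hw_attr_t{} passes an empty, invalid attribute):
//
//   kmp_hw_subset_t *s = kmp_hw_subset_t::allocate();
//   s->push_back(2, KMP_HW_SOCKET, 0, kmp_hw_attr_t{}); // "2s"
//   s->push_back(4, KMP_HW_CORE, 0, kmp_hw_attr_t{});   // "4c"
//   s->push_back(2, KMP_HW_THREAD, 0, kmp_hw_attr_t{}); // "2t"
//   s->sort();                 // topology order, once __kmp_topology exists
//   s->specified(KMP_HW_CORE); // true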

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;

  // Number of levels in the hierarchy. When the machine is oversubscribed,
  // levels are added to duplicate the hierarchy, doubling its thread capacity
  // each time a level is added.
  kmp_uint32 maxLevels;

  // Depth of the machine configuration hierarchy, in terms of the number of
  // levels along the longest path from root to any leaf.
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  // Level 0 corresponds to leaves; numPerLevel[i] is the number of children
  // the parent of a node at level i has. skipPerLevel[i] is the number of
  // leaves spanned by a subtree rooted at level i.
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;

  void deriveLevels() {
    int hier_depth = __kmp_topology->get_depth();
    for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
      numPerLevel[level] = __kmp_topology->get_ratio(i);
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage
       of dirty value observed when static library is re-initialized multiple
       times (e.g. when non-OpenMP thread repeatedly launches/joins thread that
       uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (__kmp_topology && __kmp_topology->get_depth() > 0) {
      deriveLevels();
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
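
// Worked example (editor's addition, not part of the original header): on a
// machine with 2 threads/core and 4 cores, deriveLevels() yields
// numPerLevel = {2, 4, 1, 1, 1, 1, 1}, so init() computes
// skipPerLevel = {1, 2, 8, 16, 32, 64, 128}: a node at level 1 (a core) spans
// 2 leaves, level 2 (the whole machine) spans all 8, and the oversubscription
// levels beyond depth keep doubling so the barrier tree can absorb more
// threads than processors.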
#endif // KMP_AFFINITY_H