#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#if KMP_AFFINITY_SUPPORTED
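// Hwloc-based implementation of the KMPAffinity interface: the mask is an
// hwloc bitmap, and thread binding goes through hwloc_get/set_cpubind().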
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      long retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error)
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      long retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error)
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      return error;
    }
    int set_process_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set process affinity operation when not capable");
      int error = 0;
      const hwloc_topology_support *support =
          hwloc_topology_get_support(__kmp_hwloc_topology);
      if (support->cpubind->set_proc_cpubind) {
        int retval;
        retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
                                   HWLOC_CPUBIND_PROCESS);
        if (retval >= 0)
          return 0;
        error = errno;
        if (abort_on_error)
          __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Is topology discovery possible? (pu indicates the ability to discover
    // processing units.) And were there no errors from any hwloc_* call?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
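
// Illustrative sketch (mirrors bind_thread() above): an abstract
// KMPAffinity::Mask is normally handled through the KMP_CPU_* macros rather
// than through a concrete Mask type, e.g.
//   KMPAffinity::Mask *mask;
//   KMP_CPU_ALLOC_ON_STACK(mask);
//   KMP_CPU_ZERO(mask);
//   KMP_CPU_SET(which, mask);
//   __kmp_set_system_affinity(mask, TRUE);
//   KMP_CPU_FREE_FROM_STACK(mask);

// The block below hard-codes the sched_{set,get}affinity system call numbers
// per architecture as a fallback for toolchains whose headers do not define
// them, and cross-checks the values when they are defined.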
#if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_RISCV64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
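
// Native (non-hwloc) affinity implementation for Linux and FreeBSD. The mask
// is a flat array of unsigned long words handed directly to
// sched_{set,get}affinity on Linux and to pthread_{set,get}affinity_np on
// FreeBSD.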
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned long mask_t;
    typedef decltype(__kmp_affin_mask_size) mask_size_type;
    static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    static const mask_t ONE = 1;
    mask_size_type get_num_mask_types() const {
      return __kmp_affin_mask_size / sizeof(mask_t);
    }

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = (mask_t)0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override {
      int e;
      __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
      return e;
    }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
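
// Native affinity implementation for Windows. The mask holds one mask_t
// (ULONG_PTR) per processor group; with more than one group, binding goes
// through {Get,Set}ThreadGroupAffinity, otherwise through the classic
// SetThreadAffinityMask/SetProcessAffinityMask calls.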
#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_process_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups <= 1) {
        if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
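
// Attributes attached to a core, used on hybrid architectures: a core type
// (e.g. performance vs. efficiency core) and a core efficiency value, plus a
// valid bit recording whether either attribute has been set.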
struct kmp_hw_attr_t {
  int core_type : 8;
  int core_eff : 8;
  unsigned valid : 1;
  unsigned reserved : 15;

  static const int UNKNOWN_CORE_EFF = -1;

  kmp_hw_attr_t()
      : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
        valid(0), reserved(0) {}
  void set_core_type(kmp_hw_core_type_t type) {
    valid = 1;
    core_type = type;
  }
  void set_core_eff(int eff) {
    valid = 1;
    core_eff = eff;
  }
  kmp_hw_core_type_t get_core_type() const {
    return (kmp_hw_core_type_t)core_type;
  }
  int get_core_eff() const { return core_eff; }
  bool is_core_type_valid() const {
    return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
  }
  bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
  operator bool() const { return valid; }
  void clear() {
    core_type = KMP_HW_CORE_TYPE_UNKNOWN;
    core_eff = UNKNOWN_CORE_EFF;
    valid = 0;
  }
  bool contains(const kmp_hw_attr_t &other) const {
    if (!valid && !other.valid)
      return true;
    if (valid && other.valid) {
      if (other.is_core_type_valid()) {
        if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
          return false;
      }
      if (other.is_core_eff_valid()) {
        if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
          return false;
      }
      return true;
    }
    return false;
  }
  bool operator==(const kmp_hw_attr_t &rhs) const {
    return (rhs.valid == valid && rhs.core_eff == core_eff &&
            rhs.core_type == core_type);
  }
  bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
};
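
// One entry per OS hardware thread: its id and sub-id at every topology level
// (socket, core, thread, ...), used when sorting and mapping threads.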
class kmp_hw_thread_t {
public:
  static const int UNKNOWN_ID = -1;
  static int compare_ids(const void *a, const void *b);
  static int compare_compact(const void *a, const void *b);
  int ids[KMP_HW_LAST];
  int sub_ids[KMP_HW_LAST];

  void clear() {
    for (int i = 0; i < (int)KMP_HW_LAST; ++i)
      ids[i] = UNKNOWN_ID;
  }
};
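
// Container for the machine topology: the table of kmp_hw_thread_t entries
// plus, per level, the layer type, the ratio to the level below, and the
// total count, with helpers to canonicalize, query, and filter the topology.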
class kmp_topology_t {
  struct flags_t {
    int uniform : 1;
    int reserved : 31;
  };

  int depth;

  // The following arrays are of size depth
  kmp_hw_t *types; // layer types, e.g., socket X core X thread
  int *ratio; // ratio of each layer to the layer below it
  int *count; // total number of objects at each layer

  int num_core_efficiencies;
  int num_core_types;
  kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];

  int num_hw_threads;
  kmp_hw_thread_t *hw_threads;

  flags_t flags;

  // Equivalence relationship between the topology layer types
  kmp_hw_t equivalent[KMP_HW_LAST];

  // Insert a new topology layer after allocation
  void _insert_layer(kmp_hw_t type, const int *ids);

#if KMP_GROUP_AFFINITY
  // Insert a layer for Windows processor groups
  void _insert_windows_proc_groups();
#endif

  void _gather_enumeration_information();
  void _remove_radix1_layers();
  void _discover_uniformity();
  void _set_last_level_cache();
  int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
                            bool find_all = false) const;

public:
  // Force use of allocate()/deallocate()
  kmp_topology_t() = delete;
  kmp_topology_t(const kmp_topology_t &t) = delete;
  kmp_topology_t(kmp_topology_t &&t) = delete;
  kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
  kmp_topology_t &operator=(kmp_topology_t &&t) = delete;
  static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
  static void deallocate(kmp_topology_t *);

  // Functions used in create_map() routines
  kmp_hw_thread_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  const kmp_hw_thread_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  int get_num_hw_threads() const { return num_hw_threads; }
  void sort_ids() {
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_ids);
  }
  // Check if the hardware ids are unique; return true if they are
  bool check_ids() const;

  // Function to call after the create_map() routine
  void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);

  // Functions used after canonicalize() is called
  bool filter_hw_subset();
  bool is_close(int hwt1, int hwt2, int level) const;
  bool is_uniform() const { return flags.uniform; }
  // Tell whether a type is a valid type in the topology;
  // returns KMP_HW_UNKNOWN when there is no equivalent type
  kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
  // Set type1 equivalent to type2
  void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
    kmp_hw_t real_type2 = equivalent[type2];
    if (real_type2 == KMP_HW_UNKNOWN)
      real_type2 = type2;
    equivalent[type1] = real_type2;
    // Any type may already have been set equivalent to type1;
    // those entries must be redirected to type2 as well.
    KMP_FOREACH_HW_TYPE(type) {
      if (equivalent[type] == type1) {
        equivalent[type] = real_type2;
      }
    }
  }
  // Calculate number of types corresponding to level1 per types
  // corresponding to level2 (e.g., number of threads per core)
  int calculate_ratio(int level1, int level2) const {
    KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
    KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
    int r = 1;
    for (int level = level1; level > level2; --level)
      r *= ratio[level];
    return r;
  }
  int get_ratio(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return ratio[level];
  }
  int get_depth() const { return depth; };
  kmp_hw_t get_type(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return types[level];
  }
  int get_level(kmp_hw_t type) const {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
    int eq_type = equivalent[type];
    if (eq_type == KMP_HW_UNKNOWN)
      return -1;
    for (int i = 0; i < depth; ++i)
      if (types[i] == eq_type)
        return i;
    return -1;
  }
  int get_count(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return count[level];
  }
  // Return the total number of cores with attribute 'attr'
  int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
    return _get_ncores_with_attr(attr, -1, true);
  }
  // Return the number of cores with attribute 'attr' per topology level 'above'
  int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
    return _get_ncores_with_attr(attr, above, false);
  }

#if KMP_AFFINITY_SUPPORTED
  void sort_compact() {
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_compact);
  }
#endif
  void print(const char *env_var = "KMP_AFFINITY") const;
};
extern kmp_topology_t *__kmp_topology;
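
// Parsed representation of the KMP_HW_SUBSET environment variable: one item_t
// per requested topology layer, each carrying up to MAX_ATTRS
// (num, offset, attr) triples.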
class kmp_hw_subset_t {
  const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;

public:
  // Describe a machine topology item in KMP_HW_SUBSET
  struct item_t {
    kmp_hw_t type;
    int num_attrs;
    int num[MAX_ATTRS];
    int offset[MAX_ATTRS];
    kmp_hw_attr_t attr[MAX_ATTRS];
  };
  // Put parenthesis around max to avoid accidental use of Windows max macro.
  const static int USE_ALL = (std::numeric_limits<int>::max)();

private:
  int depth;
  int capacity;
  item_t *items;
  kmp_uint64 set;
  bool absolute;
  // The set must be able to handle up to KMP_HW_LAST number of layers
  KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
  // Sorting the KMP_HW_SUBSET items to follow topology order
  // All unknown topology types will be at the beginning of the subset
  static int hw_subset_compare(const void *i1, const void *i2) {
    kmp_hw_t type1 = ((const item_t *)i1)->type;
    kmp_hw_t type2 = ((const item_t *)i2)->type;
    int level1 = __kmp_topology->get_level(type1);
    int level2 = __kmp_topology->get_level(type2);
    return level1 - level2;
  }

public:
  // Force use of allocate()/deallocate()
  kmp_hw_subset_t() = delete;
  kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
  kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;

  static kmp_hw_subset_t *allocate() {
    int initial_capacity = 5;
    kmp_hw_subset_t *retval =
        (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
    retval->depth = 0;
    retval->capacity = initial_capacity;
    retval->set = 0ull;
    retval->absolute = false;
    retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
    return retval;
  }
  static void deallocate(kmp_hw_subset_t *subset) {
    __kmp_free(subset->items);
    __kmp_free(subset);
  }
  void set_absolute() { absolute = true; }
  bool is_absolute() const { return absolute; }
  void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
    for (int i = 0; i < depth; ++i) {
      // Found an existing item for this layer type:
      // add the num, offset, and attr to this item
      if (items[i].type == type) {
        int idx = items[i].num_attrs++;
        if ((size_t)idx >= MAX_ATTRS)
          return;
        items[i].num[idx] = num;
        items[i].offset[idx] = offset;
        items[i].attr[idx] = attr;
        return;
      }
    }
    if (depth == capacity - 1) {
      capacity *= 2;
      item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
      for (int i = 0; i < depth; ++i)
        new_items[i] = items[i];
      __kmp_free(items);
      items = new_items;
    }
    items[depth].num_attrs = 1;
    items[depth].type = type;
    items[depth].num[0] = num;
    items[depth].offset[0] = offset;
    items[depth].attr[0] = attr;
    depth++;
    set |= (1ull << type);
  }
  int get_depth() const { return depth; }
  const item_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  item_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  void remove(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    set &= ~(1ull << items[index].type);
    for (int j = index + 1; j < depth; ++j) {
      items[j - 1] = items[j];
    }
    depth--;
  }
  void sort() {
    KMP_DEBUG_ASSERT(__kmp_topology);
    qsort(items, depth, sizeof(item_t), hw_subset_compare);
  }
  bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
  void dump() const {
    printf("**********************\n");
    printf("*** kmp_hw_subset: ***\n");
    printf("* depth: %d\n", depth);
    printf("* items:\n");
    for (int i = 0; i < depth; ++i) {
      printf("  type: %s\n", __kmp_hw_get_keyword(items[i].type));
      for (int j = 0; j < items[i].num_attrs; ++j) {
        printf("    num: %d, offset: %d, attr: ", items[i].num[j],
               items[i].offset[j]);
        if (!items[i].attr[j]) {
          printf(" (none)\n");
        } else {
          printf(
              " core_type = %s, core_eff = %d\n",
              __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
              items[i].attr[j].get_core_eff());
        }
      }
    }
    printf("* set: 0x%llx\n", set);
    printf("* absolute: %d\n", absolute);
    printf("**********************\n");
  }
};
extern kmp_hw_subset_t *__kmp_hw_subset;
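
// Machine hierarchy used by the hierarchical barrier. numPerLevel[i] is the
// branching factor at level i and skipPerLevel[i] the number of leaves spanned
// by a node at that level; when no topology information is available, a
// default tree based on maxLeaves/minBranch is built instead.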
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. When the machine is oversubscribed,
      levels are added to duplicate the hierarchy, doubling its thread capacity
      each time a level is added. */
  kmp_uint32 maxLevels;

  /** Depth of the machine configuration hierarchy, in terms of the number of
      levels along the longest path from root to any leaf. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has; skipPerLevel[i] is the number of
      leaves spanned by a node at level i. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;

  void deriveLevels() {
    int hier_depth = __kmp_topology->get_depth();
    for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
      numPerLevel[level] = __kmp_topology->get_ratio(i);
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Re-initialize the data fields explicitly to avoid picking up dirty
       values when a static library is re-initialized multiple times. */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels; ++i) {
      // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (__kmp_topology && __kmp_topology->get_depth() > 0) {
      deriveLevels();
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H