Correct hardware-independent thread pinning
schuay opened this issue · 1 comments
schuay commented
Have you thought about using hwloc to do hardware-independent thread pinning? It would add a dependency on the one hand, but make things much easier for people running this lib on the other. It'd also allow you to remove the horrible the_cores hack ;)
A quick draft of the required changes below:
diff --git a/common/Makefile.common b/common/Makefile.common
index d8bba15..f51050b 100644
--- a/common/Makefile.common
+++ b/common/Makefile.common
@@ -248,7 +248,7 @@ CFLAGS += -Wall
CFLAGS += -fno-strict-aliasing
CFLAGS += -I$(LIBAO_INC) -I$(ROOT)/include -I$(LIBSSMEM)/include
-LDFLAGS += -lpthread -lrt -lm
+LDFLAGS += -lpthread -lrt -lm -lhwloc
######################
# compilation settings
diff --git a/include/utils.h b/include/utils.h
index 609fe5a..c5e4f92 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -655,36 +655,25 @@ static __attribute__ ((unused)) double eng_per_test_iter_nj[40][5] =
return (double)t.tv_sec + ((double)t.tv_usec)/1000000.0;
}
+#include <hwloc.h>
+ hwloc_topology_t m_topology;
+
static inline
void set_cpu(int cpu)
{
-#ifndef NO_SET_CPU
-# ifdef __sparc__
- processor_bind(P_LWPID,P_MYID, cpu, NULL);
-# elif defined(__tile__)
- if (cpu>=tmc_cpus_grid_total()) {
- perror("Thread id too high");
+ const int depth = hwloc_get_type_or_below_depth(m_topology, HWLOC_OBJ_CORE);
+ const int ncores = hwloc_get_nbobjs_by_depth(m_topology, depth);
+
+ const hwloc_obj_t obj = hwloc_get_obj_by_depth(m_topology, depth, cpu % ncores);
+
+ hwloc_cpuset_t cpuset = hwloc_bitmap_dup(obj->cpuset);
+ hwloc_bitmap_singlify(cpuset);
+
+ if (hwloc_set_cpubind(m_topology, cpuset, HWLOC_CPUBIND_THREAD) != 0) {
+ fprintf(stderr, "Could not bind to core: %s\n", strerror(errno));
}
- // cput_set_t cpus;
- if (tmc_cpus_set_my_cpu(cpu)<0) {
- tmc_task_die("tmc_cpus_set_my_cpu() failed.");
- }
-# else
- cpu %= (NUMBER_OF_SOCKETS * CORES_PER_SOCKET);
-
- cpu_set_t mask;
- CPU_ZERO(&mask);
- CPU_SET(cpu, &mask);
-# if defined(PLATFORM_NUMA)
- numa_set_preferred(get_cluster(cpu));
-# endif
- pthread_t thread = pthread_self();
- if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &mask) != 0)
- {
- fprintf(stderr, "Error setting thread affinity\n");
- }
-# endif
-#endif
+
+ hwloc_bitmap_free(cpuset);
}
diff --git a/src/priorityqueue-alistarh/test_simple.c b/src/priorityqueue-alistarh/test_simple.c
index 03099f7..5c1ebdc 100755
--- a/src/priorityqueue-alistarh/test_simple.c
+++ b/src/priorityqueue-alistarh/test_simple.c
@@ -130,7 +130,7 @@ test(void* thread)
thread_data_t* td = (thread_data_t*) thread;
uint32_t ID = td->id;
int phys_id = the_cores[ID];
- set_cpu(phys_id);
+ set_cpu(ID);
ssalloc_init();
DS_TYPE* set = td->set;
@@ -295,10 +295,16 @@ test(void* thread)
pthread_exit(NULL);
}
+#include <hwloc.h>
+extern hwloc_topology_t m_topology;
+
int
main(int argc, char **argv)
{
- set_cpu(the_cores[0]);
+ hwloc_topology_init(&m_topology);
+ hwloc_topology_load(m_topology);
+
+ set_cpu(0);
ssalloc_init();
seeds = seed_rand();
@@ -398,8 +404,6 @@ main(int argc, char **argv)
}
}
-
-
if (!is_power_of_two(initial))
{
size_t initial_pow2 = pow2roundup(initial);
@@ -608,6 +612,8 @@ main(int argc, char **argv)
RR_PRINT_UNPROTECTED(RAPL_PRINT_POW);
RR_PRINT_CORRECTED();
RETRY_STATS_PRINT(total, putting_count_total, removing_count_total, putting_count_total_succ + removing_count_total_succ);
+
+ hwloc_topology_destroy(m_topology);
pthread_exit(NULL);
trigonak commented
That is a good idea. We will integrate it as an option as soon as possible.
Thanks.