heftig/rtkit

RTKit does not work on musl libc as a result of lacking sched_getscheduler and sched_setscheduler

Opened this issue · 5 comments

As in the title, musl libc does not implement these functions, since Linux's syscall behaviour is seen as non-compliant with the POSIX standard as written (it takes a thread ID rather than a normal pid, and returns 0 on success, rather than returning the previous policy).

Void Linux currently uses the attached patch to implement sched_getscheduler and sched_setscheduler for musl.

This patch vendors in the implementations of sched_getscheduler() and
sched_setscheduler() from
https://git.musl-libc.org/cgit/musl/commit/?id=61be1cfec1f5da66c68f92a6939e3a38e673c9d6
that were subsequently dropped for the sake of stricter POSIX conformance. However, rtkit
needs those functions to work. Since each function is ~ 1 line of code, we
can just vendor them in here.

For more details, see discussion in
https://gitlab.alpinelinux.org/alpine/aports/-/merge_requests/19915
--- rtkit-0.13.orig/rtkit-daemon.c
+++ rtkit-0.13/rtkit-daemon.c
@@ -32,6 +32,7 @@
 #include <sys/stat.h>
 #include <string.h>
 #include <sched.h>
+#include <syscall.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <stdlib.h>
@@ -196,6 +197,15 @@
 static pthread_t canary_thread_id = 0, watchdog_thread_id = 0;
 static volatile uint32_t refuse_until = 0;
 
+static int _sched_setscheduler(pid_t pid, int sched, const struct sched_param *param) { /* Stand-in for sched_setscheduler(): issues the raw SYS_sched_setscheduler syscall. */
+        static const struct sched_param def; /* static storage duration, so zero-initialized */
+        return syscall(SYS_sched_setscheduler, pid, 0, &def); /* NOTE(review): always passes policy 0 and &def; the sched and param arguments are never used */
+}
+
+static int _sched_getscheduler(pid_t pid) { /* Stand-in for sched_getscheduler(): issues the raw SYS_sched_getscheduler syscall for pid. */
+        return syscall(SYS_sched_getscheduler, pid); /* callers in this patch treat a negative result as failure and then inspect errno */
+}
+
 static const char *get_proc_path(void) {
         /* Useful for chroot environments */
 
@@ -439,7 +449,7 @@
         if (st != t->starttime)
                 return FALSE;
 
-        if ((r = sched_getscheduler(t->pid)) < 0) {
+        if ((r = _sched_getscheduler(t->pid)) < 0) {
 
                 /* Maybe it died right now? */
                 if (errno == ESRCH)
@@ -564,7 +574,7 @@
         memset(&param, 0, sizeof(param));
         param.sched_priority = priority;
 
-        if (sched_setscheduler(0, sched_policy|SCHED_RESET_ON_FORK, &param) < 0) {
+        if (_sched_setscheduler(0, sched_policy|SCHED_RESET_ON_FORK, &param) < 0) {
                 r = -errno;
                 syslog(LOG_ERR, "Failed to make ourselves RT: %s\n", strerror(errno));
                 goto finish;
@@ -581,7 +591,7 @@
 
         memset(&param, 0, sizeof(param));
 
-        if (sched_setscheduler(0, SCHED_OTHER, &param) < 0)
+        if (_sched_setscheduler(0, SCHED_OTHER, &param) < 0)
                 syslog(LOG_WARNING, "Warning: Failed to reset scheduling to SCHED_OTHER: %s\n", strerror(errno));
 
         if (setpriority(PRIO_PROCESS, 0, nice_level) < 0)
@@ -696,7 +706,7 @@
         memset(&param, 0, sizeof(param));
         param.sched_priority = 0;
 
-        if (sched_setscheduler(tid, SCHED_OTHER, &param) < 0) {
+        if (_sched_setscheduler(tid, SCHED_OTHER, &param) < 0) {
                 if (errno != ESRCH)
                         syslog(LOG_WARNING, "Warning: Failed to reset scheduling to SCHED_OTHER for thread %llu: %s\n", (unsigned long long) tid, strerror(errno));
                 r = -1;
@@ -748,7 +758,7 @@
         /* Ok, everything seems to be in order, now, let's do it */
         memset(&param, 0, sizeof(param));
         param.sched_priority = (int) priority;
-        if (sched_setscheduler(t->pid, sched_policy|SCHED_RESET_ON_FORK, &param) < 0) {
+        if (_sched_setscheduler(t->pid, sched_policy|SCHED_RESET_ON_FORK, &param) < 0) {
                 r = -errno;
                 syslog(LOG_ERR, "Failed to make thread %llu RT: %s\n", (unsigned long long) t->pid, strerror(errno));
                 goto finish;
@@ -809,7 +819,7 @@
         /* Ok, everything seems to be in order, now, let's do it */
         memset(&param, 0, sizeof(param));
         param.sched_priority = 0;
-        if (sched_setscheduler(t->pid, SCHED_OTHER|SCHED_RESET_ON_FORK, &param) < 0) {
+        if (_sched_setscheduler(t->pid, SCHED_OTHER|SCHED_RESET_ON_FORK, &param) < 0) {
                 r = -errno;
                 syslog(LOG_ERR, "Failed to make process %llu SCHED_NORMAL: %s\n", (unsigned long long) t->pid, strerror(errno));
                 goto finish;
@@ -943,7 +953,7 @@
                         if (errno != 0 || !e || *e != 0)
                                 continue;
 
-                        if ((r = sched_getscheduler(tid)) < 0) {
+                        if ((r = _sched_getscheduler(tid)) < 0) {
                                 if (errno != ESRCH)
                                         syslog(LOG_WARNING, "Warning: sched_getscheduler() failed: %s\n", strerror(errno));
                                 continue;

This is not a particularly elegant fix, so if you would like, I can make one that uses #ifdefs and the like to detect musl and only use the syscall-based alternative implementation in that case.

 +static int _sched_setscheduler(pid_t pid, int sched, const struct sched_param *param) {
 +        static const struct sched_param def;
 +        return syscall(SYS_sched_setscheduler, pid, 0, &def);
 +}

Why is the struct sched_param parameter ignored?

For SCHED_OTHER, SCHED_BATCH, and SCHED_IDLE, it must always contain .sched_priority=0, so it is not actually used by the kernel. For SCHED_FIFO and SCHED_RR, it does matter, and I am as yet unsure why musl did not handle this case. The patch implements the sched_[set/get]scheduler functions exactly as they were in musl prior to being dropped.

Looking at this mailing list thread: https://www.openwall.com/lists/musl/2016/03/01/5

The reason it doesn't do anything is that Linux does not provide a way
to set scheduling parameters for a process, only for threads. The
sched_setscheduler syscall is documented as taking a pid but actually
takes a thread id and only operates on that thread. glibc just ignores
this and provides sched_* functions that do the wrong thing.

Fortunately there's an easy fix: use pthread_setschedparam, and
pthread_self to get the pthread_t value you need to pass to it.

Would a more elegant solution be for rtkit to switch to pthread_setschedparam?

Also see this commit description here: https://git.musl-libc.org/cgit/musl/commit/src/sched/sched_setparam.c?id=1e21e78bf7a5c24c217446d8760be7b7188711c2