Continuing the investigation that started in cri-o/cri-o#6973
Apparently, systemd sometimes removes the cgroup before conmon can read its `memory.events` file. I don't have any ideas about how to work around that race (in conmon, that is).
Another small issue I found is that an OOM can sometimes be misdetected because the code uses the same static counter variable for both the `oom` and `oom_kill` entries, which appears to be wrong.
|
gboolean check_cgroup2_oom() |
|
{ |
|
static long int last_counter = 0; |
|
|
|
if (!is_cgroup_v2) |
|
return G_SOURCE_REMOVE; |
|
|
|
_cleanup_free_ char *memory_events_file_path = g_build_filename(cgroup2_path, "memory.events", NULL); |
|
|
|
_cleanup_fclose_ FILE *fp = fopen(memory_events_file_path, "re"); |
|
if (fp == NULL) { |
|
nwarnf("Failed to open cgroups file: %s", memory_events_file_path); |
|
return G_SOURCE_CONTINUE; |
|
} |
|
|
|
_cleanup_free_ char *line = NULL; |
|
size_t len = 0; |
|
ssize_t read; |
|
while ((read = getline(&line, &len, fp)) != -1) { |
|
long int counter; |
|
const int oom_len = 4, oom_kill_len = 9; |
|
|
|
if (read >= oom_kill_len + 2 && memcmp(line, "oom_kill ", oom_kill_len) == 0) |
|
len = oom_kill_len; |
|
else if (read >= oom_len + 2 && memcmp(line, "oom ", oom_len) == 0) |
|
len = oom_len; |
|
else |
|
continue; |
|
|
|
counter = strtol(&line[len], NULL, 10); |
|
|
|
if (counter == LONG_MAX) { |
|
nwarnf("Failed to parse: %s", &line[len]); |
|
continue; |
|
} |
|
|
|
if (counter == 0) |
|
continue; |
|
|
|
if (counter != last_counter) { |
|
if (write_oom_files() == 0) |
|
last_counter = counter; |