systemd crashes when manager->dev_autofs_fd is -1 and a->tokens is not empty
taskset opened this issue · 0 comments
1. Coredump analysis
#0 0x00007f644f19e8c7 in kill () from /lib64/libc.so.6
#1 0x00005556566edcdd in crash (sig=6) at src/core/main.c:206
#2 <signal handler called>
#3 0x00007f644f19e5f7 in raise () from /lib64/libc.so.6
#4 0x00007f644f19fce8 in abort () from /lib64/libc.so.6
#5 0x0000555656756882 in log_assert_failed (text=text@entry=0x5556567fc545 "dev_autofs_fd >= 0",
file=file@entry=0x5556567fc3b4 "src/core/automount.c", line=line@entry=370,
func=func@entry=0x5556567fd0b4 <__PRETTY_FUNCTION__.17397> "open_ioctl_fd") at src/shared/log.c:754
#6 0x00005556567b064a in open_ioctl_fd (dev_autofs_fd=-1, where=<optimized out>, devid=<optimized out>) at src/core/automount.c:370
#7 0x00005556567b10f6 in automount_send_ready (a=a@entry=0x555656b79110, tokens=0x555656c8b560, status=status@entry=0)
at src/core/automount.c:469
#8 0x00005556567b360e in automount_update_mount (a=0x555656b79110, old_state=old_state@entry=MOUNT_DEAD,
state=state@entry=MOUNT_MOUNTED) at src/core/automount.c:509
#9 0x00005556567ac9e8 in mount_notify_automount (state=MOUNT_MOUNTED, old_state=MOUNT_DEAD, m=0x555656b77000) at src/core/mount.c:588
#10 mount_set_state (m=m@entry=0x555656b77000, state=MOUNT_MOUNTED) at src/core/mount.c:619
#11 0x00005556567ad068 in mount_coldplug (u=0x555656b77000, deferred_work=<optimized out>) at src/core/mount.c:671
#12 0x000055565679c589 in unit_coldplug (u=0x555656b77000, deferred_work=deferred_work@entry=0x555656d3e070) at src/core/unit.c:2886
#13 0x00005556566f031e in manager_coldplug (m=m@entry=0x555656ac5980) at src/core/manager.c:1125
#14 0x00005556566f4a7a in manager_startup (m=0x555656ac5980, serialization=0x555656ac5230, fds=<optimized out>)
at src/core/manager.c:1288
#15 0x00005556566ea4e3 in main (argc=4, argv=0x7ffe78ac9848) at src/core/main.c:1798
(gdb) p *a
$11 = {meta = {manager = 0x555656ac5980, type = UNIT_AUTOMOUNT, load_state = UNIT_LOADED, merged_into = 0x0,
id = 0x555656b29ce0 "proc-sys-fs-binfmt_misc.automount", instance = 0x0, names = 0x555656b79450, dependencies = {0x555656b78500,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x555656b794f0, 0x0, 0x0, 0x0, 0x0, 0x555656b76980, 0x555656b784c0, 0x0, 0x555656b76710,
0x0, 0x0, 0x0, 0x0, 0x555656b769f0, 0x555656b79530}, requires_mounts_for = 0x555656b76750,
description = 0x555656b76eb0 "Arbitrary Executable File Formats File System Automount Point", documentation = 0x555656b76960,
fragment_path = 0x555656b6e540 "/usr/lib/systemd/system/proc-sys-fs-binfmt_misc.automount", source_path = 0x0, dropin_paths = 0x0,
fragment_mtime = 1595213181000000, source_mtime = 0, dropin_mtime = 0, job = 0x0, nop_job = 0x0, job_timeout = 0,
job_timeout_action = EMERGENCY_ACTION_NONE, job_timeout_reboot_arg = 0x0, refs_by_target = 0x0, conditions = 0x555656b769c0,
asserts = 0x0, condition_timestamp = {realtime = 1591608832758220, monotonic = 70060990954163}, assert_timestamp = {
realtime = 1591608832758232, monotonic = 70060990954175}, inactive_exit_timestamp = {realtime = 1591608832758666,
monotonic = 70060990954609}, active_enter_timestamp = {realtime = 1591608832758666, monotonic = 70060990954609},
active_exit_timestamp = {realtime = 1591608832758141, monotonic = 70060990954084}, inactive_enter_timestamp = {
realtime = 1591608832758141, monotonic = 70060990954084}, slice = {source = 0x0, target = 0x0, refs_by_target_next = 0x0,
refs_by_target_prev = 0x0}, units_by_type_next = 0x0, units_by_type_prev = 0x0, has_requires_mounts_for_next = 0x0,
has_requires_mounts_for_prev = 0x0, load_queue_next = 0x0, load_queue_prev = 0x0, dbus_queue_next = 0x0, dbus_queue_prev = 0x0,
cleanup_queue_next = 0x0, cleanup_queue_prev = 0x0, gc_queue_next = 0x555656b78840, gc_queue_prev = 0x555656b796c0,
cgroup_queue_next = 0x0, cgroup_queue_prev = 0x0, target_deps_queue_next = 0x0, target_deps_queue_prev = 0x0, pids = 0x0,
sigchldgen = 0, gc_marker = 0, auto_stop_ratelimit = {interval = 10000000, begin = 0, burst = 16, num = 0}, deserialized_job = -1,
load_error = 0, unit_file_state = _UNIT_FILE_STATE_INVALID, unit_file_preset = -1, cgroup_path = 0x0, cgroup_realized_mask = 0,
cgroup_subtree_mask = 0, cgroup_members_mask = 0, on_failure_job_mode = JOB_REPLACE, stop_when_unneeded = false,
default_dependencies = false, refuse_manual_start = false, refuse_manual_stop = false, allow_isolate = false,
ignore_on_isolate = true, ignore_on_snapshot = false, condition_result = true, assert_result = true, transient = false,
in_load_queue = false, in_dbus_queue = false, in_cleanup_queue = false, in_gc_queue = true, in_cgroup_queue = false,
in_target_deps_queue = false, sent_dbus_new_signal = true, no_gc = false, in_audit = false, cgroup_realized = false,
cgroup_members_mask_valid = true, cgroup_subtree_mask_valid = true}, state = AUTOMOUNT_DEAD,
deserialized_state = AUTOMOUNT_RUNNING, where = 0x555656b76fd0 "/proc/sys/fs/binfmt_misc", timeout_idle_usec = 0, pipe_fd = 24,
pipe_event_source = 0x0, directory_mode = 493, dev_id = 1048609, tokens = 0x555656c8b560, expire_tokens = 0x0,
expire_event_source = 0x0, result = AUTOMOUNT_SUCCESS}
(gdb) p *a->tokens
$10 = {b = {hash_ops = 0x555656a4a6d0 <trivial_hash_ops>, {indirect = {storage = 0x3 <Address 0x3 out of bounds>,
hash_key = '\000' <repeats 15 times>, n_entries = 0, n_buckets = 0, idx_lowest_entry = 4294967040, _pad = "\000\000"},
direct = {storage = "\003", '\000' <repeats 32 times>, "\377\377\377\000\000"}}, type = HASHMAP_TYPE_SET, has_indirect = false,
n_direct_entries = 1, from_pool = false}}
a->tokens is non-empty and dev_autofs_fd == -1, so the assertion "dev_autofs_fd >= 0" in open_ioctl_fd() fails:
458 static int automount_send_ready(Automount *a, Set *tokens, int status) {
459 _cleanup_close_ int ioctl_fd = -1;
460 unsigned token;
461 int r;
462
463 assert(a);
464 assert(status <= 0);
465
466 if (set_isempty(tokens))
467 return 0;
468
469 ioctl_fd = open_ioctl_fd(UNIT(a)->manager->dev_autofs_fd, a->where, a->dev_id);
2. Analysis of how a->tokens changes
Precondition:
- Ensure that the proc-sys-fs-binfmt_misc.automount service is active;
- Ensure that proc-sys-fs-binfmt_misc.mount is inactive;
- Execute command: ls /proc/sys/fs/binfmt_misc
We can observe the following changes in a->tokens:
Step a: first, packet.v5_packet.wait_queue_token is added to a->tokens (triggered via /dev/autofs), as follows:
manager_loop
-> sd_event_dispatch
-> source_dispatch
-> automount_dispatch_io
-> set_put(a->tokens, UINT_TO_PTR(packet.v5_packet.wait_queue_token));
Step b: then the tokens are removed and a->tokens becomes empty again (triggered via /proc/1/mountinfo), as follows:
manager_loop
-> sd_event_dispatch
-> source_dispatch
-> manager_dispatch_signal_fd
-> manager_dispatch_sigchld
-> mount_sigchld_event
-> mount_set_state
-> mount_notify_automount
-> automount_update_mount
-> automount_send_ready
-> set_steal_first(tokens)
If we then execute systemctl daemon-reexec, manager->dev_autofs_fd is also -1, but because a->tokens is empty, automount_send_ready() returns early and there is no problem.
But if, for some reason, step a is executed and step b is not, and systemctl daemon-reexec is then executed, the assertion failure is always triggered.
Based on this, we can reproduce the crash.
3. How to reproduce:
Construct a mount path exceeding 256 characters:
# mkdir -p /run/kata-containers/shared/sandboxes/f0ea3efdb417f442128830e86118cf216d1d236d6f970205a680972bcd062f74/f0ea3efdb417f442128830e86118cf216d1d236d6f970205a680972bcd062f74-a1bed3c11a474518-aaaaaa_xxxx_mix_xxxx_container_role_20200310112829109807.yyyy_container_role_20200310112829109807_15_81
# mkdir -p /tmp/test
# mount --bind /tmp/test /run/kata-containers/shared/sandboxes/f0ea3efdb417f442128830e86118cf216d1d236d6f970205a680972bcd062f74/f0ea3efdb417f442128830e86118cf216d1d236d6f970205a680972bcd062f74-a1bed3c11a474518-aaaaaa_xxxx_mix_xxxx_container_role_20200310112829109807.yyyy_container_role_20200310112829109807_15_81
# ls -l /proc/1/fd | grep mount
/proc/1/mountinfo still exists
# systemctl daemon-reload
# ls -l /proc/1/fd | grep mount
/proc/1/mountinfo will disappear
Ensure that the proc-sys-fs-binfmt_misc.automount is active
Ensure that proc-sys-fs-binfmt_misc.mount is inactive
# ls /proc/sys/fs/binfmt_misc
proc-sys-fs-binfmt_misc.mount will change from inactive to active
# umount /proc/sys/fs/binfmt_misc
# stat /proc/sys/fs/binfmt_misc/
The stat command will hang, just like in issue https://github.com/systemd/systemd/issues/15221
Finally, execute the following command in another shell terminal:
# systemctl daemon-reexec
systemd will crash immediately
4. How to fix
It may be necessary to merge the following patches:
a, commit ba0d56f55f2073164799be714b5bd1aad94d059a (“mount: don't propagate errors from mount_setup_unit() further up”)
commit ba0d56f ("mount: don't propagate errors from mount_setup_unit() further up")
-> This prevents /proc/1/mountinfo from being affected when the mount path exceeds 256 characters;
It has been merged after 73.el7_8.5.
b, The following code snippet of commit fae03ed (“automount: rework propagation between automount and mount units”):
/* Don't propagate state changes from the mount if we are already down */
if (!IN_SET(a->state, AUTOMOUNT_WAITING, AUTOMOUNT_RUNNING))
return;
-> When the automount unit is already down, do not propagate the mount's state change.
This patch is also NEEDED, thanks