Andrei Borzenkov
2018-09-23 07:38:02 UTC
The dracut /shutdown script first tries to kill all processes still running
off the old root. Unfortunately, this fails for the special user process that
runs bpfilter, because it does not include a reference to /oldroot in any of
the places that dracut checks in killall_proc_mountpoint():
10:~ # ps -ef | fgrep '[none]'
root 984 2 0 09:46 ? 00:00:00 [none]
/proc/984:
total 0
dr-xr-xr-x 2 root 0 0 Sep 23 10:11 attr
-r-------- 1 root 0 0 Sep 23 10:11 auxv
-r--r--r-- 1 root 0 0 Sep 23 10:11 cgroup
--w------- 1 root 0 0 Sep 23 10:11 clear_refs
-r--r--r-- 1 root 0 0 Sep 23 10:10 cmdline
-rw-r--r-- 1 root 0 0 Sep 23 10:11 comm
-rw-r--r-- 1 root 0 0 Sep 23 10:11 coredump_filter
-r--r--r-- 1 root 0 0 Sep 23 10:11 cpuset
lrwxrwxrwx 1 root 0 0 Sep 23 10:11 cwd -> /
-r-------- 1 root 0 0 Sep 23 10:11 environ
lrwxrwxrwx 1 root 0 0 Sep 23 10:11 exe -> / (deleted)
-rw-r--r-- 1 root 0 0 Sep 23 10:11 fail-nth
dr-x------ 2 root 0 0 Sep 23 10:11 fd
dr-x------ 2 root 0 0 Sep 23 10:11 fdinfo
-rw-r--r-- 1 root 0 0 Sep 23 10:11 gid_map
-r-------- 1 root 0 0 Sep 23 10:11 io
-r--r--r-- 1 root 0 0 Sep 23 10:11 latency
-r--r--r-- 1 root 0 0 Sep 23 10:11 limits
-rw-r--r-- 1 root 0 0 Sep 23 10:11 loginuid
-rw-r--r-- 1 root 0 0 Sep 23 10:11 make-it-fail
dr-x------ 2 root 0 0 Sep 23 10:11 map_files
-r--r--r-- 1 root 0 0 Sep 23 10:10 maps
-rw------- 1 root 0 0 Sep 23 10:11 mem
-r--r--r-- 1 root 0 0 Sep 23 10:11 mountinfo
-r--r--r-- 1 root 0 0 Sep 23 10:11 mounts
-r-------- 1 root 0 0 Sep 23 10:11 mountstats
dr-xr-xr-x 6 root 0 0 Sep 23 10:11 net
dr-x--x--x 2 root 0 0 Sep 23 10:11 ns
-r--r--r-- 1 root 0 0 Sep 23 10:11 numa_maps
-rw-r--r-- 1 root 0 0 Sep 23 10:11 oom_adj
-r--r--r-- 1 root 0 0 Sep 23 10:11 oom_score
-rw-r--r-- 1 root 0 0 Sep 23 10:11 oom_score_adj
-r-------- 1 root 0 0 Sep 23 10:11 pagemap
-r-------- 1 root 0 0 Sep 23 10:11 patch_state
-r-------- 1 root 0 0 Sep 23 10:11 personality
-rw-r--r-- 1 root 0 0 Sep 23 10:11 projid_map
lrwxrwxrwx 1 root 0 0 Sep 23 10:11 root -> /
-rw-r--r-- 1 root 0 0 Sep 23 10:11 sched
-r--r--r-- 1 root 0 0 Sep 23 10:11 schedstat
-r--r--r-- 1 root 0 0 Sep 23 10:11 sessionid
-rw-r--r-- 1 root 0 0 Sep 23 10:11 setgroups
-r--r--r-- 1 root 0 0 Sep 23 10:11 smaps
-r--r--r-- 1 root 0 0 Sep 23 10:11 smaps_rollup
-r-------- 1 root 0 0 Sep 23 10:11 stack
-r--r--r-- 1 root 0 0 Sep 23 10:10 stat
-r--r--r-- 1 root 0 0 Sep 23 10:11 statm
-r--r--r-- 1 root 0 0 Sep 23 10:10 status
-r-------- 1 root 0 0 Sep 23 10:11 syscall
dr-xr-xr-x 3 root 0 0 Sep 23 10:11 task
-r--r--r-- 1 root 0 0 Sep 23 10:11 timers
-rw-rw-rw- 1 root 0 0 Sep 23 10:11 timerslack_ns
-rw-r--r-- 1 root 0 0 Sep 23 10:11 uid_map
-r--r--r-- 1 root 0 0 Sep 23 10:11 wchan
/proc/984/fd:
total 0
lr-x------ 1 root 0 64 Sep 23 10:11 0 -> pipe:[19409]
l-wx------ 1 root 0 64 Sep 23 10:11 1 -> pipe:[19410]
lrwx------ 1 root 0 64 Sep 23 10:11 2 -> /oldsys/dev/console
But it does contain a reference to /oldroot in its list of mapped libraries
(/proc/984/maps):
563b63002000-563b63003000 r--p 00000000 00:05 19404
/ (deleted)
563b63003000-563b63004000 r-xp 00001000 00:05 19404
/ (deleted)
563b63004000-563b63005000 r--p 00002000 00:05 19404
/ (deleted)
563b63005000-563b63006000 r--p 00002000 00:05 19404
/ (deleted)
563b63006000-563b63007000 rw-p 00003000 00:05 19404
/ (deleted)
563b63fb4000-563b63fd5000 rw-p 00000000 00:00 0
[heap]
7fa3a46cc000-7fa3a4882000 r-xp 00000000 00:2a 7728
/oldroot/lib64/libc-2.27.so
7fa3a4882000-7fa3a4a82000 ---p 001b6000 00:2a 7728
/oldroot/lib64/libc-2.27.so
7fa3a4a82000-7fa3a4a86000 r--p 001b6000 00:2a 7728
/oldroot/lib64/libc-2.27.so
7fa3a4a86000-7fa3a4a88000 rw-p 001ba000 00:2a 7728
/oldroot/lib64/libc-2.27.so
7fa3a4a88000-7fa3a4a8c000 rw-p 00000000 00:00 0
7fa3a4a8c000-7fa3a4ab1000 r-xp 00000000 00:2a 7720
/oldroot/lib64/ld-2.27.so
7fa3a4ca7000-7fa3a4ca9000 rw-p 00000000 00:00 0
7fa3a4cb1000-7fa3a4cb2000 r--p 00025000 00:2a 7720
/oldroot/lib64/ld-2.27.so
7fa3a4cb2000-7fa3a4cb3000 rw-p 00026000 00:2a 7720
/oldroot/lib64/ld-2.27.so
7fa3a4cb3000-7fa3a4cb4000 rw-p 00000000 00:00 0
7ffea03b4000-7ffea03d5000 rw-p 00000000 00:00 0
[stack]
7ffea03df000-7ffea03e2000 r--p 00000000 00:00 0
[vvar]
7ffea03e2000-7ffea03e4000 r-xp 00000000 00:00 0
[vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0
[vsyscall]
So the quick fix would be to extend the check for root references to also
look into /proc/$PID/maps. Something like this (verified):
--- dracut-lib.sh.orig 2018-09-18 13:24:49.000000000 +0300
+++ dracut-lib.sh 2018-09-23 10:31:13.300054544 +0300
@@ -118,7 +118,7 @@ killall_proc_mountpoint() {
esac
[ -e "/proc/$_pid/exe" ] || continue
[ -e "/proc/$_pid/root" ] || continue
- strstr "$(ls -l -- "/proc/$_pid" "/proc/$_pid/fd" 2>/dev/null)" "$1" && kill -9 "$_pid"
+ strstr "$(ls -l -- "/proc/$_pid" "/proc/$_pid/fd" 2>/dev/null; cat "/proc/$_pid/maps" 2> /dev/null)" "$1" && kill -9 "$_pid"
done
}
Note that there are also other places that use a similar check (the most
obvious being the /shutdown script itself), which likely need a uniform fix.
If there are no objections, I would introduce a helper function to do the
check and use it everywhere instead of open-coding it.
The dracut /shutdown script first tries to kill all processes still running
off the old root. Unfortunately, this fails for the special user process that
runs bpfilter, because it does not include a reference to /oldroot in any of
the places that dracut checks in killall_proc_mountpoint():
10:~ # ps -ef | fgrep '[none]'
root 984 2 0 09:46 ? 00:00:00 [none]
/proc/984:
total 0
dr-xr-xr-x 2 root 0 0 Sep 23 10:11 attr
-r-------- 1 root 0 0 Sep 23 10:11 auxv
-r--r--r-- 1 root 0 0 Sep 23 10:11 cgroup
--w------- 1 root 0 0 Sep 23 10:11 clear_refs
-r--r--r-- 1 root 0 0 Sep 23 10:10 cmdline
-rw-r--r-- 1 root 0 0 Sep 23 10:11 comm
-rw-r--r-- 1 root 0 0 Sep 23 10:11 coredump_filter
-r--r--r-- 1 root 0 0 Sep 23 10:11 cpuset
lrwxrwxrwx 1 root 0 0 Sep 23 10:11 cwd -> /
-r-------- 1 root 0 0 Sep 23 10:11 environ
lrwxrwxrwx 1 root 0 0 Sep 23 10:11 exe -> / (deleted)
-rw-r--r-- 1 root 0 0 Sep 23 10:11 fail-nth
dr-x------ 2 root 0 0 Sep 23 10:11 fd
dr-x------ 2 root 0 0 Sep 23 10:11 fdinfo
-rw-r--r-- 1 root 0 0 Sep 23 10:11 gid_map
-r-------- 1 root 0 0 Sep 23 10:11 io
-r--r--r-- 1 root 0 0 Sep 23 10:11 latency
-r--r--r-- 1 root 0 0 Sep 23 10:11 limits
-rw-r--r-- 1 root 0 0 Sep 23 10:11 loginuid
-rw-r--r-- 1 root 0 0 Sep 23 10:11 make-it-fail
dr-x------ 2 root 0 0 Sep 23 10:11 map_files
-r--r--r-- 1 root 0 0 Sep 23 10:10 maps
-rw------- 1 root 0 0 Sep 23 10:11 mem
-r--r--r-- 1 root 0 0 Sep 23 10:11 mountinfo
-r--r--r-- 1 root 0 0 Sep 23 10:11 mounts
-r-------- 1 root 0 0 Sep 23 10:11 mountstats
dr-xr-xr-x 6 root 0 0 Sep 23 10:11 net
dr-x--x--x 2 root 0 0 Sep 23 10:11 ns
-r--r--r-- 1 root 0 0 Sep 23 10:11 numa_maps
-rw-r--r-- 1 root 0 0 Sep 23 10:11 oom_adj
-r--r--r-- 1 root 0 0 Sep 23 10:11 oom_score
-rw-r--r-- 1 root 0 0 Sep 23 10:11 oom_score_adj
-r-------- 1 root 0 0 Sep 23 10:11 pagemap
-r-------- 1 root 0 0 Sep 23 10:11 patch_state
-r-------- 1 root 0 0 Sep 23 10:11 personality
-rw-r--r-- 1 root 0 0 Sep 23 10:11 projid_map
lrwxrwxrwx 1 root 0 0 Sep 23 10:11 root -> /
-rw-r--r-- 1 root 0 0 Sep 23 10:11 sched
-r--r--r-- 1 root 0 0 Sep 23 10:11 schedstat
-r--r--r-- 1 root 0 0 Sep 23 10:11 sessionid
-rw-r--r-- 1 root 0 0 Sep 23 10:11 setgroups
-r--r--r-- 1 root 0 0 Sep 23 10:11 smaps
-r--r--r-- 1 root 0 0 Sep 23 10:11 smaps_rollup
-r-------- 1 root 0 0 Sep 23 10:11 stack
-r--r--r-- 1 root 0 0 Sep 23 10:10 stat
-r--r--r-- 1 root 0 0 Sep 23 10:11 statm
-r--r--r-- 1 root 0 0 Sep 23 10:10 status
-r-------- 1 root 0 0 Sep 23 10:11 syscall
dr-xr-xr-x 3 root 0 0 Sep 23 10:11 task
-r--r--r-- 1 root 0 0 Sep 23 10:11 timers
-rw-rw-rw- 1 root 0 0 Sep 23 10:11 timerslack_ns
-rw-r--r-- 1 root 0 0 Sep 23 10:11 uid_map
-r--r--r-- 1 root 0 0 Sep 23 10:11 wchan
/proc/984/fd:
total 0
lr-x------ 1 root 0 64 Sep 23 10:11 0 -> pipe:[19409]
l-wx------ 1 root 0 64 Sep 23 10:11 1 -> pipe:[19410]
lrwx------ 1 root 0 64 Sep 23 10:11 2 -> /oldsys/dev/console
But it does contain a reference to /oldroot in its list of mapped libraries
(/proc/984/maps):
563b63002000-563b63003000 r--p 00000000 00:05 19404
/ (deleted)
563b63003000-563b63004000 r-xp 00001000 00:05 19404
/ (deleted)
563b63004000-563b63005000 r--p 00002000 00:05 19404
/ (deleted)
563b63005000-563b63006000 r--p 00002000 00:05 19404
/ (deleted)
563b63006000-563b63007000 rw-p 00003000 00:05 19404
/ (deleted)
563b63fb4000-563b63fd5000 rw-p 00000000 00:00 0
[heap]
7fa3a46cc000-7fa3a4882000 r-xp 00000000 00:2a 7728
/oldroot/lib64/libc-2.27.so
7fa3a4882000-7fa3a4a82000 ---p 001b6000 00:2a 7728
/oldroot/lib64/libc-2.27.so
7fa3a4a82000-7fa3a4a86000 r--p 001b6000 00:2a 7728
/oldroot/lib64/libc-2.27.so
7fa3a4a86000-7fa3a4a88000 rw-p 001ba000 00:2a 7728
/oldroot/lib64/libc-2.27.so
7fa3a4a88000-7fa3a4a8c000 rw-p 00000000 00:00 0
7fa3a4a8c000-7fa3a4ab1000 r-xp 00000000 00:2a 7720
/oldroot/lib64/ld-2.27.so
7fa3a4ca7000-7fa3a4ca9000 rw-p 00000000 00:00 0
7fa3a4cb1000-7fa3a4cb2000 r--p 00025000 00:2a 7720
/oldroot/lib64/ld-2.27.so
7fa3a4cb2000-7fa3a4cb3000 rw-p 00026000 00:2a 7720
/oldroot/lib64/ld-2.27.so
7fa3a4cb3000-7fa3a4cb4000 rw-p 00000000 00:00 0
7ffea03b4000-7ffea03d5000 rw-p 00000000 00:00 0
[stack]
7ffea03df000-7ffea03e2000 r--p 00000000 00:00 0
[vvar]
7ffea03e2000-7ffea03e4000 r-xp 00000000 00:00 0
[vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0
[vsyscall]
So the quick fix would be to extend the check for root references to also
look into /proc/$PID/maps. Something like this (verified):
--- dracut-lib.sh.orig 2018-09-18 13:24:49.000000000 +0300
+++ dracut-lib.sh 2018-09-23 10:31:13.300054544 +0300
@@ -118,7 +118,7 @@ killall_proc_mountpoint() {
esac
[ -e "/proc/$_pid/exe" ] || continue
[ -e "/proc/$_pid/root" ] || continue
- strstr "$(ls -l -- "/proc/$_pid" "/proc/$_pid/fd" 2>/dev/null)" "$1" && kill -9 "$_pid"
+ strstr "$(ls -l -- "/proc/$_pid" "/proc/$_pid/fd" 2>/dev/null; cat "/proc/$_pid/maps" 2> /dev/null)" "$1" && kill -9 "$_pid"
done
}
Note that there are also other places that use a similar check (the most
obvious being the /shutdown script itself), which likely need a uniform fix.
If there are no objections, I would introduce a helper function to do the
check and use it everywhere instead of open-coding it.