Skip to content

Save Firecracker PID in Jailer's root directory #4442

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,19 @@ and this project adheres to
This fixes a bug where a microVM with incompatible balloon and guest memory
size could be booted, due to the check for this condition happening after
Firecracker's configuration was updated.
- [#4259](https://github.com/firecracker-microvm/firecracker/pull/4259): Added a
double fork mechanism in the Jailer to avoid setsid() failures occurred while
running Jailer as the process group leader. However, this changed the
behaviour of Jailer and now the Firecracker process will always have a
different PID than the Jailer process.
[#4436](https://github.com/firecracker-microvm/firecracker/pull/4436): Added a
"Known Limitations" section in the Jailer docs to highlight the above change
in behaviour introduced in PR#4259.
[#4442](https://github.com/firecracker-microvm/firecracker/pull/4442): As a
solution to the change in behaviour introduced in PR#4259, provided a
mechanism to reliably fetch Firecracker PID. With this change, Firecracker
process's PID will always be available in the Jailer's root directory
regardless of whether new_pid_ns was set.

## \[1.6.0\]

Expand Down
11 changes: 7 additions & 4 deletions docs/jailer.md
Original file line number Diff line number Diff line change
Expand Up @@ -280,10 +280,13 @@ Note: default value for `<api-sock>` is `/run/firecracker.socket`.
### Known limitations

- When passing the --daemonize option to Firecracker without the --new-ns-pid
option, the Firecracker process will have a different pid than the Jailer
process. The suggested workaround to get Firecracker process's pid in this
case is using `--new-pid-ns` flag and read Firecracker's pid from the
`firecracker.pid` file present in the jailer's root directory.
option, the Firecracker process will have a different PID than the Jailer
process and killing the Jailer will not kill the Firecracker process. As a
workaround to get Firecracker PID, the Jailer stores the PID of the child
process in the jail root directory inside `<exec_file_name>.pid` for all cases
regardless of whether `--new-pid-ns` was provided. The suggested way to fetch
Firecracker's PID when using the Jailer is to read the `firecracker.pid` file
present in the Jailer's root directory.

## Caveats

Expand Down
3 changes: 2 additions & 1 deletion src/jailer/src/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::os::unix::fs::PermissionsExt;
use std::os::unix::io::AsRawFd;
use std::os::unix::process::CommandExt;
use std::path::{Component, Path, PathBuf};
use std::process::{exit, Command, Stdio};
use std::process::{exit, id, Command, Stdio};
use std::{fmt, io};

use utils::arg_parser::Error::MissingValue;
Expand Down Expand Up @@ -739,6 +739,7 @@ impl Env {
if self.new_pid_ns {
self.exec_into_new_pid_ns(chroot_exec_file)
} else {
self.save_exec_file_pid(id().try_into().unwrap(), chroot_exec_file.clone())?;
Err(JailerError::Exec(self.exec_command(chroot_exec_file)))
}
}
Expand Down
68 changes: 27 additions & 41 deletions tests/framework/microvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,6 @@ def __init__(
self.jailer.jailed_path("/etc/localtime", subdir="etc")

self._screen_pid = None
self._screen_firecracker_pid = None

self.time_api_requests = global_props.host_linux_version != "6.1"
# disable the HTTP API timings as they cause a lot of false positives
Expand Down Expand Up @@ -247,34 +246,32 @@ def kill(self):
# as well as an intentional eye-sore in the test report.
LOG.error(self.log_data)

if self.jailer.daemonize:
try:
if self.firecracker_pid:
os.kill(self.firecracker_pid, signal.SIGKILL)
except ProcessLookupError:
LOG.exception("Process not found: %d", self.firecracker_pid)
except FileNotFoundError:
LOG.exception("PID file not found")

# if microvm was spawned then check if it gets killed
if self._spawned:
# it is observed that we need to wait some time before
# checking if the process is killed.
time.sleep(1)
# filter ps results for the jailer's unique id
rc, stdout, stderr = utils.run_cmd(
f"ps aux | grep {self.jailer.jailer_id}"
)
# make sure firecracker was killed
assert (
rc == 0 and stderr == "" and stdout.find("firecracker") == -1
), f"Firecracker pid {self.firecracker_pid} was not killed as expected"
else:
try:
if self.firecracker_pid:
os.kill(self.firecracker_pid, signal.SIGKILL)
except ProcessLookupError:
LOG.exception("Process not found: %d", self.firecracker_pid)
except FileNotFoundError:
LOG.exception("PID file not found")

if self.screen_pid:
# Killing screen will send SIGHUP to underlying Firecracker.
# Needed to avoid false positives in case kill() is called again.
self.expect_kill_by_signal = True
utils.run_cmd("kill -9 {} || true".format(self.screen_pid))

# if microvm was spawned then check if it gets killed
if self._spawned:
# it is observed that we need to wait some time before
# checking if the process is killed.
time.sleep(1)
# filter ps results for the jailer's unique id
rc, stdout, stderr = utils.run_cmd(f"ps aux | grep {self.jailer.jailer_id}")
# make sure firecracker was killed
assert (
rc == 0 and stderr == "" and stdout.find("firecracker") == -1
), f"Firecracker pid {self.firecracker_pid} was not killed as expected"

# Mark the microVM as not spawned, so we avoid trying to kill twice.
self._spawned = False

Expand Down Expand Up @@ -372,22 +369,15 @@ def state(self):
return self.api.describe.get().json()["state"]

@property
def pid_in_new_ns(self):
"""Get the pid of the Firecracker process in the new namespace.
def firecracker_pid(self):
"""Return Firecracker's PID

Reads the pid from a file created by jailer with `--new-pid-ns` flag.
Reads the pid from a file created by jailer.
"""
# Read the PID stored inside the file.
return int(self.jailer.pid_file.read_text(encoding="ascii"))

@property
def firecracker_pid(self):
"""Return Firecracker's PID"""
if not self._spawned:
return None
if self.jailer.new_pid_ns:
return self.pid_in_new_ns
return self._screen_firecracker_pid
# Read the PID stored inside the file.
return int(self.jailer.pid_file.read_text(encoding="ascii"))

@property
def dimensions(self):
Expand Down Expand Up @@ -553,9 +543,6 @@ def spawn(
# Checking the timings requires DEBUG level log messages
self.time_api_requests = False

if not self.jailer.daemonize:
self.jailer.new_pid_ns = False

cmd = [str(self.jailer_binary_path)] + self.jailer.construct_param_list()
if self._numa_node is not None:
node = str(self._numa_node)
Expand All @@ -576,14 +563,13 @@ def spawn(
# Run Firecracker under screen. This is used when we want to access
# the serial console. The file will collect the output from
# 'screen'ed Firecracker.
screen_pid, binary_pid = utils.start_screen_process(
screen_pid = utils.start_screen_process(
self.screen_log,
self.screen_session,
cmd[0],
cmd[1:],
)
self._screen_pid = screen_pid
self._screen_firecracker_pid = binary_pid

self._spawned = True

Expand Down
1 change: 1 addition & 0 deletions tests/framework/microvm_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def enable_console(self):
self.vm.boot_args = ""
self.vm.boot_args += "console=ttyS0 reboot=k panic=1"
self.vm.jailer.daemonize = False
self.vm.jailer.new_pid_ns = False

def how_to_console(self):
"""Print how to connect to the VM console"""
Expand Down
9 changes: 1 addition & 8 deletions tests/framework/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,14 +715,7 @@ def start_screen_process(screen_log, session_name, binary_path, binary_params):
# Configure screen to flush stdout to file.
run_cmd(FLUSH_CMD.format(session=session_name))

children_count = len(screen_ps.children())
if children_count != 1:
raise RuntimeError(
f"Failed to retrieve child process id for binary '{binary_path}' "
f"screen session process had [{children_count}]"
)

return screen_pid, screen_ps.children()[0].pid
return screen_pid


def guest_run_fio_iteration(ssh_connection, iteration):
Expand Down
2 changes: 1 addition & 1 deletion tests/integration_tests/functional/test_initrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def test_microvm_initrd_with_serial(uvm_with_initrd):
Test that a boot using initrd successfully loads the root filesystem.
"""
vm = uvm_with_initrd
vm.jailer.daemonize = False
vm.help.enable_console()
vm.spawn()
vm.memory_monitor = None

Expand Down
2 changes: 1 addition & 1 deletion tests/integration_tests/functional/test_kernel_cmdline.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def test_init_params(uvm_plain):
altered or misplaced.
"""
vm = uvm_plain
vm.jailer.daemonize = False
vm.help.enable_console()
vm.spawn()
vm.memory_monitor = None

Expand Down
10 changes: 5 additions & 5 deletions tests/integration_tests/functional/test_serial_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_serial_after_snapshot(uvm_plain, microvm_factory):
Serial I/O after restoring from a snapshot.
"""
microvm = uvm_plain
microvm.jailer.daemonize = False
microvm.help.enable_console()
microvm.spawn()
microvm.basic_config(
vcpu_count=2,
Expand All @@ -71,7 +71,7 @@ def test_serial_after_snapshot(uvm_plain, microvm_factory):

# Load microVM clone from snapshot.
vm = microvm_factory.build()
vm.jailer.daemonize = False
vm.help.enable_console()
vm.spawn()
vm.restore_from_snapshot(snapshot, resume=True)
serial = Serial(vm)
Expand All @@ -91,7 +91,7 @@ def test_serial_console_login(uvm_plain_any):
Test serial console login.
"""
microvm = uvm_plain_any
microvm.jailer.daemonize = False
microvm.help.enable_console()
microvm.spawn()

# We don't need to monitor the memory for this test because we are
Expand Down Expand Up @@ -139,7 +139,7 @@ def test_serial_dos(uvm_plain_any):
Test serial console behavior under DoS.
"""
microvm = uvm_plain_any
microvm.jailer.daemonize = False
microvm.help.enable_console()
microvm.spawn()

# Set up the microVM with 1 vCPU and a serial console.
Expand Down Expand Up @@ -169,7 +169,7 @@ def test_serial_block(uvm_plain_any):
Test that writing to stdout never blocks the vCPU thread.
"""
test_microvm = uvm_plain_any
test_microvm.jailer.daemonize = False
test_microvm.help.enable_console()
test_microvm.spawn()
# Set up the microVM with 1 vCPU so we make sure the vCPU thread
# responsible for the SSH connection will also run the serial.
Expand Down
32 changes: 32 additions & 0 deletions tests/integration_tests/security/test_jail.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,3 +581,35 @@ def test_new_pid_namespace(uvm_plain):
assert len(nstgid_list) == 2
assert int(nstgid_list[1]) == 1
assert int(nstgid_list[0]) == fc_pid


@pytest.mark.parametrize(
"daemonize",
[True, False],
)
@pytest.mark.parametrize(
"new_pid_ns",
[True, False],
)
def test_firecracker_kill_by_pid(uvm_plain, daemonize, new_pid_ns):
"""
Test that Firecracker is spawned in a new PID namespace if requested.
"""
microvm = uvm_plain
microvm.jailer.daemonize = daemonize
microvm.jailer.new_pid_ns = new_pid_ns
microvm.spawn()
microvm.basic_config()
microvm.add_net_iface()
microvm.start()

# verify the guest is active
exit_code, _, _ = microvm.ssh.run("ls")
assert exit_code == 0

# before killing microvm make sure the Jailer config is what we set it to be.
assert (
microvm.jailer.daemonize == daemonize
and microvm.jailer.new_pid_ns == new_pid_ns
)
microvm.kill()