diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b9f28e8458..c6293caa831 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,19 @@ and this project adheres to This fixes a bug where a microVM with incompatible balloon and guest memory size could be booted, due to the check for this condition happening after Firecracker's configuration was updated. +- [#4259](https://github.com/firecracker-microvm/firecracker/pull/4259): Added a + double fork mechanism in the Jailer to avoid setsid() failures occurred while + running Jailer as the process group leader. However, this changed the + behaviour of Jailer and now the Firecracker process will always have a + different PID than the Jailer process. + [#4436](https://github.com/firecracker-microvm/firecracker/pull/4436): Added a + "Known Limitations" section in the Jailer docs to highlight the above change + in behaviour introduced in PR#4259. + [#4442](https://github.com/firecracker-microvm/firecracker/pull/4442): As a + solution to the change in behaviour introduced in PR#4259, provided a + mechanism to reliably fetch Firecracker PID. With this change, Firecracker + process's PID will always be available in the Jailer's root directory + regardless of whether new_pid_ns was set. ## \[1.6.0\] diff --git a/docs/jailer.md b/docs/jailer.md index 3803dd43b44..2495039b5cf 100644 --- a/docs/jailer.md +++ b/docs/jailer.md @@ -280,10 +280,13 @@ Note: default value for `` is `/run/firecracker.socket`. ### Known limitations - When passing the --daemonize option to Firecracker without the --new-ns-pid - option, the Firecracker process will have a different pid than the Jailer - process. The suggested workaround to get Firecracker process's pid in this - case is using `--new-pid-ns` flag and read Firecracker's pid from the - `firecracker.pid` file present in the jailer's root directory. + option, the Firecracker process will have a different PID than the Jailer + process and killing the Jailer will not kill the Firecracker process. As a + workaround to get Firecracker PID, the Jailer stores the PID of the child + process in the jail root directory inside `.pid` for all cases + regardless of whether `--new-pid-ns` was provided. The suggested way to fetch + Firecracker's PID when using the Jailer is to read the `firecracker.pid` file + present in the Jailer's root directory. ## Caveats diff --git a/src/jailer/src/env.rs b/src/jailer/src/env.rs index 092a6a4a836..0cba8e14b91 100644 --- a/src/jailer/src/env.rs +++ b/src/jailer/src/env.rs @@ -8,7 +8,7 @@ use std::os::unix::fs::PermissionsExt; use std::os::unix::io::AsRawFd; use std::os::unix::process::CommandExt; use std::path::{Component, Path, PathBuf}; -use std::process::{exit, Command, Stdio}; +use std::process::{exit, id, Command, Stdio}; use std::{fmt, io}; use utils::arg_parser::Error::MissingValue; @@ -739,6 +739,7 @@ impl Env { if self.new_pid_ns { self.exec_into_new_pid_ns(chroot_exec_file) } else { + self.save_exec_file_pid(id().try_into().unwrap(), chroot_exec_file.clone())?; Err(JailerError::Exec(self.exec_command(chroot_exec_file))) } } diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 686a2098f1e..af2f9a2a5f3 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -191,7 +191,6 @@ def __init__( self.jailer.jailed_path("/etc/localtime", subdir="etc") self._screen_pid = None - self._screen_firecracker_pid = None self.time_api_requests = global_props.host_linux_version != "6.1" # disable the HTTP API timings as they cause a lot of false positives @@ -247,34 +246,32 @@ def kill(self): # as well as an intentional eye-sore in the test report. LOG.error(self.log_data) - if self.jailer.daemonize: - try: - if self.firecracker_pid: - os.kill(self.firecracker_pid, signal.SIGKILL) - except ProcessLookupError: - LOG.exception("Process not found: %d", self.firecracker_pid) - except FileNotFoundError: - LOG.exception("PID file not found") - - # if microvm was spawned then check if it gets killed - if self._spawned: - # it is observed that we need to wait some time before - # checking if the process is killed. - time.sleep(1) - # filter ps results for the jailer's unique id - rc, stdout, stderr = utils.run_cmd( - f"ps aux | grep {self.jailer.jailer_id}" - ) - # make sure firecracker was killed - assert ( - rc == 0 and stderr == "" and stdout.find("firecracker") == -1 - ), f"Firecracker pid {self.firecracker_pid} was not killed as expected" - else: + try: + if self.firecracker_pid: + os.kill(self.firecracker_pid, signal.SIGKILL) + except ProcessLookupError: + LOG.exception("Process not found: %d", self.firecracker_pid) + except FileNotFoundError: + LOG.exception("PID file not found") + + if self.screen_pid: # Killing screen will send SIGHUP to underlying Firecracker. # Needed to avoid false positives in case kill() is called again. self.expect_kill_by_signal = True utils.run_cmd("kill -9 {} || true".format(self.screen_pid)) + # if microvm was spawned then check if it gets killed + if self._spawned: + # it is observed that we need to wait some time before + # checking if the process is killed. + time.sleep(1) + # filter ps results for the jailer's unique id + rc, stdout, stderr = utils.run_cmd(f"ps aux | grep {self.jailer.jailer_id}") + # make sure firecracker was killed + assert ( + rc == 0 and stderr == "" and stdout.find("firecracker") == -1 + ), f"Firecracker pid {self.firecracker_pid} was not killed as expected" + # Mark the microVM as not spawned, so we avoid trying to kill twice. self._spawned = False @@ -372,22 +369,15 @@ def state(self): return self.api.describe.get().json()["state"] @property - def pid_in_new_ns(self): - """Get the pid of the Firecracker process in the new namespace. + def firecracker_pid(self): + """Return Firecracker's PID - Reads the pid from a file created by jailer with `--new-pid-ns` flag. + Reads the pid from a file created by jailer. """ - # Read the PID stored inside the file. - return int(self.jailer.pid_file.read_text(encoding="ascii")) - - @property - def firecracker_pid(self): - """Return Firecracker's PID""" if not self._spawned: return None - if self.jailer.new_pid_ns: - return self.pid_in_new_ns - return self._screen_firecracker_pid + # Read the PID stored inside the file. + return int(self.jailer.pid_file.read_text(encoding="ascii")) @property def dimensions(self): @@ -553,9 +543,6 @@ def spawn( # Checking the timings requires DEBUG level log messages self.time_api_requests = False - if not self.jailer.daemonize: - self.jailer.new_pid_ns = False - cmd = [str(self.jailer_binary_path)] + self.jailer.construct_param_list() if self._numa_node is not None: node = str(self._numa_node) @@ -576,14 +563,13 @@ def spawn( # Run Firecracker under screen. This is used when we want to access # the serial console. The file will collect the output from # 'screen'ed Firecracker. - screen_pid, binary_pid = utils.start_screen_process( + screen_pid = utils.start_screen_process( self.screen_log, self.screen_session, cmd[0], cmd[1:], ) self._screen_pid = screen_pid - self._screen_firecracker_pid = binary_pid self._spawned = True diff --git a/tests/framework/microvm_helpers.py b/tests/framework/microvm_helpers.py index 26c62ce07f0..851cbeddc4e 100644 --- a/tests/framework/microvm_helpers.py +++ b/tests/framework/microvm_helpers.py @@ -122,6 +122,7 @@ def enable_console(self): self.vm.boot_args = "" self.vm.boot_args += "console=ttyS0 reboot=k panic=1" self.vm.jailer.daemonize = False + self.vm.jailer.new_pid_ns = False def how_to_console(self): """Print how to connect to the VM console""" diff --git a/tests/framework/utils.py b/tests/framework/utils.py index 9ea2e28e99a..3e982fcbfee 100644 --- a/tests/framework/utils.py +++ b/tests/framework/utils.py @@ -715,14 +715,7 @@ def start_screen_process(screen_log, session_name, binary_path, binary_params): # Configure screen to flush stdout to file. run_cmd(FLUSH_CMD.format(session=session_name)) - children_count = len(screen_ps.children()) - if children_count != 1: - raise RuntimeError( - f"Failed to retrieve child process id for binary '{binary_path}' " - f"screen session process had [{children_count}]" - ) - - return screen_pid, screen_ps.children()[0].pid + return screen_pid def guest_run_fio_iteration(ssh_connection, iteration): diff --git a/tests/integration_tests/functional/test_initrd.py b/tests/integration_tests/functional/test_initrd.py index 31e9407ed4b..3c48ecd63a5 100644 --- a/tests/integration_tests/functional/test_initrd.py +++ b/tests/integration_tests/functional/test_initrd.py @@ -12,7 +12,7 @@ def test_microvm_initrd_with_serial(uvm_with_initrd): Test that a boot using initrd successfully loads the root filesystem. """ vm = uvm_with_initrd - vm.jailer.daemonize = False + vm.help.enable_console() vm.spawn() vm.memory_monitor = None diff --git a/tests/integration_tests/functional/test_kernel_cmdline.py b/tests/integration_tests/functional/test_kernel_cmdline.py index 2ba271b51fc..14e369790f1 100644 --- a/tests/integration_tests/functional/test_kernel_cmdline.py +++ b/tests/integration_tests/functional/test_kernel_cmdline.py @@ -13,7 +13,7 @@ def test_init_params(uvm_plain): altered or misplaced. """ vm = uvm_plain - vm.jailer.daemonize = False + vm.help.enable_console() vm.spawn() vm.memory_monitor = None diff --git a/tests/integration_tests/functional/test_serial_io.py b/tests/integration_tests/functional/test_serial_io.py index 9ff76b96e87..b8cd7f9118b 100644 --- a/tests/integration_tests/functional/test_serial_io.py +++ b/tests/integration_tests/functional/test_serial_io.py @@ -50,7 +50,7 @@ def test_serial_after_snapshot(uvm_plain, microvm_factory): Serial I/O after restoring from a snapshot. """ microvm = uvm_plain - microvm.jailer.daemonize = False + microvm.help.enable_console() microvm.spawn() microvm.basic_config( vcpu_count=2, @@ -71,7 +71,7 @@ def test_serial_after_snapshot(uvm_plain, microvm_factory): # Load microVM clone from snapshot. vm = microvm_factory.build() - vm.jailer.daemonize = False + vm.help.enable_console() vm.spawn() vm.restore_from_snapshot(snapshot, resume=True) serial = Serial(vm) @@ -91,7 +91,7 @@ def test_serial_console_login(uvm_plain_any): Test serial console login. """ microvm = uvm_plain_any - microvm.jailer.daemonize = False + microvm.help.enable_console() microvm.spawn() # We don't need to monitor the memory for this test because we are @@ -139,7 +139,7 @@ def test_serial_dos(uvm_plain_any): Test serial console behavior under DoS. """ microvm = uvm_plain_any - microvm.jailer.daemonize = False + microvm.help.enable_console() microvm.spawn() # Set up the microVM with 1 vCPU and a serial console. @@ -169,7 +169,7 @@ def test_serial_block(uvm_plain_any): Test that writing to stdout never blocks the vCPU thread. """ test_microvm = uvm_plain_any - test_microvm.jailer.daemonize = False + test_microvm.help.enable_console() test_microvm.spawn() # Set up the microVM with 1 vCPU so we make sure the vCPU thread # responsible for the SSH connection will also run the serial. diff --git a/tests/integration_tests/security/test_jail.py b/tests/integration_tests/security/test_jail.py index 6956b1ace94..ff4fd6ff3dd 100644 --- a/tests/integration_tests/security/test_jail.py +++ b/tests/integration_tests/security/test_jail.py @@ -581,3 +581,35 @@ def test_new_pid_namespace(uvm_plain): assert len(nstgid_list) == 2 assert int(nstgid_list[1]) == 1 assert int(nstgid_list[0]) == fc_pid + + +@pytest.mark.parametrize( + "daemonize", + [True, False], +) +@pytest.mark.parametrize( + "new_pid_ns", + [True, False], +) +def test_firecracker_kill_by_pid(uvm_plain, daemonize, new_pid_ns): + """ + Test that Firecracker is spawned in a new PID namespace if requested. + """ + microvm = uvm_plain + microvm.jailer.daemonize = daemonize + microvm.jailer.new_pid_ns = new_pid_ns + microvm.spawn() + microvm.basic_config() + microvm.add_net_iface() + microvm.start() + + # verify the guest is active + exit_code, _, _ = microvm.ssh.run("ls") + assert exit_code == 0 + + # before killing microvm make sure the Jailer config is what we set it to be. + assert ( + microvm.jailer.daemonize == daemonize + and microvm.jailer.new_pid_ns == new_pid_ns + ) + microvm.kill()