From d63eb7a65ffaaae0409d15ed55d99ecbd29bc572 Mon Sep 17 00:00:00 2001 From: Sudan Landge Date: Sun, 11 Feb 2024 00:28:13 +0000 Subject: [PATCH 1/7] fix: save Firecracker PID in Jailer root directory da92bf6d4afa678565dcf9df2e4e00d2fa273d96 commit changed behaviour of the PID returned by the Jailer such that when passing the --daemonize option to Firecracker without the --new-ns-pid option, the Firecracker process will have a different PID than the Jailer. This makes it difficult to find the PID of Firecracker process from the system so, save the Firecracker PID in Jailer root directory as a reliable workaround to fetch Firecracker PID. With this change, Firecracker process's PID will always be available in Jailer's root directory regardless of whether new_pid_ns was set. Signed-off-by: Sudan Landge --- src/jailer/src/env.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/jailer/src/env.rs b/src/jailer/src/env.rs index 092a6a4a836..0cba8e14b91 100644 --- a/src/jailer/src/env.rs +++ b/src/jailer/src/env.rs @@ -8,7 +8,7 @@ use std::os::unix::fs::PermissionsExt; use std::os::unix::io::AsRawFd; use std::os::unix::process::CommandExt; use std::path::{Component, Path, PathBuf}; -use std::process::{exit, Command, Stdio}; +use std::process::{exit, id, Command, Stdio}; use std::{fmt, io}; use utils::arg_parser::Error::MissingValue; @@ -739,6 +739,7 @@ impl Env { if self.new_pid_ns { self.exec_into_new_pid_ns(chroot_exec_file) } else { + self.save_exec_file_pid(id().try_into().unwrap(), chroot_exec_file.clone())?; Err(JailerError::Exec(self.exec_command(chroot_exec_file))) } } From a3bcd1d961f62779f8ee8be097a0e456d1a835bc Mon Sep 17 00:00:00 2001 From: Sudan Landge Date: Sun, 11 Feb 2024 20:58:43 +0000 Subject: [PATCH 2/7] tests: always collect Firecracker PID from file Jailer now saves Firecracker PID in a file in Jailer's root directory so refactor the code to read the FC pid always from this file regardless of whether the new_pid_ns flag was set. Signed-off-by: Sudan Landge --- tests/framework/microvm.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 686a2098f1e..80363dbb001 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -372,22 +372,15 @@ def state(self): return self.api.describe.get().json()["state"] @property - def pid_in_new_ns(self): - """Get the pid of the Firecracker process in the new namespace. + def firecracker_pid(self): + """Return Firecracker's PID - Reads the pid from a file created by jailer with `--new-pid-ns` flag. + Reads the pid from a file created by jailer. """ - # Read the PID stored inside the file. - return int(self.jailer.pid_file.read_text(encoding="ascii")) - - @property - def firecracker_pid(self): - """Return Firecracker's PID""" if not self._spawned: return None - if self.jailer.new_pid_ns: - return self.pid_in_new_ns - return self._screen_firecracker_pid + # Read the PID stored inside the file. + return int(self.jailer.pid_file.read_text(encoding="ascii")) @property def dimensions(self): From 6bf443237e9a5c4a110d11c24ccf742ca25bf250 Mon Sep 17 00:00:00 2001 From: Sudan Landge Date: Sun, 11 Feb 2024 21:12:45 +0000 Subject: [PATCH 3/7] refactor: remove usage of _screen_firecracker_pid For the case where Jailer is no daemonized but new_pid_ns flag is set, we won't be able to fetch Firecracker pid from the screen process and the right way to get Firecracker PID is by calling Microvm's self.firecracker_pid so, remove usage _screen_firecracker_pid. The change to kill screen process if screen_pid is valid is done to make the next commit more readable. Signed-off-by: Sudan Landge --- tests/framework/microvm.py | 7 +++---- tests/framework/utils.py | 9 +-------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 80363dbb001..bb7d2b31aa0 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -191,7 +191,6 @@ def __init__( self.jailer.jailed_path("/etc/localtime", subdir="etc") self._screen_pid = None - self._screen_firecracker_pid = None self.time_api_requests = global_props.host_linux_version != "6.1" # disable the HTTP API timings as they cause a lot of false positives @@ -269,7 +268,8 @@ def kill(self): assert ( rc == 0 and stderr == "" and stdout.find("firecracker") == -1 ), f"Firecracker pid {self.firecracker_pid} was not killed as expected" - else: + + if self.screen_pid: # Killing screen will send SIGHUP to underlying Firecracker. # Needed to avoid false positives in case kill() is called again. self.expect_kill_by_signal = True @@ -569,14 +569,13 @@ def spawn( # Run Firecracker under screen. This is used when we want to access # the serial console. The file will collect the output from # 'screen'ed Firecracker. - screen_pid, binary_pid = utils.start_screen_process( + screen_pid = utils.start_screen_process( self.screen_log, self.screen_session, cmd[0], cmd[1:], ) self._screen_pid = screen_pid - self._screen_firecracker_pid = binary_pid self._spawned = True diff --git a/tests/framework/utils.py b/tests/framework/utils.py index 9ea2e28e99a..3e982fcbfee 100644 --- a/tests/framework/utils.py +++ b/tests/framework/utils.py @@ -715,14 +715,7 @@ def start_screen_process(screen_log, session_name, binary_path, binary_params): # Configure screen to flush stdout to file. run_cmd(FLUSH_CMD.format(session=session_name)) - children_count = len(screen_ps.children()) - if children_count != 1: - raise RuntimeError( - f"Failed to retrieve child process id for binary '{binary_path}' " - f"screen session process had [{children_count}]" - ) - - return screen_pid, screen_ps.children()[0].pid + return screen_pid def guest_run_fio_iteration(ssh_connection, iteration): From 22b1019aa39752f0f94fe918566b1d363a01cc27 Mon Sep 17 00:00:00 2001 From: Sudan Landge Date: Sun, 11 Feb 2024 21:28:09 +0000 Subject: [PATCH 4/7] refactor: use common method to kill and detect Firecracker Since Microvm's self.firecracker_pid is valid in all cases, use it to kill Firecracker in all cases. Use screen_pid to identify and kill if screen process was used. Make the logic, which detect if Firecracker PID was killed, common for all (daemonize or not) cases. Signed-off-by: Sudan Landge --- tests/framework/microvm.py | 41 ++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index bb7d2b31aa0..cb705e6326f 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -246,28 +246,13 @@ def kill(self): # as well as an intentional eye-sore in the test report. LOG.error(self.log_data) - if self.jailer.daemonize: - try: - if self.firecracker_pid: - os.kill(self.firecracker_pid, signal.SIGKILL) - except ProcessLookupError: - LOG.exception("Process not found: %d", self.firecracker_pid) - except FileNotFoundError: - LOG.exception("PID file not found") - - # if microvm was spawned then check if it gets killed - if self._spawned: - # it is observed that we need to wait some time before - # checking if the process is killed. - time.sleep(1) - # filter ps results for the jailer's unique id - rc, stdout, stderr = utils.run_cmd( - f"ps aux | grep {self.jailer.jailer_id}" - ) - # make sure firecracker was killed - assert ( - rc == 0 and stderr == "" and stdout.find("firecracker") == -1 - ), f"Firecracker pid {self.firecracker_pid} was not killed as expected" + try: + if self.firecracker_pid: + os.kill(self.firecracker_pid, signal.SIGKILL) + except ProcessLookupError: + LOG.exception("Process not found: %d", self.firecracker_pid) + except FileNotFoundError: + LOG.exception("PID file not found") if self.screen_pid: # Killing screen will send SIGHUP to underlying Firecracker. @@ -275,6 +260,18 @@ def kill(self): self.expect_kill_by_signal = True utils.run_cmd("kill -9 {} || true".format(self.screen_pid)) + # if microvm was spawned then check if it gets killed + if self._spawned: + # it is observed that we need to wait some time before + # checking if the process is killed. + time.sleep(1) + # filter ps results for the jailer's unique id + rc, stdout, stderr = utils.run_cmd(f"ps aux | grep {self.jailer.jailer_id}") + # make sure firecracker was killed + assert ( + rc == 0 and stderr == "" and stdout.find("firecracker") == -1 + ), f"Firecracker pid {self.firecracker_pid} was not killed as expected" + # Mark the microVM as not spawned, so we avoid trying to kill twice. self._spawned = False From c30632fd92e9900415964f6684bf2a1721eaa525 Mon Sep 17 00:00:00 2001 From: Sudan Landge Date: Tue, 13 Feb 2024 11:25:54 +0000 Subject: [PATCH 5/7] refactor: explicitly set new_pid_ns wherever required The code assumed that if daemonize is set to False for the Jailer then, new_pid_ns should also be set to False. This assumption makes it difficult to add tests which require "daemonize=False and new_pid_ns=True". So remove the assumption and call enable_console() to explicitly which explicitly explains what functionality is needed. Note: apart from setting daemonize and new_pid_ns to False, `help.enable_console()` also appends default boot args to the vm object but the testing calling enable_console() replace the boot args while calling basic_config(). Signed-off-by: Sudan Landge --- tests/framework/microvm.py | 3 --- tests/framework/microvm_helpers.py | 1 + tests/integration_tests/functional/test_initrd.py | 2 +- .../functional/test_kernel_cmdline.py | 2 +- tests/integration_tests/functional/test_serial_io.py | 10 +++++----- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index cb705e6326f..af2f9a2a5f3 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -543,9 +543,6 @@ def spawn( # Checking the timings requires DEBUG level log messages self.time_api_requests = False - if not self.jailer.daemonize: - self.jailer.new_pid_ns = False - cmd = [str(self.jailer_binary_path)] + self.jailer.construct_param_list() if self._numa_node is not None: node = str(self._numa_node) diff --git a/tests/framework/microvm_helpers.py b/tests/framework/microvm_helpers.py index 26c62ce07f0..851cbeddc4e 100644 --- a/tests/framework/microvm_helpers.py +++ b/tests/framework/microvm_helpers.py @@ -122,6 +122,7 @@ def enable_console(self): self.vm.boot_args = "" self.vm.boot_args += "console=ttyS0 reboot=k panic=1" self.vm.jailer.daemonize = False + self.vm.jailer.new_pid_ns = False def how_to_console(self): """Print how to connect to the VM console""" diff --git a/tests/integration_tests/functional/test_initrd.py b/tests/integration_tests/functional/test_initrd.py index 31e9407ed4b..3c48ecd63a5 100644 --- a/tests/integration_tests/functional/test_initrd.py +++ b/tests/integration_tests/functional/test_initrd.py @@ -12,7 +12,7 @@ def test_microvm_initrd_with_serial(uvm_with_initrd): Test that a boot using initrd successfully loads the root filesystem. """ vm = uvm_with_initrd - vm.jailer.daemonize = False + vm.help.enable_console() vm.spawn() vm.memory_monitor = None diff --git a/tests/integration_tests/functional/test_kernel_cmdline.py b/tests/integration_tests/functional/test_kernel_cmdline.py index 2ba271b51fc..14e369790f1 100644 --- a/tests/integration_tests/functional/test_kernel_cmdline.py +++ b/tests/integration_tests/functional/test_kernel_cmdline.py @@ -13,7 +13,7 @@ def test_init_params(uvm_plain): altered or misplaced. """ vm = uvm_plain - vm.jailer.daemonize = False + vm.help.enable_console() vm.spawn() vm.memory_monitor = None diff --git a/tests/integration_tests/functional/test_serial_io.py b/tests/integration_tests/functional/test_serial_io.py index 9ff76b96e87..b8cd7f9118b 100644 --- a/tests/integration_tests/functional/test_serial_io.py +++ b/tests/integration_tests/functional/test_serial_io.py @@ -50,7 +50,7 @@ def test_serial_after_snapshot(uvm_plain, microvm_factory): Serial I/O after restoring from a snapshot. """ microvm = uvm_plain - microvm.jailer.daemonize = False + microvm.help.enable_console() microvm.spawn() microvm.basic_config( vcpu_count=2, @@ -71,7 +71,7 @@ def test_serial_after_snapshot(uvm_plain, microvm_factory): # Load microVM clone from snapshot. vm = microvm_factory.build() - vm.jailer.daemonize = False + vm.help.enable_console() vm.spawn() vm.restore_from_snapshot(snapshot, resume=True) serial = Serial(vm) @@ -91,7 +91,7 @@ def test_serial_console_login(uvm_plain_any): Test serial console login. """ microvm = uvm_plain_any - microvm.jailer.daemonize = False + microvm.help.enable_console() microvm.spawn() # We don't need to monitor the memory for this test because we are @@ -139,7 +139,7 @@ def test_serial_dos(uvm_plain_any): Test serial console behavior under DoS. """ microvm = uvm_plain_any - microvm.jailer.daemonize = False + microvm.help.enable_console() microvm.spawn() # Set up the microVM with 1 vCPU and a serial console. @@ -169,7 +169,7 @@ def test_serial_block(uvm_plain_any): Test that writing to stdout never blocks the vCPU thread. """ test_microvm = uvm_plain_any - test_microvm.jailer.daemonize = False + test_microvm.help.enable_console() test_microvm.spawn() # Set up the microVM with 1 vCPU so we make sure the vCPU thread # responsible for the SSH connection will also run the serial. From 49ab17ba595c51f8fb0b4f488b6a4fac8c255737 Mon Sep 17 00:00:00 2001 From: Sudan Landge Date: Sun, 11 Feb 2024 22:42:09 +0000 Subject: [PATCH 6/7] tests: test all combinations of daemonize and new_pid_ns Microvm kill() has the logic to detect if Firecracker was actually killed using the PID stored in the Jailers root directory but, not all combinations of Jailers daemonize/new_pid_ns flags are tested so add new a test to try all 4 use cases. Signed-off-by: Sudan Landge --- tests/integration_tests/security/test_jail.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/integration_tests/security/test_jail.py b/tests/integration_tests/security/test_jail.py index 6956b1ace94..ff4fd6ff3dd 100644 --- a/tests/integration_tests/security/test_jail.py +++ b/tests/integration_tests/security/test_jail.py @@ -581,3 +581,35 @@ def test_new_pid_namespace(uvm_plain): assert len(nstgid_list) == 2 assert int(nstgid_list[1]) == 1 assert int(nstgid_list[0]) == fc_pid + + +@pytest.mark.parametrize( + "daemonize", + [True, False], +) +@pytest.mark.parametrize( + "new_pid_ns", + [True, False], +) +def test_firecracker_kill_by_pid(uvm_plain, daemonize, new_pid_ns): + """ + Test that Firecracker is spawned in a new PID namespace if requested. + """ + microvm = uvm_plain + microvm.jailer.daemonize = daemonize + microvm.jailer.new_pid_ns = new_pid_ns + microvm.spawn() + microvm.basic_config() + microvm.add_net_iface() + microvm.start() + + # verify the guest is active + exit_code, _, _ = microvm.ssh.run("ls") + assert exit_code == 0 + + # before killing microvm make sure the Jailer config is what we set it to be. + assert ( + microvm.jailer.daemonize == daemonize + and microvm.jailer.new_pid_ns == new_pid_ns + ) + microvm.kill() From 3872d22a899608593c756a8f2e6f77f48d010d8b Mon Sep 17 00:00:00 2001 From: Sudan Landge Date: Mon, 12 Feb 2024 11:35:47 +0000 Subject: [PATCH 7/7] changelog: update Jailer limitations and suggestions There was a change in behaviour introduced in Jailer because of which killing the Jailer does not kill the Firecracker process. Highlight the change in behaviour and suggested workaround in the CHANGELOG and Jailer doc. Signed-off-by: Sudan Landge --- CHANGELOG.md | 13 +++++++++++++ docs/jailer.md | 11 +++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b9f28e8458..c6293caa831 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,19 @@ and this project adheres to This fixes a bug where a microVM with incompatible balloon and guest memory size could be booted, due to the check for this condition happening after Firecracker's configuration was updated. +- [#4259](https://github.com/firecracker-microvm/firecracker/pull/4259): Added a + double fork mechanism in the Jailer to avoid setsid() failures occurred while + running Jailer as the process group leader. However, this changed the + behaviour of Jailer and now the Firecracker process will always have a + different PID than the Jailer process. + [#4436](https://github.com/firecracker-microvm/firecracker/pull/4436): Added a + "Known Limitations" section in the Jailer docs to highlight the above change + in behaviour introduced in PR#4259. + [#4442](https://github.com/firecracker-microvm/firecracker/pull/4442): As a + solution to the change in behaviour introduced in PR#4259, provided a + mechanism to reliably fetch Firecracker PID. With this change, Firecracker + process's PID will always be available in the Jailer's root directory + regardless of whether new_pid_ns was set. ## \[1.6.0\] diff --git a/docs/jailer.md b/docs/jailer.md index 3803dd43b44..2495039b5cf 100644 --- a/docs/jailer.md +++ b/docs/jailer.md @@ -280,10 +280,13 @@ Note: default value for `` is `/run/firecracker.socket`. ### Known limitations - When passing the --daemonize option to Firecracker without the --new-ns-pid - option, the Firecracker process will have a different pid than the Jailer - process. The suggested workaround to get Firecracker process's pid in this - case is using `--new-pid-ns` flag and read Firecracker's pid from the - `firecracker.pid` file present in the jailer's root directory. + option, the Firecracker process will have a different PID than the Jailer + process and killing the Jailer will not kill the Firecracker process. As a + workaround to get Firecracker PID, the Jailer stores the PID of the child + process in the jail root directory inside `.pid` for all cases + regardless of whether `--new-pid-ns` was provided. The suggested way to fetch + Firecracker's PID when using the Jailer is to read the `firecracker.pid` file + present in the Jailer's root directory. ## Caveats