Skip to content

Commit 8d4bf5c

Browse files
authored
Merge pull request #919 from cgwalters/dynamic-mount-dev
install: Automatically set up /dev and /var/lib/containers
2 parents d866f5c + 3261203 commit 8d4bf5c

File tree

7 files changed

+187
-98
lines changed

7 files changed

+187
-98
lines changed

hack/lldb/deploy.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ sudo podman build --build-arg "sshpubkey=$(cat ~/.ssh/id_rsa.pub)" -f Containerf
1111
mkdir -p ~/.cache/bootc-dev/disks
1212
rm -f ~/.cache/bootc-dev/disks/lldb.raw
1313
truncate -s 10G ~/.cache/bootc-dev/disks/lldb.raw
14-
sudo podman run --pid=host --network=host --privileged --security-opt label=type:unconfined_t -v /dev:/dev -v /var/lib/containers:/var/lib/containers -v ~/.cache/bootc-dev/disks:/output -v /dev:/dev localhost/bootc-lldb bootc install to-disk --via-loopback --generic-image --skip-fetch-check /output/lldb.raw
14+
sudo podman run --pid=host --network=host --privileged --security-opt label=type:unconfined_t -v ~/.cache/bootc-dev/disks:/output localhost/bootc-lldb bootc install to-disk --via-loopback --generic-image --skip-fetch-check /output/lldb.raw
1515

1616
# create a new VM in libvirt
1717
set +e

lib/src/install.rs

Lines changed: 28 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ use crate::utils::sigpolicy_from_opts;
5656
const BOOT: &str = "boot";
5757
/// Directory for transient runtime state
5858
const RUN_BOOTC: &str = "/run/bootc";
59+
/// The default path for the host rootfs
60+
const ALONGSIDE_ROOT_MOUNT: &str = "/target";
5961
/// This is an ext4 special directory we need to ignore.
6062
const LOST_AND_FOUND: &str = "lost+found";
6163
/// The filename of the composefs EROFS superblock; TODO move this into ostree
@@ -316,9 +318,10 @@ pub(crate) struct InstallToExistingRootOpts {
316318
#[clap(long)]
317319
pub(crate) acknowledge_destructive: bool,
318320

319-
/// Path to the mounted root; it's expected to invoke podman with
320-
/// `-v /:/target`, then supplying this argument is unnecessary.
321-
#[clap(default_value = "/target")]
321+
/// Path to the mounted root; this is now not necessary to provide.
322+
/// Historically it was necessary to ensure the host rootfs was mounted here
323+
/// via e.g. `-v /:/target`.
324+
#[clap(default_value = ALONGSIDE_ROOT_MOUNT)]
322325
pub(crate) root_path: Utf8PathBuf,
323326
}
324327

@@ -333,8 +336,6 @@ pub(crate) struct SourceInfo {
333336
pub(crate) selinux: bool,
334337
/// Whether the source is available in the host mount namespace
335338
pub(crate) in_host_mountns: bool,
336-
/// Whether we were invoked with -v /var/lib/containers:/var/lib/containers
337-
pub(crate) have_host_container_storage: bool,
338339
}
339340

340341
// Shared read-only global state
@@ -516,38 +517,13 @@ impl SourceInfo {
516517
tracing::debug!("Finding digest for image ID {}", container_info.imageid);
517518
let digest = crate::podman::imageid_to_digest(&container_info.imageid)?;
518519

519-
let have_host_container_storage = Utf8Path::new(crate::podman::CONTAINER_STORAGE)
520-
.try_exists()?
521-
&& ostree_ext::mountutil::is_mountpoint(
522-
&root,
523-
crate::podman::CONTAINER_STORAGE.trim_start_matches('/'),
524-
)?
525-
.unwrap_or_default();
526-
527-
// Verify up front we can do the fetch
528-
if have_host_container_storage {
529-
tracing::debug!("Host container storage found");
530-
} else {
531-
tracing::debug!(
532-
"No {} mount available, checking skopeo",
533-
crate::podman::CONTAINER_STORAGE
534-
);
535-
require_skopeo_with_containers_storage()?;
536-
}
537-
538-
Self::new(
539-
imageref,
540-
Some(digest),
541-
root,
542-
true,
543-
have_host_container_storage,
544-
)
520+
Self::new(imageref, Some(digest), root, true)
545521
}
546522

547523
#[context("Creating source info from a given imageref")]
548524
pub(crate) fn from_imageref(imageref: &str, root: &Dir) -> Result<Self> {
549525
let imageref = ostree_container::ImageReference::try_from(imageref)?;
550-
Self::new(imageref, None, root, false, false)
526+
Self::new(imageref, None, root, false)
551527
}
552528

553529
fn have_selinux_from_repo(root: &Dir) -> Result<bool> {
@@ -573,7 +549,6 @@ impl SourceInfo {
573549
digest: Option<String>,
574550
root: &Dir,
575551
in_host_mountns: bool,
576-
have_host_container_storage: bool,
577552
) -> Result<Self> {
578553
let selinux = if Path::new("/ostree/repo").try_exists()? {
579554
Self::have_selinux_from_repo(root)?
@@ -585,7 +560,6 @@ impl SourceInfo {
585560
digest,
586561
selinux,
587562
in_host_mountns,
588-
have_host_container_storage,
589563
})
590564
}
591565
}
@@ -716,19 +690,7 @@ async fn install_container(
716690
}
717691
};
718692

719-
// We need to fetch the container image from the root mount namespace. If
720-
// we don't have /var/lib/containers mounted in this image, fork off skopeo
721-
// in the host mountnfs.
722-
let skopeo_cmd = if !state.source.have_host_container_storage {
723-
Some(run_in_host_mountns("skopeo"))
724-
} else {
725-
None
726-
};
727-
let proxy_cfg = ostree_container::store::ImageProxyConfig {
728-
skopeo_cmd,
729-
..Default::default()
730-
};
731-
693+
let proxy_cfg = ostree_container::store::ImageProxyConfig::default();
732694
(src_imageref, Some(proxy_cfg))
733695
};
734696
let src_imageref = ostree_container::OstreeImageReference {
@@ -895,32 +857,6 @@ pub(crate) fn exec_in_host_mountns(args: &[std::ffi::OsString]) -> Result<()> {
895857
Err(Command::new(cmd).args(args).exec()).context("exec")?
896858
}
897859

898-
#[context("Querying skopeo version")]
899-
fn require_skopeo_with_containers_storage() -> Result<()> {
900-
let out = Task::new_cmd("skopeo --version", run_in_host_mountns("skopeo"))
901-
.args(["--version"])
902-
.quiet()
903-
.read()
904-
.context("Failed to run skopeo (it currently must be installed in the host root)")?;
905-
let mut v = out
906-
.strip_prefix("skopeo version ")
907-
.map(|v| v.split('.'))
908-
.ok_or_else(|| anyhow::anyhow!("Unexpected output from skopeo version"))?;
909-
let major = v
910-
.next()
911-
.ok_or_else(|| anyhow::anyhow!("Missing major version"))?;
912-
let minor = v
913-
.next()
914-
.ok_or_else(|| anyhow::anyhow!("Missing minor version"))?;
915-
let (major, minor) = (major.parse::<u64>()?, minor.parse::<u64>()?);
916-
let supported = major > 1 || minor > 10;
917-
if supported {
918-
Ok(())
919-
} else {
920-
anyhow::bail!("skopeo >= 1.11 is required on host")
921-
}
922-
}
923-
924860
pub(crate) struct RootSetup {
925861
luks_device: Option<String>,
926862
device_info: crate::blockdev::PartitionTable,
@@ -1269,6 +1205,8 @@ async fn prepare_install(
12691205
tracing::debug!("Target image reference: {target_imgref}");
12701206

12711207
// A bit of basic global state setup
1208+
crate::mount::ensure_mirrored_host_mount("/dev")?;
1209+
crate::mount::ensure_mirrored_host_mount("/var/lib/containers")?;
12721210
ensure_var()?;
12731211
setup_tmp_mounts()?;
12741212
// Allocate a temporary directory we can use in various places to avoid
@@ -1454,12 +1392,6 @@ async fn install_to_filesystem_impl(state: &State, rootfs: &mut RootSetup) -> Re
14541392
.ok_or_else(|| anyhow!("No uuid for boot/root"))?;
14551393
tracing::debug!("boot uuid={boot_uuid}");
14561394

1457-
// If we're doing an alongside install, then the /dev bootupd sees needs to be the host's.
1458-
ensure!(
1459-
crate::mount::is_same_as_host(Utf8Path::new("/dev"))?,
1460-
"Missing /dev mount to host /dev"
1461-
);
1462-
14631395
let bound_images = BoundImages::from_state(state).await?;
14641396

14651397
// Initialize the ostree sysroot (repo, stateroot, etc.)
@@ -1514,9 +1446,6 @@ pub(crate) async fn install_to_disk(mut opts: InstallToDiskOpts) -> Result<()> {
15141446
block_opts.device
15151447
);
15161448
}
1517-
if !crate::mount::is_same_as_host(Utf8Path::new("/dev"))? {
1518-
anyhow::bail!("Loopback mounts (--via-loopback) require host devices (-v /dev:/dev)");
1519-
}
15201449
} else if !target_blockdev_meta.file_type().is_block_device() {
15211450
anyhow::bail!("Not a block device: {}", block_opts.device);
15221451
}
@@ -1705,6 +1634,23 @@ pub(crate) async fn install_to_filesystem(
17051634
// And the last bit of state here is the fsopts, which we also destructure now.
17061635
let mut fsopts = opts.filesystem_opts;
17071636

1637+
// If we're doing an alongside install, automatically set up the host rootfs
1638+
// mount if it wasn't done already.
1639+
if targeting_host_root
1640+
&& fsopts.root_path.as_str() == ALONGSIDE_ROOT_MOUNT
1641+
&& !fsopts.root_path.try_exists()?
1642+
{
1643+
tracing::debug!("Mounting host / to {ALONGSIDE_ROOT_MOUNT}");
1644+
std::fs::create_dir(ALONGSIDE_ROOT_MOUNT)?;
1645+
crate::mount::bind_mount_from_pidns(
1646+
crate::mount::PID1,
1647+
"/".into(),
1648+
ALONGSIDE_ROOT_MOUNT.into(),
1649+
true,
1650+
)
1651+
.context("Mounting host / to {ALONGSIDE_ROOT_MOUNT}")?;
1652+
}
1653+
17081654
// Check that the target is a directory
17091655
{
17101656
let root_path = &fsopts.root_path;

lib/src/mount.rs

Lines changed: 154 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,36 @@
11
//! Helpers for interacting with mountpoints
22
3-
use std::process::Command;
3+
use std::{
4+
fs,
5+
os::fd::{AsFd, OwnedFd},
6+
process::Command,
7+
};
48

5-
use anyhow::{anyhow, Result};
9+
use anyhow::{anyhow, Context, Result};
610
use bootc_utils::CommandRunExt;
711
use camino::Utf8Path;
812
use fn_error_context::context;
13+
use rustix::{
14+
mount::{MoveMountFlags, OpenTreeFlags},
15+
net::{
16+
AddressFamily, RecvFlags, SendAncillaryBuffer, SendAncillaryMessage, SendFlags,
17+
SocketFlags, SocketType,
18+
},
19+
process::WaitOptions,
20+
thread::Pid,
21+
};
922
use serde::Deserialize;
1023

1124
use crate::task::Task;
1225

26+
/// Well known identifier for pid 1
27+
pub(crate) const PID1: Pid = const {
28+
match Pid::from_raw(1) {
29+
Some(v) => v,
30+
None => panic!("Expected to parse pid1"),
31+
}
32+
};
33+
1334
#[derive(Deserialize, Debug)]
1435
#[serde(rename_all = "kebab-case")]
1536
#[allow(dead_code)]
@@ -124,3 +145,134 @@ pub(crate) fn is_same_as_host(path: &Utf8Path) -> Result<bool> {
124145
);
125146
Ok(devstat.f_fsid == hostdevstat.f_fsid)
126147
}
148+
149+
/// Given a pid, enter its mount namespace and acquire a file descriptor
150+
/// for a mount from that namespace.
151+
#[allow(unsafe_code)]
152+
#[context("Opening mount tree from pid")]
153+
pub(crate) fn open_tree_from_pidns(
154+
pid: rustix::process::Pid,
155+
path: &Utf8Path,
156+
recursive: bool,
157+
) -> Result<OwnedFd> {
158+
// Allocate a socket pair to use for sending file descriptors.
159+
let (sock_parent, sock_child) = rustix::net::socketpair(
160+
AddressFamily::UNIX,
161+
SocketType::STREAM,
162+
SocketFlags::CLOEXEC,
163+
None,
164+
)
165+
.context("socketpair")?;
166+
const DUMMY_DATA: &[u8] = &[b'!'];
167+
match unsafe { libc::fork() } {
168+
0 => {
169+
// We're in the child. At this point we know we don't have multiple threads, so we
170+
// can safely `setns`.
171+
172+
// Open up the namespace of the target process as a file descriptor, and enter it.
173+
let pidlink = fs::File::open(format!("/proc/{}/ns/mnt", pid.as_raw_nonzero()))?;
174+
rustix::thread::move_into_link_name_space(
175+
pidlink.as_fd(),
176+
Some(rustix::thread::LinkNameSpaceType::Mount),
177+
)
178+
.context("setns")?;
179+
180+
// Open the target mount path as a file descriptor.
181+
let recursive = if recursive {
182+
OpenTreeFlags::AT_RECURSIVE
183+
} else {
184+
OpenTreeFlags::empty()
185+
};
186+
let fd = rustix::mount::open_tree(
187+
rustix::fs::CWD,
188+
path.as_std_path(),
189+
OpenTreeFlags::OPEN_TREE_CLOEXEC | OpenTreeFlags::OPEN_TREE_CLONE | recursive,
190+
)
191+
.context("open_tree")?;
192+
193+
// And send that file descriptor via fd passing over the socketpair.
194+
let fd = fd.as_fd();
195+
let fds = [fd];
196+
let mut buffer = [0u8; rustix::cmsg_space!(ScmRights(1))];
197+
let mut control = SendAncillaryBuffer::new(&mut buffer);
198+
let pushed = control.push(SendAncillaryMessage::ScmRights(&fds));
199+
assert!(pushed);
200+
let ios = std::io::IoSlice::new(DUMMY_DATA);
201+
rustix::net::sendmsg(sock_child, &[ios], &mut control, SendFlags::empty())?;
202+
// Then we're done.
203+
std::process::exit(0)
204+
}
205+
-1 => {
206+
// fork failed
207+
let e = std::io::Error::last_os_error();
208+
anyhow::bail!("failed to fork: {e}");
209+
}
210+
n => {
211+
// We're in the parent; create a pid (checking that n > 0).
212+
let pid = rustix::process::Pid::from_raw(n).unwrap();
213+
// Receive the mount file descriptor from the child
214+
let mut cmsg_space = vec![0; rustix::cmsg_space!(ScmRights(1))];
215+
let mut cmsg_buffer = rustix::net::RecvAncillaryBuffer::new(&mut cmsg_space);
216+
let mut buf = [0u8; DUMMY_DATA.len()];
217+
let iov = std::io::IoSliceMut::new(buf.as_mut());
218+
let mut iov = [iov];
219+
let nread = rustix::net::recvmsg(
220+
sock_parent,
221+
&mut iov,
222+
&mut cmsg_buffer,
223+
RecvFlags::CMSG_CLOEXEC,
224+
)
225+
.context("recvmsg")?
226+
.bytes;
227+
assert_eq!(nread, DUMMY_DATA.len());
228+
assert_eq!(buf, DUMMY_DATA);
229+
// And extract the file descriptor
230+
let r = cmsg_buffer
231+
.drain()
232+
.filter_map(|m| match m {
233+
rustix::net::RecvAncillaryMessage::ScmRights(f) => Some(f),
234+
_ => None,
235+
})
236+
.flatten()
237+
.next()
238+
.ok_or_else(|| anyhow::anyhow!("Did not receive a file descriptor"))?;
239+
rustix::process::waitpid(Some(pid), WaitOptions::empty())?;
240+
Ok(r)
241+
}
242+
}
243+
}
244+
245+
/// Create a bind mount from the mount namespace of the target pid
246+
/// into our mount namespace.
247+
pub(crate) fn bind_mount_from_pidns(
248+
pid: Pid,
249+
src: &Utf8Path,
250+
target: &Utf8Path,
251+
recursive: bool,
252+
) -> Result<()> {
253+
let src = open_tree_from_pidns(pid, src, recursive)?;
254+
rustix::mount::move_mount(
255+
src.as_fd(),
256+
"",
257+
rustix::fs::CWD,
258+
target.as_std_path(),
259+
MoveMountFlags::MOVE_MOUNT_F_EMPTY_PATH,
260+
)
261+
.context("Moving mount")?;
262+
Ok(())
263+
}
264+
265+
// If the target path is not already mirrored from the host (e.g. via -v /dev:/dev)
266+
// then recursively mount it.
267+
pub(crate) fn ensure_mirrored_host_mount(path: impl AsRef<Utf8Path>) -> Result<()> {
268+
let path = path.as_ref();
269+
// If we didn't have this in our filesystem already (e.g. for /var/lib/containers)
270+
// then create it now.
271+
std::fs::create_dir_all(path)?;
272+
if is_same_as_host(path)? {
273+
tracing::debug!("Already mounted from host: {path}");
274+
return Ok(());
275+
}
276+
tracing::debug!("Propagating host mount: {path}");
277+
bind_mount_from_pidns(PID1, path, path, true)
278+
}

ostree-ext/.github/workflows/bootc.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ jobs:
5959
- name: Integration tests
6060
run: |
6161
set -xeuo pipefail
62-
sudo podman run --rm -ti --privileged -v /:/target -v /var/lib/containers:/var/lib/containers -v ./usr/bin/bootc:/usr/bin/bootc --pid=host --security-opt label=disable \
62+
sudo podman run --rm -ti --privileged -v ./usr/bin/bootc:/usr/bin/bootc --pid=host --security-opt label=disable \
6363
quay.io/centos-bootc/centos-bootc-dev:stream9 bootc install to-filesystem \
6464
--karg=foo=bar --disable-selinux --replace=alongside /target
6565

0 commit comments

Comments
 (0)