From 7ec6467b3bf2ac895dd18b4bbfcd47a46b2bbac3 Mon Sep 17 00:00:00 2001 From: Matteo Settenvini Date: Thu, 30 Jan 2025 15:54:57 +0100 Subject: [PATCH] fix(dso): do not eagerly resolve symlinks This change contains three improvements: - we use a separate root node, so that it is easier to debug what is going on - we now are able to print the DOT graph of ELF binaries in DsoCleaner - we do not eagerly resolve symlinks, which produced completely incorrect results before --- Cargo.lock | 7 ++ Cargo.toml | 1 + src/args.rs | 5 ++ src/cleaners.rs | 4 +- src/cleaners/dso.rs | 165 ++++++++++++++++++++++++++++++-------------- 5 files changed, 130 insertions(+), 52 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ee980ed..49f0592 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -328,6 +328,12 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "indoc" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -632,6 +638,7 @@ dependencies = [ "env_logger", "goblin", "ignore", + "indoc", "log", "memmap2", "nix", diff --git a/Cargo.toml b/Cargo.toml index 932350b..c2e8436 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ async-trait = { version = "0.1" } clap = { version = "4.5", features = ["derive"] } env_logger = { version = "0.11" } ignore = { version = "0.4" } +indoc = { version = "2.0" } goblin = { version = "0.9" } log = { version = "0.4" } memmap2 = { version = "0.9" } diff --git a/src/args.rs b/src/args.rs index 78e4e4a..af165ae 100644 --- a/src/args.rs +++ b/src/args.rs @@ -27,6 +27,11 @@ pub struct Args { #[arg(long)] pub blocklist: Option, + /// An optional path to save the file graph of the DSO cleaner + /// in GraphViz format. Useful for debugging. + #[arg(long)] + pub output_dotfile: Option, + /// The location of the sysroot to clean up pub sysroot_location: PathBuf, } diff --git a/src/cleaners.rs b/src/cleaners.rs index 3b5a895..dc81713 100644 --- a/src/cleaners.rs +++ b/src/cleaners.rs @@ -47,7 +47,9 @@ const CHANNEL_MAX_LOAD: usize = CHANNEL_SIZE * 3 / 4; impl Runner { pub fn new(args: Args) -> Self { let removal_fn = Self::new_removal_fn(&args); - let mut cleaners: Cleaners = vec![Box::new(DsoCleaner::default())]; + let mut cleaners: Cleaners = vec![]; + + cleaners.push(Box::new(DsoCleaner::new(args.output_dotfile))); if let Some(wl) = args.allowlist { cleaners.push(Box::new(ListCleaner::new(Action::Keep, wl))); diff --git a/src/cleaners/dso.rs b/src/cleaners/dso.rs index bb973fb..ec60bbf 100644 --- a/src/cleaners/dso.rs +++ b/src/cleaners/dso.rs @@ -8,11 +8,15 @@ use async_trait::async_trait; use goblin::elf::Elf; use memmap2::Mmap; use nix::{errno::Errno, libc::ino_t}; -use petgraph::{prelude::DiGraphMap, visit::Dfs}; +use petgraph::{ + dot, + prelude::DiGraphMap, + visit::{Dfs, NodeRef}, +}; use std::{ collections::{HashMap, HashSet}, - fs::File, - io::{ErrorKind, Read, Seek}, + fs::{self, File}, + io::{ErrorKind, Read, Seek, Write}, path::{Path, PathBuf}, }; use tokio::sync::{ @@ -25,16 +29,33 @@ type InodeGraph = DiGraphMap; /// Cleans up unused shared libraries /// and warns about broken dependencies as well -#[derive(Default)] -pub struct DsoCleaner {} +pub struct DsoCleaner { + output_dot: Option, +} -#[derive(Default)] struct State { paths_map: InodeMap, graph: InodeGraph, } -const INODE_ANY_EXECUTABLE: ino_t = 0; +impl DsoCleaner { + pub fn new(output_dot: Option) -> Self { + Self { output_dot } + } +} + +impl Default for State { + fn default() -> Self { + let mut paths_map = InodeMap::default(); + let mut graph = InodeGraph::default(); + paths_map.insert(ROOT_NODE, HashSet::from([PathBuf::from("«root»")])); + graph.add_node(ROOT_NODE); + + Self { paths_map, graph } + } +} + +const ROOT_NODE: ino_t = 0; const ELF_MAGIC_HEADER: &[u8; 4] = b"\x7fELF"; #[async_trait] @@ -50,7 +71,11 @@ impl Cleaner for DsoCleaner { match files.recv().await { Ok(file) => { if let Err(e) = Self::process_file(&mut state, &file) { - log::warn!("{}: {}", file.display(), e); + log::warn!( + "{}: {} (this might produce wrong results!)", + file.display(), + e + ); } } Err(RecvError::Closed) => break, @@ -60,16 +85,11 @@ impl Cleaner for DsoCleaner { } } - // DEBUGGING stuff you can uncomment for a quick and dirty graph: - // println!( - // "{:?}", - // petgraph::dot::Dot::with_config(&state.graph, &[petgraph::dot::Config::EdgeNoLabel]) - // ); - - let mut dfs = Dfs::empty(&state.graph); - if state.graph.contains_node(INODE_ANY_EXECUTABLE) { - dfs.move_to(INODE_ANY_EXECUTABLE); + if let Some(dot) = &self.output_dot { + debug_print_graph(&state, &dot)?; } + + let mut dfs = Dfs::new(&state.graph, ROOT_NODE); while let Some(_) = dfs.next(&state.graph) {} for path in state @@ -98,6 +118,7 @@ impl Cleaner for DsoCleaner { impl DsoCleaner { fn process_file(state: &mut State, path: &Path) -> Result<()> { + log::trace!("processing {}", path.display()); let mut f = File::open(path)?; let mut hdr = [0u8; 4]; if let Err(e) = f.read_exact(&mut hdr) { @@ -118,61 +139,63 @@ impl DsoCleaner { let elf = Elf::parse(&mmap)?; if path.is_symlink() { - if !elf.is_lib { - // we don't care about symlinks to - // executables in our graph, as we - // are cleaning up only DSOs. - Ok(()) - } else { - Self::process_elf_symlink(state, path) - } + Self::process_elf_symlink(state, path, &elf) } else { Self::process_elf_file(state, path, &elf) } } - fn process_elf_symlink(state: &mut State, path: &Path) -> Result<()> { + fn process_elf_symlink(state: &mut State, path: &Path, elf: &Elf) -> Result<()> { let src = nix::sys::stat::lstat(path)?; - let dst = nix::sys::stat::stat(path)?; + if !elf.is_lib { + // To be able to use DFS on the graph later, we link each executable symlink to a fake ROOT_NODE + Self::update_graph(state, "".into(), ROOT_NODE, path.to_owned(), src.st_ino); + } + + let current_dir = std::env::current_dir()?; + let mut dst_path = std::fs::read_link(path)?; + if dst_path.is_absolute() { + dst_path = dst_path.strip_prefix("/")?.into(); + } else { + let parent = path.parent().unwrap(); + dst_path = fs::canonicalize(parent.join(dst_path))? + .strip_prefix(current_dir)? + .into(); + } + + let dst = nix::sys::stat::stat(&dst_path)?; if src.st_dev != dst.st_dev { log::warn!( "dso: {} points outside of the sysroot filesystem, check if this is intended", path.display() ); - return Ok(()); + } else { + log::trace!( + "dso: adding to graph symlink: '{}' to '{}'", + path.display(), + dst_path.display() + ); + Self::update_graph(state, path.into(), src.st_ino, dst_path, dst.st_ino); } - let current_dir = std::env::current_dir()?; - let dst_path = std::fs::canonicalize(path)? - .strip_prefix(current_dir)? - .to_path_buf(); - - log::trace!( - "dso: adding to graph symlink: '{}' to '{}'", - path.display(), - dst_path.display() - ); - - Self::update_graph(state, path.into(), src.st_ino, dst_path, dst.st_ino); Ok(()) } fn process_elf_file(state: &mut State, path: &Path, elf: &Elf) -> Result<()> { log::trace!("dso: adding to graph elf file '{}'", path.display()); - let search_paths = Self::determine_lib_search_paths(path, elf)?; + let src = nix::sys::stat::stat(path)?; - let src_stat = nix::sys::stat::stat(path)?; - let src_inode = if elf.is_lib { - src_stat.st_ino - } else { - // We put all executables in the same node - INODE_ANY_EXECUTABLE - }; + if !elf.is_lib { + // To be able to use DFS on the graph later, we link each executable to a fake ROOT_NODE + Self::update_graph(state, "".into(), ROOT_NODE, path.to_owned(), src.st_ino); + } + + let search_paths = Self::determine_lib_search_paths(path, elf)?; 'next_lib: for &library in elf.libraries.iter() { for lib_path in search_paths.iter() { let tentative_path = PathBuf::from(lib_path).strip_prefix("/")?.join(library); - let dst = match nix::sys::stat::stat(&tentative_path) { + let dst = match nix::sys::stat::lstat(&tentative_path) { Ok(dst) => dst, Err(Errno::ENOENT) => continue, Err(e) => anyhow::bail!( @@ -182,11 +205,11 @@ impl DsoCleaner { ), }; - if src_stat.st_dev != dst.st_dev { + if src.st_dev != dst.st_dev { continue; // These are not the droids you are looking for. } - Self::update_graph(state, path.into(), src_inode, tentative_path, dst.st_ino); + Self::update_graph(state, path.into(), src.st_ino, tentative_path, dst.st_ino); continue 'next_lib; } @@ -272,3 +295,43 @@ impl DsoCleaner { state.graph.add_edge(src_inode, dst_inode, ()); } } + +fn debug_print_graph(state: &State, output_file: &Path) -> Result<()> { + use std::ffi::OsStr; + + let mut output_dot = File::create(output_file)?; + write!( + &mut output_dot, + indoc::indoc! { + "digraph {{ + rankdir=\"LR\" + {:?} + }}" + }, + petgraph::dot::Dot::with_attr_getters( + &state.graph, + &[ + dot::Config::NodeNoLabel, + dot::Config::EdgeNoLabel, + dot::Config::GraphContentOnly + ], + &|_, _| { String::new() }, + &|_, n| { + let paths = state.paths_map.get(&n.id()).unwrap(); + let first_path = paths.iter().next().expect(&format!( + "dso: you have a path map with an empty entry for inode {}", + n.id() + )); + format!( + "label = \"({}, {})\"", + n.weight(), + &first_path + .file_name() + .unwrap_or(OsStr::new("🚀")) + .to_string_lossy() + ) + } + ) + )?; + Ok(()) +}