fix(dso): do not eagerly resolve symlinks

This change contains three improvements:

- we use a separate root node, so that it is easier to debug
  what is going on
- we now are able to print the DOT graph of ELF binaries in
  DsoCleaner
- we do not eagerly resolve symlinks, which produced completely
  incorrect results before
This commit is contained in:
Matteo Settenvini 2025-01-30 15:54:57 +01:00
parent a05dd7084d
commit 7ec6467b3b
Signed by: matteo
GPG Key ID: 1C1B12600D81DE05
5 changed files with 130 additions and 52 deletions

7
Cargo.lock generated
View File

@ -328,6 +328,12 @@ dependencies = [
"hashbrown", "hashbrown",
] ]
[[package]]
name = "indoc"
version = "2.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"
[[package]] [[package]]
name = "is_terminal_polyfill" name = "is_terminal_polyfill"
version = "1.70.1" version = "1.70.1"
@ -632,6 +638,7 @@ dependencies = [
"env_logger", "env_logger",
"goblin", "goblin",
"ignore", "ignore",
"indoc",
"log", "log",
"memmap2", "memmap2",
"nix", "nix",

View File

@ -15,6 +15,7 @@ async-trait = { version = "0.1" }
clap = { version = "4.5", features = ["derive"] } clap = { version = "4.5", features = ["derive"] }
env_logger = { version = "0.11" } env_logger = { version = "0.11" }
ignore = { version = "0.4" } ignore = { version = "0.4" }
indoc = { version = "2.0" }
goblin = { version = "0.9" } goblin = { version = "0.9" }
log = { version = "0.4" } log = { version = "0.4" }
memmap2 = { version = "0.9" } memmap2 = { version = "0.9" }

View File

@ -27,6 +27,11 @@ pub struct Args {
#[arg(long)] #[arg(long)]
pub blocklist: Option<PathBuf>, pub blocklist: Option<PathBuf>,
/// An optional path to save the file graph of the DSO cleaner
/// in GraphViz format. Useful for debugging.
#[arg(long)]
pub output_dotfile: Option<PathBuf>,
/// The location of the sysroot to clean up /// The location of the sysroot to clean up
pub sysroot_location: PathBuf, pub sysroot_location: PathBuf,
} }

View File

@ -47,7 +47,9 @@ const CHANNEL_MAX_LOAD: usize = CHANNEL_SIZE * 3 / 4;
impl Runner { impl Runner {
pub fn new(args: Args) -> Self { pub fn new(args: Args) -> Self {
let removal_fn = Self::new_removal_fn(&args); let removal_fn = Self::new_removal_fn(&args);
let mut cleaners: Cleaners = vec![Box::new(DsoCleaner::default())]; let mut cleaners: Cleaners = vec![];
cleaners.push(Box::new(DsoCleaner::new(args.output_dotfile)));
if let Some(wl) = args.allowlist { if let Some(wl) = args.allowlist {
cleaners.push(Box::new(ListCleaner::new(Action::Keep, wl))); cleaners.push(Box::new(ListCleaner::new(Action::Keep, wl)));

View File

@ -8,11 +8,15 @@ use async_trait::async_trait;
use goblin::elf::Elf; use goblin::elf::Elf;
use memmap2::Mmap; use memmap2::Mmap;
use nix::{errno::Errno, libc::ino_t}; use nix::{errno::Errno, libc::ino_t};
use petgraph::{prelude::DiGraphMap, visit::Dfs}; use petgraph::{
dot,
prelude::DiGraphMap,
visit::{Dfs, NodeRef},
};
use std::{ use std::{
collections::{HashMap, HashSet}, collections::{HashMap, HashSet},
fs::File, fs::{self, File},
io::{ErrorKind, Read, Seek}, io::{ErrorKind, Read, Seek, Write},
path::{Path, PathBuf}, path::{Path, PathBuf},
}; };
use tokio::sync::{ use tokio::sync::{
@ -25,16 +29,33 @@ type InodeGraph = DiGraphMap<ino_t, ()>;
/// Cleans up unused shared libraries /// Cleans up unused shared libraries
/// and warns about broken dependencies as well /// and warns about broken dependencies as well
#[derive(Default)] pub struct DsoCleaner {
pub struct DsoCleaner {} output_dot: Option<PathBuf>,
}
#[derive(Default)]
struct State { struct State {
paths_map: InodeMap, paths_map: InodeMap,
graph: InodeGraph, graph: InodeGraph,
} }
const INODE_ANY_EXECUTABLE: ino_t = 0; impl DsoCleaner {
pub fn new(output_dot: Option<PathBuf>) -> Self {
Self { output_dot }
}
}
impl Default for State {
fn default() -> Self {
let mut paths_map = InodeMap::default();
let mut graph = InodeGraph::default();
paths_map.insert(ROOT_NODE, HashSet::from([PathBuf::from("«root»")]));
graph.add_node(ROOT_NODE);
Self { paths_map, graph }
}
}
const ROOT_NODE: ino_t = 0;
const ELF_MAGIC_HEADER: &[u8; 4] = b"\x7fELF"; const ELF_MAGIC_HEADER: &[u8; 4] = b"\x7fELF";
#[async_trait] #[async_trait]
@ -50,7 +71,11 @@ impl Cleaner for DsoCleaner {
match files.recv().await { match files.recv().await {
Ok(file) => { Ok(file) => {
if let Err(e) = Self::process_file(&mut state, &file) { if let Err(e) = Self::process_file(&mut state, &file) {
log::warn!("{}: {}", file.display(), e); log::warn!(
"{}: {} (this might produce wrong results!)",
file.display(),
e
);
} }
} }
Err(RecvError::Closed) => break, Err(RecvError::Closed) => break,
@ -60,16 +85,11 @@ impl Cleaner for DsoCleaner {
} }
} }
// DEBUGGING stuff you can uncomment for a quick and dirty graph: if let Some(dot) = &self.output_dot {
// println!( debug_print_graph(&state, &dot)?;
// "{:?}",
// petgraph::dot::Dot::with_config(&state.graph, &[petgraph::dot::Config::EdgeNoLabel])
// );
let mut dfs = Dfs::empty(&state.graph);
if state.graph.contains_node(INODE_ANY_EXECUTABLE) {
dfs.move_to(INODE_ANY_EXECUTABLE);
} }
let mut dfs = Dfs::new(&state.graph, ROOT_NODE);
while let Some(_) = dfs.next(&state.graph) {} while let Some(_) = dfs.next(&state.graph) {}
for path in state for path in state
@ -98,6 +118,7 @@ impl Cleaner for DsoCleaner {
impl DsoCleaner { impl DsoCleaner {
fn process_file(state: &mut State, path: &Path) -> Result<()> { fn process_file(state: &mut State, path: &Path) -> Result<()> {
log::trace!("processing {}", path.display());
let mut f = File::open(path)?; let mut f = File::open(path)?;
let mut hdr = [0u8; 4]; let mut hdr = [0u8; 4];
if let Err(e) = f.read_exact(&mut hdr) { if let Err(e) = f.read_exact(&mut hdr) {
@ -118,61 +139,63 @@ impl DsoCleaner {
let elf = Elf::parse(&mmap)?; let elf = Elf::parse(&mmap)?;
if path.is_symlink() { if path.is_symlink() {
if !elf.is_lib { Self::process_elf_symlink(state, path, &elf)
// we don't care about symlinks to
// executables in our graph, as we
// are cleaning up only DSOs.
Ok(())
} else {
Self::process_elf_symlink(state, path)
}
} else { } else {
Self::process_elf_file(state, path, &elf) Self::process_elf_file(state, path, &elf)
} }
} }
fn process_elf_symlink(state: &mut State, path: &Path) -> Result<()> { fn process_elf_symlink(state: &mut State, path: &Path, elf: &Elf) -> Result<()> {
let src = nix::sys::stat::lstat(path)?; let src = nix::sys::stat::lstat(path)?;
let dst = nix::sys::stat::stat(path)?; if !elf.is_lib {
// To be able to use DFS on the graph later, we link each executable symlink to a fake ROOT_NODE
Self::update_graph(state, "".into(), ROOT_NODE, path.to_owned(), src.st_ino);
}
let current_dir = std::env::current_dir()?;
let mut dst_path = std::fs::read_link(path)?;
if dst_path.is_absolute() {
dst_path = dst_path.strip_prefix("/")?.into();
} else {
let parent = path.parent().unwrap();
dst_path = fs::canonicalize(parent.join(dst_path))?
.strip_prefix(current_dir)?
.into();
}
let dst = nix::sys::stat::stat(&dst_path)?;
if src.st_dev != dst.st_dev { if src.st_dev != dst.st_dev {
log::warn!( log::warn!(
"dso: {} points outside of the sysroot filesystem, check if this is intended", "dso: {} points outside of the sysroot filesystem, check if this is intended",
path.display() path.display()
); );
return Ok(()); } else {
log::trace!(
"dso: adding to graph symlink: '{}' to '{}'",
path.display(),
dst_path.display()
);
Self::update_graph(state, path.into(), src.st_ino, dst_path, dst.st_ino);
} }
let current_dir = std::env::current_dir()?;
let dst_path = std::fs::canonicalize(path)?
.strip_prefix(current_dir)?
.to_path_buf();
log::trace!(
"dso: adding to graph symlink: '{}' to '{}'",
path.display(),
dst_path.display()
);
Self::update_graph(state, path.into(), src.st_ino, dst_path, dst.st_ino);
Ok(()) Ok(())
} }
fn process_elf_file(state: &mut State, path: &Path, elf: &Elf) -> Result<()> { fn process_elf_file(state: &mut State, path: &Path, elf: &Elf) -> Result<()> {
log::trace!("dso: adding to graph elf file '{}'", path.display()); log::trace!("dso: adding to graph elf file '{}'", path.display());
let search_paths = Self::determine_lib_search_paths(path, elf)?; let src = nix::sys::stat::stat(path)?;
let src_stat = nix::sys::stat::stat(path)?; if !elf.is_lib {
let src_inode = if elf.is_lib { // To be able to use DFS on the graph later, we link each executable to a fake ROOT_NODE
src_stat.st_ino Self::update_graph(state, "".into(), ROOT_NODE, path.to_owned(), src.st_ino);
} else { }
// We put all executables in the same node
INODE_ANY_EXECUTABLE let search_paths = Self::determine_lib_search_paths(path, elf)?;
};
'next_lib: for &library in elf.libraries.iter() { 'next_lib: for &library in elf.libraries.iter() {
for lib_path in search_paths.iter() { for lib_path in search_paths.iter() {
let tentative_path = PathBuf::from(lib_path).strip_prefix("/")?.join(library); let tentative_path = PathBuf::from(lib_path).strip_prefix("/")?.join(library);
let dst = match nix::sys::stat::stat(&tentative_path) { let dst = match nix::sys::stat::lstat(&tentative_path) {
Ok(dst) => dst, Ok(dst) => dst,
Err(Errno::ENOENT) => continue, Err(Errno::ENOENT) => continue,
Err(e) => anyhow::bail!( Err(e) => anyhow::bail!(
@ -182,11 +205,11 @@ impl DsoCleaner {
), ),
}; };
if src_stat.st_dev != dst.st_dev { if src.st_dev != dst.st_dev {
continue; // These are not the droids you are looking for. continue; // These are not the droids you are looking for.
} }
Self::update_graph(state, path.into(), src_inode, tentative_path, dst.st_ino); Self::update_graph(state, path.into(), src.st_ino, tentative_path, dst.st_ino);
continue 'next_lib; continue 'next_lib;
} }
@ -272,3 +295,43 @@ impl DsoCleaner {
state.graph.add_edge(src_inode, dst_inode, ()); state.graph.add_edge(src_inode, dst_inode, ());
} }
} }
fn debug_print_graph(state: &State, output_file: &Path) -> Result<()> {
use std::ffi::OsStr;
let mut output_dot = File::create(output_file)?;
write!(
&mut output_dot,
indoc::indoc! {
"digraph {{
rankdir=\"LR\"
{:?}
}}"
},
petgraph::dot::Dot::with_attr_getters(
&state.graph,
&[
dot::Config::NodeNoLabel,
dot::Config::EdgeNoLabel,
dot::Config::GraphContentOnly
],
&|_, _| { String::new() },
&|_, n| {
let paths = state.paths_map.get(&n.id()).unwrap();
let first_path = paths.iter().next().expect(&format!(
"dso: you have a path map with an empty entry for inode {}",
n.id()
));
format!(
"label = \"({}, {})\"",
n.weight(),
&first_path
.file_name()
.unwrap_or(OsStr::new("🚀"))
.to_string_lossy()
)
}
)
)?;
Ok(())
}