fix(dso): do not eagerly resolve symlinks
This change contains three improvements: - we use a separate root node, so that it is easier to debug what is going on - we now are able to print the DOT graph of ELF binaries in DsoCleaner - we do not eagerly resolve symlinks, which produced completely incorrect results before
This commit is contained in:
parent
a05dd7084d
commit
7ec6467b3b
|
@ -328,6 +328,12 @@ dependencies = [
|
||||||
"hashbrown",
|
"hashbrown",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "indoc"
|
||||||
|
version = "2.0.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "is_terminal_polyfill"
|
name = "is_terminal_polyfill"
|
||||||
version = "1.70.1"
|
version = "1.70.1"
|
||||||
|
@ -632,6 +638,7 @@ dependencies = [
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"goblin",
|
"goblin",
|
||||||
"ignore",
|
"ignore",
|
||||||
|
"indoc",
|
||||||
"log",
|
"log",
|
||||||
"memmap2",
|
"memmap2",
|
||||||
"nix",
|
"nix",
|
||||||
|
|
|
@ -15,6 +15,7 @@ async-trait = { version = "0.1" }
|
||||||
clap = { version = "4.5", features = ["derive"] }
|
clap = { version = "4.5", features = ["derive"] }
|
||||||
env_logger = { version = "0.11" }
|
env_logger = { version = "0.11" }
|
||||||
ignore = { version = "0.4" }
|
ignore = { version = "0.4" }
|
||||||
|
indoc = { version = "2.0" }
|
||||||
goblin = { version = "0.9" }
|
goblin = { version = "0.9" }
|
||||||
log = { version = "0.4" }
|
log = { version = "0.4" }
|
||||||
memmap2 = { version = "0.9" }
|
memmap2 = { version = "0.9" }
|
||||||
|
|
|
@ -27,6 +27,11 @@ pub struct Args {
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub blocklist: Option<PathBuf>,
|
pub blocklist: Option<PathBuf>,
|
||||||
|
|
||||||
|
/// An optional path to save the file graph of the DSO cleaner
|
||||||
|
/// in GraphViz format. Useful for debugging.
|
||||||
|
#[arg(long)]
|
||||||
|
pub output_dotfile: Option<PathBuf>,
|
||||||
|
|
||||||
/// The location of the sysroot to clean up
|
/// The location of the sysroot to clean up
|
||||||
pub sysroot_location: PathBuf,
|
pub sysroot_location: PathBuf,
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,7 +47,9 @@ const CHANNEL_MAX_LOAD: usize = CHANNEL_SIZE * 3 / 4;
|
||||||
impl Runner {
|
impl Runner {
|
||||||
pub fn new(args: Args) -> Self {
|
pub fn new(args: Args) -> Self {
|
||||||
let removal_fn = Self::new_removal_fn(&args);
|
let removal_fn = Self::new_removal_fn(&args);
|
||||||
let mut cleaners: Cleaners = vec![Box::new(DsoCleaner::default())];
|
let mut cleaners: Cleaners = vec![];
|
||||||
|
|
||||||
|
cleaners.push(Box::new(DsoCleaner::new(args.output_dotfile)));
|
||||||
|
|
||||||
if let Some(wl) = args.allowlist {
|
if let Some(wl) = args.allowlist {
|
||||||
cleaners.push(Box::new(ListCleaner::new(Action::Keep, wl)));
|
cleaners.push(Box::new(ListCleaner::new(Action::Keep, wl)));
|
||||||
|
|
|
@ -8,11 +8,15 @@ use async_trait::async_trait;
|
||||||
use goblin::elf::Elf;
|
use goblin::elf::Elf;
|
||||||
use memmap2::Mmap;
|
use memmap2::Mmap;
|
||||||
use nix::{errno::Errno, libc::ino_t};
|
use nix::{errno::Errno, libc::ino_t};
|
||||||
use petgraph::{prelude::DiGraphMap, visit::Dfs};
|
use petgraph::{
|
||||||
|
dot,
|
||||||
|
prelude::DiGraphMap,
|
||||||
|
visit::{Dfs, NodeRef},
|
||||||
|
};
|
||||||
use std::{
|
use std::{
|
||||||
collections::{HashMap, HashSet},
|
collections::{HashMap, HashSet},
|
||||||
fs::File,
|
fs::{self, File},
|
||||||
io::{ErrorKind, Read, Seek},
|
io::{ErrorKind, Read, Seek, Write},
|
||||||
path::{Path, PathBuf},
|
path::{Path, PathBuf},
|
||||||
};
|
};
|
||||||
use tokio::sync::{
|
use tokio::sync::{
|
||||||
|
@ -25,16 +29,33 @@ type InodeGraph = DiGraphMap<ino_t, ()>;
|
||||||
|
|
||||||
/// Cleans up unused shared libraries
|
/// Cleans up unused shared libraries
|
||||||
/// and warns about broken dependencies as well
|
/// and warns about broken dependencies as well
|
||||||
#[derive(Default)]
|
pub struct DsoCleaner {
|
||||||
pub struct DsoCleaner {}
|
output_dot: Option<PathBuf>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
|
||||||
struct State {
|
struct State {
|
||||||
paths_map: InodeMap,
|
paths_map: InodeMap,
|
||||||
graph: InodeGraph,
|
graph: InodeGraph,
|
||||||
}
|
}
|
||||||
|
|
||||||
const INODE_ANY_EXECUTABLE: ino_t = 0;
|
impl DsoCleaner {
|
||||||
|
pub fn new(output_dot: Option<PathBuf>) -> Self {
|
||||||
|
Self { output_dot }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for State {
|
||||||
|
fn default() -> Self {
|
||||||
|
let mut paths_map = InodeMap::default();
|
||||||
|
let mut graph = InodeGraph::default();
|
||||||
|
paths_map.insert(ROOT_NODE, HashSet::from([PathBuf::from("«root»")]));
|
||||||
|
graph.add_node(ROOT_NODE);
|
||||||
|
|
||||||
|
Self { paths_map, graph }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const ROOT_NODE: ino_t = 0;
|
||||||
const ELF_MAGIC_HEADER: &[u8; 4] = b"\x7fELF";
|
const ELF_MAGIC_HEADER: &[u8; 4] = b"\x7fELF";
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
|
@ -50,7 +71,11 @@ impl Cleaner for DsoCleaner {
|
||||||
match files.recv().await {
|
match files.recv().await {
|
||||||
Ok(file) => {
|
Ok(file) => {
|
||||||
if let Err(e) = Self::process_file(&mut state, &file) {
|
if let Err(e) = Self::process_file(&mut state, &file) {
|
||||||
log::warn!("{}: {}", file.display(), e);
|
log::warn!(
|
||||||
|
"{}: {} (this might produce wrong results!)",
|
||||||
|
file.display(),
|
||||||
|
e
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(RecvError::Closed) => break,
|
Err(RecvError::Closed) => break,
|
||||||
|
@ -60,16 +85,11 @@ impl Cleaner for DsoCleaner {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// DEBUGGING stuff you can uncomment for a quick and dirty graph:
|
if let Some(dot) = &self.output_dot {
|
||||||
// println!(
|
debug_print_graph(&state, &dot)?;
|
||||||
// "{:?}",
|
|
||||||
// petgraph::dot::Dot::with_config(&state.graph, &[petgraph::dot::Config::EdgeNoLabel])
|
|
||||||
// );
|
|
||||||
|
|
||||||
let mut dfs = Dfs::empty(&state.graph);
|
|
||||||
if state.graph.contains_node(INODE_ANY_EXECUTABLE) {
|
|
||||||
dfs.move_to(INODE_ANY_EXECUTABLE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut dfs = Dfs::new(&state.graph, ROOT_NODE);
|
||||||
while let Some(_) = dfs.next(&state.graph) {}
|
while let Some(_) = dfs.next(&state.graph) {}
|
||||||
|
|
||||||
for path in state
|
for path in state
|
||||||
|
@ -98,6 +118,7 @@ impl Cleaner for DsoCleaner {
|
||||||
|
|
||||||
impl DsoCleaner {
|
impl DsoCleaner {
|
||||||
fn process_file(state: &mut State, path: &Path) -> Result<()> {
|
fn process_file(state: &mut State, path: &Path) -> Result<()> {
|
||||||
|
log::trace!("processing {}", path.display());
|
||||||
let mut f = File::open(path)?;
|
let mut f = File::open(path)?;
|
||||||
let mut hdr = [0u8; 4];
|
let mut hdr = [0u8; 4];
|
||||||
if let Err(e) = f.read_exact(&mut hdr) {
|
if let Err(e) = f.read_exact(&mut hdr) {
|
||||||
|
@ -118,61 +139,63 @@ impl DsoCleaner {
|
||||||
let elf = Elf::parse(&mmap)?;
|
let elf = Elf::parse(&mmap)?;
|
||||||
|
|
||||||
if path.is_symlink() {
|
if path.is_symlink() {
|
||||||
if !elf.is_lib {
|
Self::process_elf_symlink(state, path, &elf)
|
||||||
// we don't care about symlinks to
|
|
||||||
// executables in our graph, as we
|
|
||||||
// are cleaning up only DSOs.
|
|
||||||
Ok(())
|
|
||||||
} else {
|
|
||||||
Self::process_elf_symlink(state, path)
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
Self::process_elf_file(state, path, &elf)
|
Self::process_elf_file(state, path, &elf)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn process_elf_symlink(state: &mut State, path: &Path) -> Result<()> {
|
fn process_elf_symlink(state: &mut State, path: &Path, elf: &Elf) -> Result<()> {
|
||||||
let src = nix::sys::stat::lstat(path)?;
|
let src = nix::sys::stat::lstat(path)?;
|
||||||
let dst = nix::sys::stat::stat(path)?;
|
if !elf.is_lib {
|
||||||
|
// To be able to use DFS on the graph later, we link each executable symlink to a fake ROOT_NODE
|
||||||
|
Self::update_graph(state, "".into(), ROOT_NODE, path.to_owned(), src.st_ino);
|
||||||
|
}
|
||||||
|
|
||||||
|
let current_dir = std::env::current_dir()?;
|
||||||
|
let mut dst_path = std::fs::read_link(path)?;
|
||||||
|
if dst_path.is_absolute() {
|
||||||
|
dst_path = dst_path.strip_prefix("/")?.into();
|
||||||
|
} else {
|
||||||
|
let parent = path.parent().unwrap();
|
||||||
|
dst_path = fs::canonicalize(parent.join(dst_path))?
|
||||||
|
.strip_prefix(current_dir)?
|
||||||
|
.into();
|
||||||
|
}
|
||||||
|
|
||||||
|
let dst = nix::sys::stat::stat(&dst_path)?;
|
||||||
if src.st_dev != dst.st_dev {
|
if src.st_dev != dst.st_dev {
|
||||||
log::warn!(
|
log::warn!(
|
||||||
"dso: {} points outside of the sysroot filesystem, check if this is intended",
|
"dso: {} points outside of the sysroot filesystem, check if this is intended",
|
||||||
path.display()
|
path.display()
|
||||||
);
|
);
|
||||||
return Ok(());
|
} else {
|
||||||
|
log::trace!(
|
||||||
|
"dso: adding to graph symlink: '{}' to '{}'",
|
||||||
|
path.display(),
|
||||||
|
dst_path.display()
|
||||||
|
);
|
||||||
|
Self::update_graph(state, path.into(), src.st_ino, dst_path, dst.st_ino);
|
||||||
}
|
}
|
||||||
let current_dir = std::env::current_dir()?;
|
|
||||||
let dst_path = std::fs::canonicalize(path)?
|
|
||||||
.strip_prefix(current_dir)?
|
|
||||||
.to_path_buf();
|
|
||||||
|
|
||||||
log::trace!(
|
|
||||||
"dso: adding to graph symlink: '{}' to '{}'",
|
|
||||||
path.display(),
|
|
||||||
dst_path.display()
|
|
||||||
);
|
|
||||||
|
|
||||||
Self::update_graph(state, path.into(), src.st_ino, dst_path, dst.st_ino);
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn process_elf_file(state: &mut State, path: &Path, elf: &Elf) -> Result<()> {
|
fn process_elf_file(state: &mut State, path: &Path, elf: &Elf) -> Result<()> {
|
||||||
log::trace!("dso: adding to graph elf file '{}'", path.display());
|
log::trace!("dso: adding to graph elf file '{}'", path.display());
|
||||||
|
|
||||||
let search_paths = Self::determine_lib_search_paths(path, elf)?;
|
let src = nix::sys::stat::stat(path)?;
|
||||||
|
|
||||||
let src_stat = nix::sys::stat::stat(path)?;
|
if !elf.is_lib {
|
||||||
let src_inode = if elf.is_lib {
|
// To be able to use DFS on the graph later, we link each executable to a fake ROOT_NODE
|
||||||
src_stat.st_ino
|
Self::update_graph(state, "".into(), ROOT_NODE, path.to_owned(), src.st_ino);
|
||||||
} else {
|
}
|
||||||
// We put all executables in the same node
|
|
||||||
INODE_ANY_EXECUTABLE
|
let search_paths = Self::determine_lib_search_paths(path, elf)?;
|
||||||
};
|
|
||||||
|
|
||||||
'next_lib: for &library in elf.libraries.iter() {
|
'next_lib: for &library in elf.libraries.iter() {
|
||||||
for lib_path in search_paths.iter() {
|
for lib_path in search_paths.iter() {
|
||||||
let tentative_path = PathBuf::from(lib_path).strip_prefix("/")?.join(library);
|
let tentative_path = PathBuf::from(lib_path).strip_prefix("/")?.join(library);
|
||||||
let dst = match nix::sys::stat::stat(&tentative_path) {
|
let dst = match nix::sys::stat::lstat(&tentative_path) {
|
||||||
Ok(dst) => dst,
|
Ok(dst) => dst,
|
||||||
Err(Errno::ENOENT) => continue,
|
Err(Errno::ENOENT) => continue,
|
||||||
Err(e) => anyhow::bail!(
|
Err(e) => anyhow::bail!(
|
||||||
|
@ -182,11 +205,11 @@ impl DsoCleaner {
|
||||||
),
|
),
|
||||||
};
|
};
|
||||||
|
|
||||||
if src_stat.st_dev != dst.st_dev {
|
if src.st_dev != dst.st_dev {
|
||||||
continue; // These are not the droids you are looking for.
|
continue; // These are not the droids you are looking for.
|
||||||
}
|
}
|
||||||
|
|
||||||
Self::update_graph(state, path.into(), src_inode, tentative_path, dst.st_ino);
|
Self::update_graph(state, path.into(), src.st_ino, tentative_path, dst.st_ino);
|
||||||
continue 'next_lib;
|
continue 'next_lib;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -272,3 +295,43 @@ impl DsoCleaner {
|
||||||
state.graph.add_edge(src_inode, dst_inode, ());
|
state.graph.add_edge(src_inode, dst_inode, ());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn debug_print_graph(state: &State, output_file: &Path) -> Result<()> {
|
||||||
|
use std::ffi::OsStr;
|
||||||
|
|
||||||
|
let mut output_dot = File::create(output_file)?;
|
||||||
|
write!(
|
||||||
|
&mut output_dot,
|
||||||
|
indoc::indoc! {
|
||||||
|
"digraph {{
|
||||||
|
rankdir=\"LR\"
|
||||||
|
{:?}
|
||||||
|
}}"
|
||||||
|
},
|
||||||
|
petgraph::dot::Dot::with_attr_getters(
|
||||||
|
&state.graph,
|
||||||
|
&[
|
||||||
|
dot::Config::NodeNoLabel,
|
||||||
|
dot::Config::EdgeNoLabel,
|
||||||
|
dot::Config::GraphContentOnly
|
||||||
|
],
|
||||||
|
&|_, _| { String::new() },
|
||||||
|
&|_, n| {
|
||||||
|
let paths = state.paths_map.get(&n.id()).unwrap();
|
||||||
|
let first_path = paths.iter().next().expect(&format!(
|
||||||
|
"dso: you have a path map with an empty entry for inode {}",
|
||||||
|
n.id()
|
||||||
|
));
|
||||||
|
format!(
|
||||||
|
"label = \"({}, {})\"",
|
||||||
|
n.weight(),
|
||||||
|
&first_path
|
||||||
|
.file_name()
|
||||||
|
.unwrap_or(OsStr::new("🚀"))
|
||||||
|
.to_string_lossy()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue