forked from matteo/sysroot-cleaner
355 lines
11 KiB
Rust
355 lines
11 KiB
Rust
// SPDX-FileCopyrightText: Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
|
|
// SPDX-License-Identifier: EUPL-1.2
|
|
|
|
use super::Cleaner;
|
|
use crate::decision::{Action, Decision};
|
|
use anyhow::{Context, Result};
|
|
use async_trait::async_trait;
|
|
use goblin::elf::Elf;
|
|
use memmap2::Mmap;
|
|
use nix::{errno::Errno, libc::ino_t};
|
|
use petgraph::{
|
|
dot,
|
|
prelude::DiGraphMap,
|
|
visit::{Dfs, NodeRef},
|
|
};
|
|
use std::{
|
|
collections::{HashMap, HashSet},
|
|
fs::{self, File},
|
|
io::{ErrorKind, Read, Seek, Write},
|
|
path::{Path, PathBuf},
|
|
};
|
|
use tokio::sync::mpsc;
|
|
|
|
type InodeMap = HashMap<ino_t, HashSet<PathBuf>>;
|
|
type InodeGraph = DiGraphMap<ino_t, ()>;
|
|
|
|
/// Cleans up unused shared libraries
/// and warns about broken dependencies as well.
///
/// The cleaner is driven through its [`Cleaner`] implementation; this type
/// only carries the configuration for that run.
#[derive(Debug)]
pub struct DsoCleaner {
    /// When set, the dependency graph built during a run is also written
    /// to this file before unreachable entries are marked for removal.
    output_dot: Option<PathBuf>,
}
|
|
|
|
struct State {
|
|
paths_map: InodeMap,
|
|
graph: InodeGraph,
|
|
}
|
|
|
|
impl DsoCleaner {
|
|
pub fn new(output_dot: Option<PathBuf>) -> Self {
|
|
Self { output_dot }
|
|
}
|
|
}
|
|
|
|
const ROOT_NODE: ino_t = 0;
|
|
const ELF_MAGIC_HEADER: &[u8; 4] = b"\x7fELF";
|
|
|
|
#[async_trait]
|
|
impl Cleaner for DsoCleaner {
|
|
async fn run(
|
|
&mut self,
|
|
mut input: mpsc::Receiver<Decision>,
|
|
output: mpsc::Sender<Decision>,
|
|
) -> Result<()> {
|
|
let mut state = State::default();
|
|
|
|
let mut inodes_to_keep = HashSet::new();
|
|
inodes_to_keep.insert(ROOT_NODE);
|
|
|
|
while let Some(decision) = input.recv().await {
|
|
// If we know something MUST be removed (e.g. a binary or
|
|
// a symlink) it makes sense now to avoid adding it to the graph so
|
|
// that also its dependencies will not be kept.
|
|
if decision.action != Action::Remove {
|
|
state.process_path(&decision.path).unwrap_or_else(|e| {
|
|
log::warn!(
|
|
"{}: {} (this might produce wrong results!)",
|
|
decision.path.display(),
|
|
e
|
|
);
|
|
});
|
|
}
|
|
|
|
// If something is "keep", add it to the list
|
|
// of nodes on the DFS stack for the graph
|
|
// we are building, so that it will be kept along
|
|
// with any dependencies.
|
|
if decision.action == Action::Keep {
|
|
let ino = nix::sys::stat::lstat(&decision.path)?.st_ino;
|
|
inodes_to_keep.insert(ino);
|
|
}
|
|
|
|
// If something was marked as "keep" or "remove" before,
|
|
// we can immediately send it back as such, so that previous
|
|
// determinations are preserved.
|
|
//
|
|
// TODO: if the user forces the removal of a needed dependency
|
|
// of an ELF file, it is not for us to stop them, but we should
|
|
// probably warn them this is unwise?
|
|
if decision.action != Action::Undecided {
|
|
output.send(decision).await?;
|
|
}
|
|
}
|
|
|
|
if let Some(dot) = &self.output_dot {
|
|
state.debug_print_graph(&dot)?;
|
|
}
|
|
|
|
let mut dfs = Dfs::empty(&state.graph);
|
|
dfs.stack = inodes_to_keep.into_iter().collect();
|
|
while let Some(_) = dfs.next(&state.graph) {}
|
|
|
|
for (inode, paths) in state.paths_map.into_iter() {
|
|
let action = if !dfs.discovered.contains(&inode) {
|
|
// The file represented by this inode was unreachable after
|
|
// conducting our DFS, hence all its associated paths
|
|
// can be removed.
|
|
Action::Remove
|
|
} else {
|
|
Action::Undecided
|
|
};
|
|
|
|
for path in paths {
|
|
output.send(Decision { path, action }).await?;
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
impl Default for State {
|
|
fn default() -> Self {
|
|
let mut paths_map = InodeMap::default();
|
|
let mut graph = InodeGraph::default();
|
|
let fake_root_node = PathBuf::from("«root»");
|
|
|
|
paths_map.insert(ROOT_NODE, HashSet::from([fake_root_node]));
|
|
graph.add_node(ROOT_NODE);
|
|
|
|
Self { paths_map, graph }
|
|
}
|
|
}
|
|
|
|
impl State {
|
|
fn process_path(&mut self, path: &Path) -> Result<()> {
|
|
log::trace!("processing {}", path.display());
|
|
let mut f = File::open(path)?;
|
|
|
|
if !is_elf(&mut f).context(path.display().to_string())? {
|
|
return Ok(()); // Ignore non-ELF files
|
|
}
|
|
|
|
f.rewind()?;
|
|
let mmap = unsafe { Mmap::map(&f)? };
|
|
let elf = Elf::parse(&mmap)?;
|
|
|
|
if path.is_symlink() {
|
|
self.process_elf_symlink(path, &elf)
|
|
} else {
|
|
self.process_elf_file(path, &elf)
|
|
}
|
|
}
|
|
|
|
fn process_elf_symlink(&mut self, path: &Path, elf: &Elf) -> Result<()> {
|
|
let src = nix::sys::stat::lstat(path)?;
|
|
if !elf.is_lib {
|
|
// To be able to use DFS on the graph later, we link each executable symlink to a fake ROOT_NODE
|
|
self.update_graph("".into(), ROOT_NODE, path.to_owned(), src.st_ino);
|
|
}
|
|
|
|
let current_dir = std::env::current_dir()?;
|
|
let mut dst_path = std::fs::read_link(path)?;
|
|
if dst_path.is_absolute() {
|
|
dst_path = dst_path.strip_prefix("/")?.into();
|
|
} else {
|
|
let parent = path.parent().unwrap();
|
|
dst_path = fs::canonicalize(parent.join(dst_path))?
|
|
.strip_prefix(current_dir)?
|
|
.into();
|
|
}
|
|
|
|
let dst = nix::sys::stat::stat(&dst_path)?;
|
|
if src.st_dev != dst.st_dev {
|
|
log::warn!(
|
|
"dso: {} points outside of the sysroot filesystem, check if this is intended",
|
|
path.display()
|
|
);
|
|
} else {
|
|
log::trace!(
|
|
"dso: adding to graph symlink: '{}' to '{}'",
|
|
path.display(),
|
|
dst_path.display()
|
|
);
|
|
self.update_graph(path.into(), src.st_ino, dst_path, dst.st_ino);
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn process_elf_file(&mut self, path: &Path, elf: &Elf) -> Result<()> {
|
|
log::trace!("dso: adding to graph elf file '{}'", path.display());
|
|
|
|
let src = nix::sys::stat::stat(path)?;
|
|
|
|
if !elf.is_lib {
|
|
// To be able to use DFS on the graph later, we link each executable to a fake ROOT_NODE
|
|
self.update_graph("".into(), ROOT_NODE, path.to_owned(), src.st_ino);
|
|
}
|
|
|
|
let search_paths = determine_lib_search_paths(path, elf)?;
|
|
|
|
'next_lib: for &library in elf.libraries.iter() {
|
|
for lib_path in search_paths.iter() {
|
|
let tentative_path = PathBuf::from(lib_path).strip_prefix("/")?.join(library);
|
|
let dst = match nix::sys::stat::lstat(&tentative_path) {
|
|
Ok(dst) => dst,
|
|
Err(Errno::ENOENT) => continue,
|
|
Err(e) => anyhow::bail!(
|
|
"got errno {} while accessing {}",
|
|
e,
|
|
tentative_path.display()
|
|
),
|
|
};
|
|
|
|
if src.st_dev != dst.st_dev {
|
|
continue; // These are not the droids you are looking for.
|
|
}
|
|
|
|
self.update_graph(path.into(), src.st_ino, tentative_path, dst.st_ino);
|
|
continue 'next_lib;
|
|
}
|
|
|
|
anyhow::bail!("{}: unable to find library {}", path.display(), library);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn update_graph(
|
|
&mut self,
|
|
src_path: PathBuf,
|
|
src_inode: ino_t,
|
|
dst_path: PathBuf,
|
|
dst_inode: ino_t,
|
|
) {
|
|
self.paths_map
|
|
.entry(src_inode)
|
|
.or_default()
|
|
.insert(src_path);
|
|
|
|
self.paths_map
|
|
.entry(dst_inode)
|
|
.or_default()
|
|
.insert(dst_path);
|
|
|
|
self.graph.add_edge(src_inode, dst_inode, ());
|
|
}
|
|
|
|
fn debug_print_graph(&self, output_file: &Path) -> Result<()> {
|
|
use std::ffi::OsStr;
|
|
|
|
let mut output_dot = File::create(output_file)?;
|
|
write!(
|
|
&mut output_dot,
|
|
indoc::indoc! {
|
|
"digraph {{
|
|
rankdir=\"LR\"
|
|
{:?}
|
|
}}"
|
|
},
|
|
petgraph::dot::Dot::with_attr_getters(
|
|
&self.graph,
|
|
&[
|
|
dot::Config::NodeNoLabel,
|
|
dot::Config::EdgeNoLabel,
|
|
dot::Config::GraphContentOnly
|
|
],
|
|
&|_, _| { String::new() },
|
|
&|_, n| {
|
|
let paths = self.paths_map.get(&n.id()).unwrap();
|
|
let first_path = paths.iter().next().expect(&format!(
|
|
"dso: you have a path map with an empty entry for inode {}",
|
|
n.id()
|
|
));
|
|
format!(
|
|
"label = \"({}, {})\"",
|
|
n.weight(),
|
|
&first_path
|
|
.file_name()
|
|
.unwrap_or(OsStr::new("🚀"))
|
|
.to_string_lossy()
|
|
)
|
|
}
|
|
)
|
|
)?;
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
fn determine_lib_search_paths(path: &Path, elf: &Elf<'_>) -> Result<Vec<String>> {
|
|
let mut search_paths = vec![];
|
|
|
|
let current_dir = std::env::current_dir()?;
|
|
let origin = std::fs::canonicalize(path)?
|
|
.parent()
|
|
.unwrap()
|
|
.strip_prefix(current_dir)?
|
|
.to_path_buf()
|
|
.into_os_string()
|
|
.into_string()
|
|
.map_err(|s| anyhow::anyhow!("cannot represent {:?} as a UTF-8 string", s))?;
|
|
|
|
if elf.rpaths != vec![""] {
|
|
if elf.runpaths != vec![""] {
|
|
let mut rpaths = elf
|
|
.rpaths
|
|
.iter()
|
|
.map(|p| p.replace("$ORIGIN", &origin))
|
|
.collect::<Vec<_>>();
|
|
search_paths.append(&mut rpaths);
|
|
}
|
|
|
|
search_paths.append(&mut get_env_library_paths());
|
|
}
|
|
|
|
if elf.runpaths != vec![""] {
|
|
let mut runpaths = elf
|
|
.runpaths
|
|
.iter()
|
|
.map(|p| p.replace("$ORIGIN", &origin))
|
|
.collect::<Vec<_>>();
|
|
search_paths.append(&mut runpaths);
|
|
}
|
|
|
|
search_paths.push("/usr/local/lib".into());
|
|
search_paths.push("/lib".into());
|
|
search_paths.push("/usr/lib".into());
|
|
Ok(search_paths)
|
|
}
|
|
|
|
/// Returns the directories listed in the `LD_LIBRARY_PATH` environment
/// variable, in order.
///
/// The value is split on `:` and empty components are skipped. An unset
/// variable, or one that is not valid Unicode, yields an empty list.
fn get_env_library_paths() -> Vec<String> {
    std::env::var("LD_LIBRARY_PATH")
        .map(|value| {
            value
                .split(':')
                // Keep the real entries; the previous predicate was inverted
                // and kept only the empty ones.
                .filter(|entry| !entry.is_empty())
                .map(String::from)
                .collect()
        })
        .unwrap_or_default()
}
|
|
|
|
fn is_elf(f: &mut File) -> Result<bool> {
|
|
let mut hdr = [0u8; 4];
|
|
if let Err(e) = f.read_exact(&mut hdr) {
|
|
if e.kind() != ErrorKind::UnexpectedEof {
|
|
anyhow::bail!(e)
|
|
}
|
|
|
|
return Ok(false);
|
|
};
|
|
|
|
Ok(&hdr == ELF_MAGIC_HEADER)
|
|
}
|