sysroot-cleaner/src/cleaners/dso.rs

355 lines
11 KiB
Rust

// SPDX-FileCopyrightText: Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
use super::Cleaner;
use crate::decision::{Action, Decision};
use anyhow::{Context, Result};
use async_trait::async_trait;
use goblin::elf::Elf;
use memmap2::Mmap;
use nix::{errno::Errno, libc::ino_t};
use petgraph::{
dot,
prelude::DiGraphMap,
visit::{Dfs, NodeRef},
};
use std::{
collections::{HashMap, HashSet},
fs::{self, File},
io::{ErrorKind, Read, Seek, Write},
path::{Path, PathBuf},
};
use tokio::sync::mpsc;
/// Maps an inode number to every path that has been recorded for that inode.
type InodeMap = HashMap<ino_t, HashSet<PathBuf>>;
/// Directed graph whose nodes are inode numbers; edges carry no weight.
type InodeGraph = DiGraphMap<ino_t, ()>;
/// Cleans up unused shared libraries
/// and warns about broken dependencies as well.
pub struct DsoCleaner {
/// When set, the dependency graph is dumped to this file (as a `digraph`)
/// after all incoming decisions have been read.
output_dot: Option<PathBuf>,
}
/// Bookkeeping accumulated while the incoming decisions are processed.
struct State {
/// Paths recorded for each inode that was added to `graph`.
paths_map: InodeMap,
/// Edges added by `update_graph`: from a source inode (the synthetic root,
/// a symlink or an ELF file) to a destination inode (an executable, a link
/// target or a needed library).
graph: InodeGraph,
}
impl DsoCleaner {
pub fn new(output_dot: Option<PathBuf>) -> Self {
Self { output_dot }
}
}
/// Inode number reserved for the synthetic root node: executables are linked
/// to it so that a single DFS can start from it.
const ROOT_NODE: ino_t = 0;
/// The four bytes a file must start with to be treated as an ELF object.
const ELF_MAGIC_HEADER: &[u8; 4] = b"\x7fELF";
#[async_trait]
impl Cleaner for DsoCleaner {
    /// Consumes every incoming decision, builds the ELF dependency graph and
    /// emits a decision for each path recorded in that graph.
    ///
    /// Decisions that arrive already decided (anything but `Undecided`) are
    /// forwarded unchanged as soon as they are read. Once the input channel is
    /// closed, a DFS is run from the synthetic root and from every inode marked
    /// `Keep`; paths whose inode was not reached are sent as `Remove`, all
    /// other recorded paths as `Undecided`.
    ///
    /// # Errors
    ///
    /// Fails if a `Keep` path cannot be `lstat`ed, if the output channel is
    /// closed, or if the optional graph dump cannot be written.
    async fn run(
        &mut self,
        mut input: mpsc::Receiver<Decision>,
        output: mpsc::Sender<Decision>,
    ) -> Result<()> {
        let mut state = State::default();
        // The synthetic root is always a DFS starting point.
        let mut inodes_to_keep = HashSet::from([ROOT_NODE]);

        while let Some(decision) = input.recv().await {
            // Anything that MUST be removed (e.g. a binary or a symlink) is
            // left out of the graph, so that its dependencies are not kept
            // on its behalf.
            if decision.action != Action::Remove {
                if let Err(e) = state.process_path(&decision.path) {
                    log::warn!(
                        "{}: {} (this might produce wrong results!)",
                        decision.path.display(),
                        e
                    );
                }
            }

            // A "keep" entry becomes an additional DFS starting point, so it
            // survives together with everything it depends on.
            if decision.action == Action::Keep {
                let stat = nix::sys::stat::lstat(&decision.path)?;
                inodes_to_keep.insert(stat.st_ino);
            }

            // Earlier "keep"/"remove" determinations are passed through
            // untouched so that they are preserved.
            //
            // TODO: if the user forces the removal of a needed dependency
            // of an ELF file, it is not for us to stop them, but we should
            // probably warn them this is unwise?
            if decision.action != Action::Undecided {
                output.send(decision).await?;
            }
        }

        if let Some(dot) = self.output_dot.as_deref() {
            state.debug_print_graph(dot)?;
        }

        // Mark everything reachable from the starting points.
        let mut dfs = Dfs::empty(&state.graph);
        dfs.stack.extend(inodes_to_keep);
        while dfs.next(&state.graph).is_some() {}

        for (inode, paths) in state.paths_map {
            // An inode the DFS never reached is unused: all of its paths can
            // be removed. Reached inodes are left for others to decide.
            let action = if dfs.discovered.contains(&inode) {
                Action::Undecided
            } else {
                Action::Remove
            };
            for path in paths {
                output.send(Decision { path, action }).await?;
            }
        }

        Ok(())
    }
}
impl Default for State {
    /// Creates a state that contains only the synthetic root node, registered
    /// under a placeholder path.
    fn default() -> Self {
        let root_paths = HashSet::from([PathBuf::from("«root»")]);
        let paths_map = InodeMap::from([(ROOT_NODE, root_paths)]);

        let mut graph = InodeGraph::default();
        graph.add_node(ROOT_NODE);

        Self { paths_map, graph }
    }
}
impl State {
    /// Records `path` in the dependency graph if it is an ELF object.
    ///
    /// The file is opened, checked for the ELF magic number and parsed.
    /// Non-ELF files are skipped and reported as success.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be opened, read, mapped or parsed, or if
    /// registering it as a symlink or as a regular ELF file fails.
    fn process_path(&mut self, path: &Path) -> Result<()> {
        log::trace!("processing {}", path.display());

        let mut f = File::open(path)?;
        if !is_elf(&mut f).context(path.display().to_string())? {
            return Ok(()); // Ignore non-ELF files
        }
        f.rewind()?;

        // SAFETY: the mapping is read-only and is dropped before this function
        // returns. This relies on nobody truncating or rewriting the file
        // while it is being parsed.
        let mmap = unsafe { Mmap::map(&f)? };
        let elf = Elf::parse(&mmap)?;

        if path.is_symlink() {
            self.process_elf_symlink(path, &elf)
        } else {
            self.process_elf_file(path, &elf)
        }
    }

    /// Adds an edge from the symlink at `path` to the ELF file it points to.
    ///
    /// Symlinks to executables (`!elf.is_lib`) are additionally attached to
    /// `ROOT_NODE`. An absolute link target is re-rooted by stripping its
    /// leading `/`; a relative one is canonicalized and made relative to the
    /// current directory. A target on a different device is only reported
    /// with a warning and not added to the graph.
    ///
    /// # Errors
    ///
    /// Fails if the link or its target cannot be inspected, or if the
    /// canonical target is not located below the current directory.
    fn process_elf_symlink(&mut self, path: &Path, elf: &Elf) -> Result<()> {
        let src = nix::sys::stat::lstat(path)?;
        if !elf.is_lib {
            // To be able to use DFS on the graph later, we link each
            // executable symlink to a fake ROOT_NODE.
            self.update_graph("".into(), ROOT_NODE, path.to_owned(), src.st_ino);
        }

        let current_dir = std::env::current_dir()?;
        let link_target = std::fs::read_link(path)?;
        let dst_path: PathBuf = if link_target.is_absolute() {
            link_target.strip_prefix("/")?.into()
        } else {
            let parent = path.parent().unwrap();
            fs::canonicalize(parent.join(link_target))?
                .strip_prefix(current_dir)?
                .into()
        };

        let dst = nix::sys::stat::stat(&dst_path)?;
        if src.st_dev != dst.st_dev {
            log::warn!(
                "dso: {} points outside of the sysroot filesystem, check if this is intended",
                path.display()
            );
        } else {
            log::trace!(
                "dso: adding to graph symlink: '{}' to '{}'",
                path.display(),
                dst_path.display()
            );
            self.update_graph(path.into(), src.st_ino, dst_path, dst.st_ino);
        }
        Ok(())
    }

    /// Adds an edge from the ELF file at `path` to each library it needs.
    ///
    /// Executables (`!elf.is_lib`) are additionally attached to `ROOT_NODE`.
    /// Every needed library is looked up in the directories returned by
    /// `determine_lib_search_paths`, in order; the first existing candidate
    /// on the same device as `path` wins.
    ///
    /// # Errors
    ///
    /// Fails if `path` cannot be inspected, if a candidate cannot be accessed
    /// for a reason other than `ENOENT`, or if a needed library is not found
    /// in any search directory.
    fn process_elf_file(&mut self, path: &Path, elf: &Elf) -> Result<()> {
        log::trace!("dso: adding to graph elf file '{}'", path.display());

        let src = nix::sys::stat::stat(path)?;
        if !elf.is_lib {
            // To be able to use DFS on the graph later, we link each
            // executable to a fake ROOT_NODE.
            self.update_graph("".into(), ROOT_NODE, path.to_owned(), src.st_ino);
        }

        let search_paths = determine_lib_search_paths(path, elf)?;
        'next_lib: for &library in elf.libraries.iter() {
            for lib_path in search_paths.iter().map(Path::new) {
                // Candidates are looked up relative to the current directory.
                // Rooted entries lose their leading `/`. Entries that are
                // already relative (e.g. an expanded `$ORIGIN`, or a relative
                // `LD_LIBRARY_PATH` item) are used as they are instead of
                // aborting the whole file with a strip-prefix error.
                let tentative_path = lib_path
                    .strip_prefix("/")
                    .unwrap_or(lib_path)
                    .join(library);

                let dst = match nix::sys::stat::lstat(&tentative_path) {
                    Ok(dst) => dst,
                    Err(Errno::ENOENT) => continue,
                    Err(e) => anyhow::bail!(
                        "got errno {} while accessing {}",
                        e,
                        tentative_path.display()
                    ),
                };

                if src.st_dev != dst.st_dev {
                    continue; // These are not the droids you are looking for.
                }

                self.update_graph(path.into(), src.st_ino, tentative_path, dst.st_ino);
                continue 'next_lib;
            }

            anyhow::bail!("{}: unable to find library {}", path.display(), library);
        }
        Ok(())
    }

    /// Records both paths under their inode and adds the edge
    /// `src_inode -> dst_inode` to the graph.
    fn update_graph(
        &mut self,
        src_path: PathBuf,
        src_inode: ino_t,
        dst_path: PathBuf,
        dst_inode: ino_t,
    ) {
        self.paths_map
            .entry(src_inode)
            .or_default()
            .insert(src_path);
        self.paths_map
            .entry(dst_inode)
            .or_default()
            .insert(dst_path);
        self.graph.add_edge(src_inode, dst_inode, ());
    }

    /// Writes the graph to `output_file` as a `digraph`, labelling each node
    /// with its inode number and the file name of one of its recorded paths.
    ///
    /// # Panics
    ///
    /// Panics if a graph node has no entry, or an empty entry, in `paths_map`.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be created or written.
    fn debug_print_graph(&self, output_file: &Path) -> Result<()> {
        use std::ffi::OsStr;

        // Formatting emits many small fragments; buffer them instead of
        // issuing one write per fragment.
        let mut output_dot = std::io::BufWriter::new(File::create(output_file)?);
        write!(
            &mut output_dot,
            indoc::indoc! {
                "digraph {{
                rankdir=\"LR\"
                {:?}
                }}"
            },
            petgraph::dot::Dot::with_attr_getters(
                &self.graph,
                &[
                    dot::Config::NodeNoLabel,
                    dot::Config::EdgeNoLabel,
                    dot::Config::GraphContentOnly
                ],
                &|_, _| String::new(),
                &|_, n| {
                    let paths = self.paths_map.get(&n.id()).unwrap();
                    // The panic message is only built when the invariant is broken.
                    let first_path = paths.iter().next().unwrap_or_else(|| {
                        panic!(
                            "dso: you have a path map with an empty entry for inode {}",
                            n.id()
                        )
                    });
                    format!(
                        "label = \"({}, {})\"",
                        n.weight(),
                        &first_path
                            .file_name()
                            .unwrap_or(OsStr::new("🚀"))
                            .to_string_lossy()
                    )
                }
            )
        )?;
        // Flush explicitly: errors raised while dropping a BufWriter are lost.
        output_dot.flush()?;
        Ok(())
    }
}
/// Builds the ordered list of directories in which the libraries needed by
/// the ELF file at `path` are searched.
///
/// `$ORIGIN` in rpath/runpath entries is replaced by the directory of the
/// canonicalized `path`, expressed relative to the current directory. The
/// list always ends with `/usr/local/lib`, `/lib` and `/usr/lib`.
///
/// # Errors
///
/// Fails if the current directory or the canonical form of `path` cannot be
/// determined, if `path` is not located below the current directory, or if
/// its directory is not valid UTF-8.
fn determine_lib_search_paths(path: &Path, elf: &Elf<'_>) -> Result<Vec<String>> {
    let current_dir = std::env::current_dir()?;
    let canonical = std::fs::canonicalize(path)?;
    let origin = canonical
        .parent()
        .unwrap()
        .strip_prefix(current_dir)?
        .to_path_buf()
        .into_os_string()
        .into_string()
        .map_err(|s| anyhow::anyhow!("cannot represent {:?} as a UTF-8 string", s))?;

    let rpaths_usable = elf.rpaths != [""];
    let runpaths_usable = elf.runpaths != [""];

    // NOTE(review): ld.so consults DT_RPATH only when DT_RUNPATH is absent and
    // consults LD_LIBRARY_PATH regardless of either; the conditions below do
    // not obviously match that order — confirm the intent before changing.
    let mut search_paths = Vec::new();
    if rpaths_usable {
        if runpaths_usable {
            search_paths.extend(elf.rpaths.iter().map(|p| p.replace("$ORIGIN", &origin)));
        }
        search_paths.extend(get_env_library_paths());
    }
    if runpaths_usable {
        search_paths.extend(elf.runpaths.iter().map(|p| p.replace("$ORIGIN", &origin)));
    }

    // Fallback directories, always searched last.
    search_paths.extend(["/usr/local/lib", "/lib", "/usr/lib"].map(String::from));
    Ok(search_paths)
}
/// Returns the directories listed in the `LD_LIBRARY_PATH` environment
/// variable, in order.
///
/// The value is split on `:` and empty entries (leading, trailing or doubled
/// separators) are dropped. An unset or non-Unicode variable yields an empty
/// list.
fn get_env_library_paths() -> Vec<String> {
    std::env::var("LD_LIBRARY_PATH")
        .map(|value| {
            value
                .split(':')
                // Keep the real directories; the previous predicate was
                // inverted and kept only the empty entries.
                .filter(|dir| !dir.is_empty())
                .map(String::from)
                .collect()
        })
        .unwrap_or_default()
}
/// Tells whether `f` starts with the ELF magic number.
///
/// Reads four bytes from the current position of `f`. A file too short to
/// hold them is reported as not being ELF.
///
/// # Errors
///
/// Fails on any read error other than an unexpected end of file.
fn is_elf(f: &mut File) -> Result<bool> {
    let mut magic = [0u8; 4];
    match f.read_exact(&mut magic) {
        Ok(()) => Ok(&magic == ELF_MAGIC_HEADER),
        // Too short to contain the magic number: not an ELF file.
        Err(e) if e.kind() == ErrorKind::UnexpectedEof => Ok(false),
        Err(e) => anyhow::bail!(e),
    }
}