// SPDX-FileCopyrightText: Matteo Settenvini // SPDX-License-Identifier: EUPL-1.2 use super::Cleaner; use crate::decision::{Action, Decision}; use anyhow::{Context, Result}; use async_trait::async_trait; use goblin::elf::Elf; use memmap2::Mmap; use nix::{errno::Errno, libc::ino_t}; use petgraph::{ dot, prelude::DiGraphMap, visit::{Dfs, NodeRef}, }; use std::{ collections::{HashMap, HashSet}, fs::{self, File}, io::{ErrorKind, Read, Seek, Write}, path::{Path, PathBuf}, }; use tokio::sync::mpsc; type InodeMap = HashMap>; type InodeGraph = DiGraphMap; /// Cleans up unused shared libraries /// and warns about broken dependencies as well pub struct DsoCleaner { output_dot: Option, } struct State { paths_map: InodeMap, graph: InodeGraph, } impl DsoCleaner { pub fn new(output_dot: Option) -> Self { Self { output_dot } } } const ROOT_NODE: ino_t = 0; const ELF_MAGIC_HEADER: &[u8; 4] = b"\x7fELF"; #[async_trait] impl Cleaner for DsoCleaner { async fn run( &mut self, mut input: mpsc::Receiver, output: mpsc::Sender, ) -> Result<()> { let mut state = State::default(); let mut inodes_to_keep = HashSet::new(); inodes_to_keep.insert(ROOT_NODE); while let Some(decision) = input.recv().await { // If we know something MUST be removed (e.g. a binary or // a symlink) it makes sense now to avoid adding it to the graph so // that also its dependencies will not be kept. if decision.action != Action::Remove { state.process_path(&decision.path).unwrap_or_else(|e| { log::warn!( "{}: {} (this might produce wrong results!)", decision.path.display(), e ); }); } // If something is "keep", add it to the list // of nodes on the DFS stack for the graph // we are building, so that it will be kept along // with any dependencies. if decision.action == Action::Keep { let ino = nix::sys::stat::lstat(&decision.path)?.st_ino; inodes_to_keep.insert(ino); } // If something was marked as "keep" or "remove" before, // we can immediately send it back as such, so that previous // determinations are preserved. // // TODO: if the user forces the removal of a needed dependency // of an ELF file, it is not for us to stop them, but we should // probably warn them this is unwise? if decision.action != Action::Undecided { output.send(decision).await?; } } if let Some(dot) = &self.output_dot { state.debug_print_graph(&dot)?; } let mut dfs = Dfs::empty(&state.graph); dfs.stack = inodes_to_keep.into_iter().collect(); while let Some(_) = dfs.next(&state.graph) {} for (inode, paths) in state.paths_map.into_iter() { let action = if !dfs.discovered.contains(&inode) { // The file represented by this inode was unreachable after // conducting our DFS, hence all its associated paths // can be removed. Action::Remove } else { Action::Undecided }; for path in paths { output.send(Decision { path, action }).await?; } } Ok(()) } } impl Default for State { fn default() -> Self { let mut paths_map = InodeMap::default(); let mut graph = InodeGraph::default(); let fake_root_node = PathBuf::from("«root»"); paths_map.insert(ROOT_NODE, HashSet::from([fake_root_node])); graph.add_node(ROOT_NODE); Self { paths_map, graph } } } impl State { fn process_path(&mut self, path: &Path) -> Result<()> { log::trace!("processing {}", path.display()); let mut f = File::open(path)?; if !is_elf(&mut f).context(path.display().to_string())? { return Ok(()); // Ignore non-ELF files } f.rewind()?; let mmap = unsafe { Mmap::map(&f)? }; let elf = Elf::parse(&mmap)?; if path.is_symlink() { self.process_elf_symlink(path, &elf) } else { self.process_elf_file(path, &elf) } } fn process_elf_symlink(&mut self, path: &Path, elf: &Elf) -> Result<()> { let src = nix::sys::stat::lstat(path)?; if !elf.is_lib { // To be able to use DFS on the graph later, we link each executable symlink to a fake ROOT_NODE self.update_graph("".into(), ROOT_NODE, path.to_owned(), src.st_ino); } let current_dir = std::env::current_dir()?; let mut dst_path = std::fs::read_link(path)?; if dst_path.is_absolute() { dst_path = dst_path.strip_prefix("/")?.into(); } else { let parent = path.parent().unwrap(); dst_path = fs::canonicalize(parent.join(dst_path))? .strip_prefix(current_dir)? .into(); } let dst = nix::sys::stat::stat(&dst_path)?; if src.st_dev != dst.st_dev { log::warn!( "dso: {} points outside of the sysroot filesystem, check if this is intended", path.display() ); } else { log::trace!( "dso: adding to graph symlink: '{}' to '{}'", path.display(), dst_path.display() ); self.update_graph(path.into(), src.st_ino, dst_path, dst.st_ino); } Ok(()) } fn process_elf_file(&mut self, path: &Path, elf: &Elf) -> Result<()> { log::trace!("dso: adding to graph elf file '{}'", path.display()); let src = nix::sys::stat::stat(path)?; if !elf.is_lib { // To be able to use DFS on the graph later, we link each executable to a fake ROOT_NODE self.update_graph("".into(), ROOT_NODE, path.to_owned(), src.st_ino); } let search_paths = determine_lib_search_paths(path, elf)?; 'next_lib: for &library in elf.libraries.iter() { for lib_path in search_paths.iter() { let tentative_path = PathBuf::from(lib_path).strip_prefix("/")?.join(library); let dst = match nix::sys::stat::lstat(&tentative_path) { Ok(dst) => dst, Err(Errno::ENOENT) => continue, Err(e) => anyhow::bail!( "got errno {} while accessing {}", e, tentative_path.display() ), }; if src.st_dev != dst.st_dev { continue; // These are not the droids you are looking for. } self.update_graph(path.into(), src.st_ino, tentative_path, dst.st_ino); continue 'next_lib; } anyhow::bail!("{}: unable to find library {}", path.display(), library); } Ok(()) } fn update_graph( &mut self, src_path: PathBuf, src_inode: ino_t, dst_path: PathBuf, dst_inode: ino_t, ) { self.paths_map .entry(src_inode) .or_default() .insert(src_path); self.paths_map .entry(dst_inode) .or_default() .insert(dst_path); self.graph.add_edge(src_inode, dst_inode, ()); } fn debug_print_graph(&self, output_file: &Path) -> Result<()> { use std::ffi::OsStr; let mut output_dot = File::create(output_file)?; write!( &mut output_dot, indoc::indoc! { "digraph {{ rankdir=\"LR\" {:?} }}" }, petgraph::dot::Dot::with_attr_getters( &self.graph, &[ dot::Config::NodeNoLabel, dot::Config::EdgeNoLabel, dot::Config::GraphContentOnly ], &|_, _| { String::new() }, &|_, n| { let paths = self.paths_map.get(&n.id()).unwrap(); let first_path = paths.iter().next().expect(&format!( "dso: you have a path map with an empty entry for inode {}", n.id() )); format!( "label = \"({}, {})\"", n.weight(), &first_path .file_name() .unwrap_or(OsStr::new("🚀")) .to_string_lossy() ) } ) )?; Ok(()) } } fn determine_lib_search_paths(path: &Path, elf: &Elf<'_>) -> Result> { let mut search_paths = vec![]; let current_dir = std::env::current_dir()?; let origin = std::fs::canonicalize(path)? .parent() .unwrap() .strip_prefix(current_dir)? .to_path_buf() .into_os_string() .into_string() .map_err(|s| anyhow::anyhow!("cannot represent {:?} as a UTF-8 string", s))?; if elf.rpaths != vec![""] { if elf.runpaths != vec![""] { let mut rpaths = elf .rpaths .iter() .map(|p| p.replace("$ORIGIN", &origin)) .collect::>(); search_paths.append(&mut rpaths); } search_paths.append(&mut get_env_library_paths()); } if elf.runpaths != vec![""] { let mut runpaths = elf .runpaths .iter() .map(|p| p.replace("$ORIGIN", &origin)) .collect::>(); search_paths.append(&mut runpaths); } search_paths.push("/usr/local/lib".into()); search_paths.push("/lib".into()); search_paths.push("/usr/lib".into()); Ok(search_paths) } fn get_env_library_paths() -> Vec { let ld_config_path = std::env::var("LD_LIBRARY_PATH"); ld_config_path .as_ref() .map(|env| { env.split(':') .filter(|s| s.is_empty()) .map(|s| s.into()) .collect() }) .unwrap_or_default() } fn is_elf(f: &mut File) -> Result { let mut hdr = [0u8; 4]; if let Err(e) = f.read_exact(&mut hdr) { if e.kind() != ErrorKind::UnexpectedEof { anyhow::bail!(e) } return Ok(false); }; Ok(&hdr == ELF_MAGIC_HEADER) }