feat(dso): implement unused DSO cleaner
This is the first implementation of a recursive DSO cleaner that is reasonably fast. The implementation does not have unit tests yet. Modules that are loaded with dlopen() will always escape such a tool; covering them will require a separate cleaner plugin driven by a whitelist.
This commit is contained in:
parent
5507a1dd21
commit
54075012aa
|
@ -298,6 +298,15 @@ version = "2.7.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.9.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.8.3"
|
||||
|
@ -539,6 +548,7 @@ dependencies = [
|
|||
"env_logger",
|
||||
"goblin",
|
||||
"log",
|
||||
"memmap2",
|
||||
"nix",
|
||||
"petgraph",
|
||||
"tokio",
|
||||
|
|
|
@ -16,7 +16,8 @@ clap = { version = "4.5", features = ["derive"] }
|
|||
env_logger = { version = "0.11" }
|
||||
goblin = { version = "0.9" }
|
||||
log = { version = "0.4" }
|
||||
nix = { version = "0.29" }
|
||||
memmap2 = { version = "0.9" }
|
||||
nix = { version = "0.29", features = ["fs"] }
|
||||
petgraph = { version = "0.7" }
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
walkdir = { version = "2" }
|
||||
|
|
|
@ -9,7 +9,7 @@ use crate::{
|
|||
};
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use dso::Dso;
|
||||
use dso::DsoCleaner;
|
||||
use nix::libc::EXDEV;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
|
@ -20,7 +20,7 @@ use tokio::{
|
|||
sync::{broadcast, mpsc},
|
||||
task::JoinSet,
|
||||
};
|
||||
use walkdir::WalkDir;
|
||||
use walkdir::{DirEntry, WalkDir};
|
||||
|
||||
#[async_trait]
|
||||
pub trait Cleaner {
|
||||
|
@ -38,18 +38,21 @@ pub struct Runner {
|
|||
removal_fn: RemovalFn,
|
||||
}
|
||||
|
||||
const CHANNEL_SIZE: usize = 100;
|
||||
const CHANNEL_MAX_LOAD: usize = CHANNEL_SIZE * 3 / 4;
|
||||
|
||||
impl Runner {
|
||||
pub fn new(args: Args) -> Self {
|
||||
let removal_fn = Self::new_removal_fn(&args);
|
||||
Self {
|
||||
cleaners: vec![Box::new(Dso::new())],
|
||||
cleaners: vec![Box::new(DsoCleaner::default())],
|
||||
removal_fn,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run(self) -> Result<()> {
|
||||
let input_tx = broadcast::Sender::new(100);
|
||||
let (output_tx, output_rx) = mpsc::channel(100);
|
||||
let input_tx = broadcast::Sender::new(CHANNEL_SIZE);
|
||||
let (output_tx, output_rx) = mpsc::channel(CHANNEL_SIZE);
|
||||
let mut tasks = JoinSet::new();
|
||||
|
||||
// Processors
|
||||
|
@ -76,14 +79,13 @@ impl Runner {
|
|||
}
|
||||
|
||||
async fn input_producer(input_tx: broadcast::Sender<PathBuf>) -> Result<()> {
|
||||
let walker = WalkDir::new(".");
|
||||
let walker = WalkDir::new(".").follow_links(false);
|
||||
for entry in walker {
|
||||
match entry {
|
||||
Ok(e) if !e.file_type().is_dir() => {
|
||||
if input_tx.len() >= 75 {
|
||||
// TODO: FIXME: make this better, e.g. use backoff, this is a quick
|
||||
// hack
|
||||
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
||||
Ok(e) if !Self::is_dir(&e) => {
|
||||
if input_tx.len() >= CHANNEL_MAX_LOAD {
|
||||
// TODO: FIXME: make this better, this is a quick hack
|
||||
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
|
||||
}
|
||||
input_tx.send(e.into_path())?;
|
||||
}
|
||||
|
@ -96,6 +98,28 @@ impl Runner {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn is_dir(entry: &DirEntry) -> bool {
|
||||
let ty = entry.file_type();
|
||||
if ty.is_dir() {
|
||||
true
|
||||
} else if ty.is_file() {
|
||||
false
|
||||
} else {
|
||||
// it is a symlink
|
||||
match std::fs::metadata(entry.path()) {
|
||||
Ok(metadata) => metadata.is_dir(),
|
||||
Err(e) => {
|
||||
log::debug!(
|
||||
"unable to resolve symlink {}: {}",
|
||||
entry.path().display(),
|
||||
e
|
||||
);
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn output_consumer(removal_fn: RemovalFn, mut output_rx: mpsc::Receiver<Decision>) {
|
||||
let mut to_remove = HashMap::new();
|
||||
while let Some(decision) = output_rx.recv().await {
|
||||
|
|
|
@ -5,31 +5,87 @@ use super::Cleaner;
|
|||
use crate::decision::{Action, Decision};
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use std::path::PathBuf;
|
||||
use tokio::sync::{broadcast, mpsc};
|
||||
use goblin::elf::Elf;
|
||||
use memmap2::Mmap;
|
||||
use nix::{errno::Errno, libc::ino_t};
|
||||
use petgraph::{prelude::DiGraphMap, visit::Dfs};
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
fs::File,
|
||||
io::{ErrorKind, Read, Seek},
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
use tokio::sync::{
|
||||
broadcast::{self, error::RecvError},
|
||||
mpsc,
|
||||
};
|
||||
|
||||
type InodeMap = HashMap<ino_t, HashSet<PathBuf>>;
|
||||
type InodeGraph = DiGraphMap<ino_t, ()>;
|
||||
|
||||
/// Cleans up unused shared libraries
|
||||
/// and warns about broken dependencies as well
|
||||
pub struct Dso {}
|
||||
#[derive(Default)]
|
||||
pub struct DsoCleaner {}
|
||||
|
||||
impl Dso {
|
||||
pub fn new() -> Self {
|
||||
Self {}
|
||||
}
|
||||
/// Bookkeeping accumulated while the incoming files are scanned.
#[derive(Default)]
|
||||
struct State {
|
||||
/// Paths recorded for each inode; one inode may be known under several paths.
paths_map: InodeMap,
|
||||
/// Directed graph whose nodes are inodes; edges are added by the ELF processing helpers.
graph: InodeGraph,
|
||||
}
|
||||
|
||||
const INODE_ANY_EXECUTABLE: ino_t = 0;
|
||||
const ELF_MAGIC_HEADER: &[u8; 4] = b"\x7fELF";
|
||||
|
||||
#[async_trait]
|
||||
impl Cleaner for Dso {
|
||||
impl Cleaner for DsoCleaner {
|
||||
async fn run(
|
||||
&mut self,
|
||||
mut files: broadcast::Receiver<PathBuf>,
|
||||
decisions: mpsc::Sender<Decision>,
|
||||
) -> Result<()> {
|
||||
while let Ok(file) = files.recv().await {
|
||||
// TODO: handle Lagged?
|
||||
let mut state = State::default();
|
||||
|
||||
loop {
|
||||
match files.recv().await {
|
||||
Ok(file) => {
|
||||
if let Err(e) = Self::process_file(&mut state, &file) {
|
||||
log::warn!("{}: {}", file.display(), e);
|
||||
}
|
||||
}
|
||||
Err(RecvError::Closed) => break,
|
||||
e => {
|
||||
e?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// println!(
|
||||
// "{:?}",
|
||||
// petgraph::dot::Dot::with_config(&state.graph, &[petgraph::dot::Config::EdgeNoLabel])
|
||||
// );
|
||||
|
||||
let mut dfs = Dfs::empty(&state.graph);
|
||||
if state.graph.contains_node(INODE_ANY_EXECUTABLE) {
|
||||
dfs.move_to(INODE_ANY_EXECUTABLE);
|
||||
}
|
||||
while let Some(_) = dfs.next(&state.graph) {}
|
||||
|
||||
for path in state
|
||||
.paths_map
|
||||
.into_iter()
|
||||
.filter_map(|(n, paths)| {
|
||||
if !dfs.discovered.contains(&n) {
|
||||
Some(paths)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.flatten()
|
||||
{
|
||||
decisions
|
||||
.send(Decision {
|
||||
path: file,
|
||||
path,
|
||||
action: Action::Remove,
|
||||
})
|
||||
.await?;
|
||||
|
@ -38,3 +94,180 @@ impl Cleaner for Dso {
|
|||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl DsoCleaner {
|
||||
fn process_file(state: &mut State, path: &Path) -> Result<()> {
|
||||
let mut f = File::open(path)?;
|
||||
let mut hdr = [0u8; 4];
|
||||
if let Err(e) = f.read_exact(&mut hdr) {
|
||||
if e.kind() != ErrorKind::UnexpectedEof {
|
||||
anyhow::bail!(e)
|
||||
}
|
||||
|
||||
return Ok(()); // not ELF, ignore
|
||||
};
|
||||
|
||||
let is_elf = &hdr == ELF_MAGIC_HEADER;
|
||||
if !is_elf {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
f.rewind()?;
|
||||
let mmap = unsafe { Mmap::map(&f)? };
|
||||
let elf = Elf::parse(&mmap)?;
|
||||
|
||||
if path.is_symlink() {
|
||||
if !elf.is_lib {
|
||||
// we don't care about symlinks to
|
||||
// executables in our graph, as we
|
||||
// are cleaning up only DSOs.
|
||||
Ok(())
|
||||
} else {
|
||||
Self::process_elf_symlink(state, path)
|
||||
}
|
||||
} else {
|
||||
Self::process_elf_file(state, path, &elf)
|
||||
}
|
||||
}
|
||||
|
||||
fn process_elf_symlink(state: &mut State, path: &Path) -> Result<()> {
|
||||
let src = nix::sys::stat::lstat(path)?;
|
||||
let dst = nix::sys::stat::stat(path)?;
|
||||
if src.st_dev != dst.st_dev {
|
||||
log::warn!(
|
||||
"{} points outside of the sysroot filesystem, check if this is intended",
|
||||
path.display()
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
let current_dir = std::env::current_dir()?;
|
||||
let dst_path = std::fs::canonicalize(path)?
|
||||
.strip_prefix(current_dir)?
|
||||
.to_path_buf();
|
||||
|
||||
log::trace!(
|
||||
"dso: adding to graph symlink: '{}' to '{}'",
|
||||
path.display(),
|
||||
dst_path.display()
|
||||
);
|
||||
|
||||
state
|
||||
.paths_map
|
||||
.entry(src.st_ino)
|
||||
.or_default()
|
||||
.insert(path.into());
|
||||
|
||||
state
|
||||
.paths_map
|
||||
.entry(dst.st_ino)
|
||||
.or_default()
|
||||
.insert(dst_path);
|
||||
|
||||
state.graph.add_edge(src.st_ino, dst.st_ino, ());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn process_elf_file(state: &mut State, path: &Path, elf: &Elf) -> Result<()> {
|
||||
log::trace!("dso: adding to graph elf file '{}'", path.display());
|
||||
|
||||
let current_dir = std::env::current_dir()?;
|
||||
let origin = std::fs::canonicalize(path)?
|
||||
.parent()
|
||||
.unwrap()
|
||||
.strip_prefix(current_dir)?
|
||||
.to_path_buf()
|
||||
.into_os_string()
|
||||
.into_string()
|
||||
.map_err(|s| anyhow::anyhow!("cannot represent {:?} as a UTF-8 string", s))?;
|
||||
|
||||
let mut search_paths = vec![];
|
||||
|
||||
if elf.rpaths != vec![""] {
|
||||
if elf.runpaths != vec![""] {
|
||||
let mut rpaths = elf
|
||||
.rpaths
|
||||
.iter()
|
||||
.map(|p| p.replace("$ORIGIN", &origin))
|
||||
.collect::<Vec<_>>();
|
||||
search_paths.append(&mut rpaths);
|
||||
}
|
||||
|
||||
let ld_config_path = std::env::var("LD_LIBRARY_PATH");
|
||||
let mut env_paths = ld_config_path
|
||||
.as_ref()
|
||||
.map(|env| {
|
||||
env.split(':')
|
||||
.filter_map(|dir| {
|
||||
if dir.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(dir.to_string())
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
search_paths.append(&mut env_paths);
|
||||
}
|
||||
|
||||
if elf.runpaths != vec![""] {
|
||||
let mut runpaths = elf
|
||||
.runpaths
|
||||
.iter()
|
||||
.map(|p| p.replace("$ORIGIN", &origin))
|
||||
.collect::<Vec<_>>();
|
||||
search_paths.append(&mut runpaths);
|
||||
}
|
||||
|
||||
// Standard dirs:
|
||||
search_paths.push("/usr/local/lib".into());
|
||||
search_paths.push("/lib".into());
|
||||
search_paths.push("/usr/lib".into());
|
||||
|
||||
let src_stat = nix::sys::stat::stat(path)?;
|
||||
let src_inode = if elf.is_lib {
|
||||
src_stat.st_ino
|
||||
} else {
|
||||
// We put all executables in the same node
|
||||
INODE_ANY_EXECUTABLE
|
||||
};
|
||||
|
||||
'next_lib: for &library in elf.libraries.iter() {
|
||||
for lib_path in search_paths.iter() {
|
||||
let tentative_path = PathBuf::from(lib_path).strip_prefix("/")?.join(library);
|
||||
let dst = match nix::sys::stat::stat(&tentative_path) {
|
||||
Ok(dst) => dst,
|
||||
Err(Errno::ENOENT) => continue,
|
||||
Err(e) => anyhow::bail!(
|
||||
"got errno {} while accessing {}",
|
||||
e,
|
||||
tentative_path.display()
|
||||
),
|
||||
};
|
||||
|
||||
if src_stat.st_dev != dst.st_dev {
|
||||
continue; // These are not the droids you are looking for.
|
||||
}
|
||||
|
||||
state
|
||||
.paths_map
|
||||
.entry(src_inode)
|
||||
.or_default()
|
||||
.insert(path.into());
|
||||
|
||||
state
|
||||
.paths_map
|
||||
.entry(dst.st_ino)
|
||||
.or_default()
|
||||
.insert(tentative_path);
|
||||
|
||||
state.graph.add_edge(src_inode, dst.st_ino, ());
|
||||
continue 'next_lib;
|
||||
}
|
||||
|
||||
anyhow::bail!("{}: unable to find library {}", path.display(), library);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,10 +13,12 @@ use env_logger::Env;
|
|||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let logging_env = Env::default().filter_or("LOG_LEVEL", "warn");
|
||||
let args = Args::try_parse()?;
|
||||
|
||||
let logging_env =
|
||||
Env::default().filter_or("LOG_LEVEL", if args.dry_run { "info" } else { "warn" });
|
||||
env_logger::Builder::from_env(logging_env).init();
|
||||
|
||||
let args = Args::try_parse()?;
|
||||
std::env::set_current_dir(&args.sysroot_location)?;
|
||||
|
||||
let runner = Runner::new(args);
|
||||
|
|
Loading…
Reference in New Issue