diff --git a/.gitignore b/.gitignore index a64393e..dbce2ee 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ # SPDX-License-Identifier: CC0-1.0 /target +/.vscode diff --git a/Cargo.lock b/Cargo.lock index 9367b05..3bd19fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,21 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + [[package]] name = "aho-corasick" version = "1.1.3" @@ -47,7 +62,7 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -58,7 +73,7 @@ checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" dependencies = [ "anstyle", "once_cell", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -67,12 +82,50 @@ version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" +[[package]] +name = "async-trait" +version = "0.1.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + [[package]] name = "bitflags" version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +[[package]] +name = "bytes" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" + [[package]] name = "cfg-if" version = "1.0.0" @@ -166,6 +219,12 @@ version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + [[package]] name = "goblin" version = "0.9.3" @@ -217,6 +276,16 @@ version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.25" @@ -229,6 +298,26 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "miniz_oxide" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.52.0", +] + [[package]] name = "nix" version = "0.29.0" @@ -241,12 +330,44 @@ dependencies = [ "libc", ] +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + [[package]] name = "once_cell" version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + [[package]] name = "petgraph" version = "0.7.1" @@ -257,6 +378,12 @@ dependencies = [ "indexmap", ] +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + [[package]] name = "plain" version = "0.2.3" @@ -281,6 +408,15 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "redox_syscall" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" +dependencies = [ + "bitflags", +] + [[package]] name = "regex" version = "1.11.1" @@ -310,6 +446,27 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "scroll" version = "0.12.0" @@ -330,6 +487,31 @@ dependencies = [ "syn", ] +[[package]] +name = "signal-hook-registry" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +dependencies = [ + "libc", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "socket2" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "strsim" version = "0.11.1" @@ -352,12 +534,44 @@ name = "sysroot-cleaner" version = "0.1.0" dependencies = [ "anyhow", + "async-trait", "clap", "env_logger", "goblin", "log", "nix", "petgraph", + "tokio", + "walkdir", +] + +[[package]] +name = "tokio" +version = "1.43.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.52.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -372,6 +586,40 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.59.0" diff --git a/Cargo.toml b/Cargo.toml index 7f76944..44b67e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,9 +11,12 @@ readme = "README.md" [dependencies] anyhow = { version = "1.0" } +async-trait = { version = "0.1" } clap = { version = "4.5", features = ["derive"] } env_logger = { version = "0.11" } goblin = { version = "0.9" } log = { version = "0.4" } nix = { version = "0.29" } petgraph = { version = "0.7" } +tokio = { version = "1", features = ["full"] } +walkdir = { version = "2" } diff --git a/src/args.rs b/src/args.rs index 01534e2..c70ecf8 100644 --- a/src/args.rs +++ b/src/args.rs @@ -1,8 +1,23 @@ // SPDX-FileCopyrightText: Matteo Settenvini // SPDX-License-Identifier: EUPL-1.2 +use std::path::PathBuf; + use clap::Parser; +/// A tool to clean up sysroots for Linux embedded devices to save storage space. #[derive(Parser, Debug)] #[command(version, about, long_about = None)] -pub struct Args {} +pub struct Args { + /// Simulate operations without carrying them out + #[arg(short = 'n', long, default_value_t = false)] + pub dry_run: bool, + + /// Instead of simply removing files, move them to the + /// given location, preserving their relative folder structure + #[arg(long)] + pub split_to: Option, + + /// The location of the sysroot to clean up + pub sysroot_location: PathBuf, +} diff --git a/src/cleaners.rs b/src/cleaners.rs new file mode 100644 index 0000000..8628ebf --- /dev/null +++ b/src/cleaners.rs @@ -0,0 +1,173 @@ +// SPDX-FileCopyrightText: Matteo Settenvini +// SPDX-License-Identifier: EUPL-1.2 + +mod dso; + +use crate::{ + args::Args, + decision::{Action, Decision}, +}; +use anyhow::Result; +use async_trait::async_trait; +use dso::Dso; +use nix::libc::EXDEV; +use std::{ + collections::HashMap, + io, + path::{Path, PathBuf}, +}; +use tokio::{ + sync::{broadcast, mpsc}, + task::JoinSet, +}; +use walkdir::WalkDir; + +#[async_trait] +pub trait Cleaner { + async fn run( + &mut self, + files: broadcast::Receiver, + decisions: mpsc::Sender, + ) -> Result<()>; +} + +type RemovalFn = Box io::Result<()>>; + +pub struct Runner { + cleaners: Vec>, + removal_fn: RemovalFn, +} + +impl Runner { + pub fn new(args: Args) -> Self { + let removal_fn = Self::new_removal_fn(&args); + Self { + cleaners: vec![Box::new(Dso::new())], + removal_fn, + } + } + + pub async fn run(self) -> Result<()> { + let input_tx = broadcast::Sender::new(100); + let (output_tx, output_rx) = mpsc::channel(100); + let mut tasks = JoinSet::new(); + + // Processors + for mut cleaner in self.cleaners { + let input_rx = input_tx.subscribe(); + let output_tx_clone = output_tx.clone(); + tasks.spawn(async move { cleaner.run(input_rx, output_tx_clone).await }); + } + drop(output_tx); + + // Producer of inputs (note that this needs to happen + // after all channels have been created) + tasks.spawn(Self::input_producer(input_tx)); + + // Output consumer + Self::output_consumer(self.removal_fn, output_rx).await; + + while let Some(task) = tasks.join_next().await { + if let Err(err) = task? { + log::error!("{}", err); + } + } + Ok(()) + } + + async fn input_producer(input_tx: broadcast::Sender) -> Result<()> { + let walker = WalkDir::new("."); + for entry in walker { + match entry { + Ok(e) if !e.file_type().is_dir() => { + if input_tx.len() >= 75 { + // TODO: FIXME: make this better, e.g. use backoff, this is a quick + // hack + tokio::time::sleep(std::time::Duration::from_millis(100)).await; + } + input_tx.send(e.into_path())?; + } + Ok(_) => continue, + Err(err) => log::warn!("unable to access path: {}", err), + } + } + + // we handle errors by warning the user, otherwise we always succeed + Ok(()) + } + + async fn output_consumer(removal_fn: RemovalFn, mut output_rx: mpsc::Receiver) { + let mut to_remove = HashMap::new(); + while let Some(decision) = output_rx.recv().await { + match to_remove.get_mut(&decision.path) { + Some(prev_action) => { + if decision.action == Action::Keep { + *prev_action = Action::Keep; + } + } + None => { + to_remove.insert(decision.path, decision.action); + } + } + } + + for (file, action) in to_remove { + if action == Action::Remove { + if let Err(err) = (removal_fn)(file) { + log::error!("{}", err); + } + } + } + } + + fn new_removal_fn(args: &Args) -> RemovalFn { + if let Some(dest) = args.split_to.clone() { + if args.dry_run { + Box::new(move |path| { + log::info!( + "(dry-run) would move {} to {}", + path.display(), + dest.display() + ); + Ok(()) + }) + } else { + Box::new(move |path| { + log::info!("moving {} to {}", path.display(), dest.display()); + Self::move_preserve(&path, &dest) + }) + } + } else { + if args.dry_run { + Box::new(|path| { + log::info!("(dry-run) would remove {}", path.display()); + Ok(()) + }) + } else { + Box::new(move |path| { + log::info!("removing {}", path.display()); + std::fs::remove_file(&path) + }) + } + } + } + + fn move_preserve(src: &Path, dest: &Path) -> io::Result<()> { + assert!(src.is_relative()); + let abs_dest = dest.join(src); + if let Some(parent) = abs_dest.parent() { + std::fs::create_dir_all(parent)?; + } + match std::fs::rename(&src, &abs_dest) { + Err(err) if err.raw_os_error() == Some(EXDEV) => { + log::trace!( + "different filesystems, falling back to copying {} to {}", + src.display(), + abs_dest.display() + ); + std::fs::copy(src, abs_dest).and_then(|_| std::fs::remove_file(src)) + } + other => other, + } + } +} diff --git a/src/cleaners/dso.rs b/src/cleaners/dso.rs new file mode 100644 index 0000000..582165a --- /dev/null +++ b/src/cleaners/dso.rs @@ -0,0 +1,40 @@ +// SPDX-FileCopyrightText: Matteo Settenvini +// SPDX-License-Identifier: EUPL-1.2 + +use super::Cleaner; +use crate::decision::{Action, Decision}; +use anyhow::Result; +use async_trait::async_trait; +use std::path::PathBuf; +use tokio::sync::{broadcast, mpsc}; + +/// Cleans up unused shared libraries +/// and warns about broken dependencies as well +pub struct Dso {} + +impl Dso { + pub fn new() -> Self { + Self {} + } +} + +#[async_trait] +impl Cleaner for Dso { + async fn run( + &mut self, + mut files: broadcast::Receiver, + decisions: mpsc::Sender, + ) -> Result<()> { + while let Ok(file) = files.recv().await { + // TODO: handle Lagged? + decisions + .send(Decision { + path: file, + action: Action::Remove, + }) + .await?; + } + + Ok(()) + } +} diff --git a/src/decision.rs b/src/decision.rs new file mode 100644 index 0000000..e0e0895 --- /dev/null +++ b/src/decision.rs @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: Matteo Settenvini +// SPDX-License-Identifier: EUPL-1.2 + +use std::path::PathBuf; + +#[derive(PartialEq, Eq)] +pub enum Action { + Keep, + Remove, +} + +pub struct Decision { + pub path: PathBuf, + pub action: Action, + //pub reason: Option, +} diff --git a/src/main.rs b/src/main.rs index e3077ef..8f9b255 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,16 +2,24 @@ // SPDX-License-Identifier: EUPL-1.2 mod args; +mod cleaners; +mod decision; use anyhow::Result; use args::Args; use clap::Parser as _; +use cleaners::Runner; use env_logger::Env; -fn main() -> Result<()> { - let logging_env = Env::default().default_filter_or("info"); +#[tokio::main] +async fn main() -> Result<()> { + let logging_env = Env::default().filter_or("LOG_LEVEL", "warn"); env_logger::Builder::from_env(logging_env).init(); - let _args = Args::try_parse()?; + let args = Args::try_parse()?; + std::env::set_current_dir(&args.sysroot_location)?; + + let runner = Runner::new(args); + runner.run().await?; Ok(()) }