feat(cleaners): add main async runner

Adds a Runner that drives a set of cleaners
running in parallel.
This commit is contained in:
Matteo Settenvini 2025-01-25 02:19:48 +01:00
parent cc1a66726e
commit 5507a1dd21
Signed by: matteo
GPG Key ID: 1C1B12600D81DE05
8 changed files with 510 additions and 6 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
# SPDX-License-Identifier: CC0-1.0 # SPDX-License-Identifier: CC0-1.0
/target /target
/.vscode

252
Cargo.lock generated
View File

@ -2,6 +2,21 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 4 version = 4
[[package]]
name = "addr2line"
version = "0.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
dependencies = [
"gimli",
]
[[package]]
name = "adler2"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
[[package]] [[package]]
name = "aho-corasick" name = "aho-corasick"
version = "1.1.3" version = "1.1.3"
@ -47,7 +62,7 @@ version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c"
dependencies = [ dependencies = [
"windows-sys", "windows-sys 0.59.0",
] ]
[[package]] [[package]]
@ -58,7 +73,7 @@ checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e"
dependencies = [ dependencies = [
"anstyle", "anstyle",
"once_cell", "once_cell",
"windows-sys", "windows-sys 0.59.0",
] ]
[[package]] [[package]]
@ -67,12 +82,50 @@ version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
[[package]]
name = "async-trait"
version = "0.1.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "autocfg"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
name = "backtrace"
version = "0.3.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
dependencies = [
"addr2line",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
"windows-targets",
]
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "2.8.0" version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36"
[[package]]
name = "bytes"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b"
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.0" version = "1.0.0"
@ -166,6 +219,12 @@ version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
[[package]]
name = "gimli"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]] [[package]]
name = "goblin" name = "goblin"
version = "0.9.3" version = "0.9.3"
@ -217,6 +276,16 @@ version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]]
name = "lock_api"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]] [[package]]
name = "log" name = "log"
version = "0.4.25" version = "0.4.25"
@ -229,6 +298,26 @@ version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "miniz_oxide"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924"
dependencies = [
"adler2",
]
[[package]]
name = "mio"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
dependencies = [
"libc",
"wasi",
"windows-sys 0.52.0",
]
[[package]] [[package]]
name = "nix" name = "nix"
version = "0.29.0" version = "0.29.0"
@ -241,12 +330,44 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "object"
version = "0.36.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "once_cell" name = "once_cell"
version = "1.20.2" version = "1.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
[[package]]
name = "parking_lot"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-targets",
]
[[package]] [[package]]
name = "petgraph" name = "petgraph"
version = "0.7.1" version = "0.7.1"
@ -257,6 +378,12 @@ dependencies = [
"indexmap", "indexmap",
] ]
[[package]]
name = "pin-project-lite"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
[[package]] [[package]]
name = "plain" name = "plain"
version = "0.2.3" version = "0.2.3"
@ -281,6 +408,15 @@ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]]
name = "redox_syscall"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834"
dependencies = [
"bitflags",
]
[[package]] [[package]]
name = "regex" name = "regex"
version = "1.11.1" version = "1.11.1"
@ -310,6 +446,27 @@ version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "rustc-demangle"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]] [[package]]
name = "scroll" name = "scroll"
version = "0.12.0" version = "0.12.0"
@ -330,6 +487,31 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "signal-hook-registry"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1"
dependencies = [
"libc",
]
[[package]]
name = "smallvec"
version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
[[package]]
name = "socket2"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
[[package]] [[package]]
name = "strsim" name = "strsim"
version = "0.11.1" version = "0.11.1"
@ -352,12 +534,44 @@ name = "sysroot-cleaner"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-trait",
"clap", "clap",
"env_logger", "env_logger",
"goblin", "goblin",
"log", "log",
"nix", "nix",
"petgraph", "petgraph",
"tokio",
"walkdir",
]
[[package]]
name = "tokio"
version = "1.43.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e"
dependencies = [
"backtrace",
"bytes",
"libc",
"mio",
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
"socket2",
"tokio-macros",
"windows-sys 0.52.0",
]
[[package]]
name = "tokio-macros"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
dependencies = [
"proc-macro2",
"quote",
"syn",
] ]
[[package]] [[package]]
@ -372,6 +586,40 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "winapi-util"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.59.0" version = "0.59.0"

View File

@ -11,9 +11,12 @@ readme = "README.md"
[dependencies] [dependencies]
anyhow = { version = "1.0" } anyhow = { version = "1.0" }
async-trait = { version = "0.1" }
clap = { version = "4.5", features = ["derive"] } clap = { version = "4.5", features = ["derive"] }
env_logger = { version = "0.11" } env_logger = { version = "0.11" }
goblin = { version = "0.9" } goblin = { version = "0.9" }
log = { version = "0.4" } log = { version = "0.4" }
nix = { version = "0.29" } nix = { version = "0.29" }
petgraph = { version = "0.7" } petgraph = { version = "0.7" }
tokio = { version = "1", features = ["full"] }
walkdir = { version = "2" }

View File

@ -1,8 +1,23 @@
// SPDX-FileCopyrightText: Matteo Settenvini <matteo.settenvini@montecristosoftware.eu> // SPDX-FileCopyrightText: Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2 // SPDX-License-Identifier: EUPL-1.2
use std::path::PathBuf;
use clap::Parser; use clap::Parser;
/// A tool to clean up sysroots for Linux embedded devices to save storage space.
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
#[command(version, about, long_about = None)] #[command(version, about, long_about = None)]
pub struct Args {} pub struct Args {
/// Simulate operations without carrying them out
#[arg(short = 'n', long, default_value_t = false)]
pub dry_run: bool,
/// Instead of simply removing files, move them to the
/// given location, preserving their relative folder structure
#[arg(long)]
pub split_to: Option<PathBuf>,
/// The location of the sysroot to clean up
pub sysroot_location: PathBuf,
}

173
src/cleaners.rs Normal file
View File

@ -0,0 +1,173 @@
// SPDX-FileCopyrightText: Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
mod dso;
use crate::{
args::Args,
decision::{Action, Decision},
};
use anyhow::Result;
use async_trait::async_trait;
use dso::Dso;
use nix::libc::EXDEV;
use std::{
collections::HashMap,
io,
path::{Path, PathBuf},
};
use tokio::{
sync::{broadcast, mpsc},
task::JoinSet,
};
use walkdir::WalkDir;
/// A unit of clean-up logic that can be driven by [`Runner`].
///
/// `Runner::run` hands every cleaner its own subscription to the stream of
/// discovered paths and a clone of the sender used to report decisions.
#[async_trait]
pub trait Cleaner {
/// Consumes paths from `files` and reports a [`Decision`] for the paths
/// this cleaner has an opinion about through `decisions`.
///
/// An `Err` returned from here is logged by `Runner::run`; it does not
/// abort the other cleaners.
async fn run(
&mut self,
files: broadcast::Receiver<PathBuf>,
decisions: mpsc::Sender<Decision>,
) -> Result<()>;
}
/// Callback applied to each path whose final action is `Action::Remove`.
/// Depending on the command-line arguments it deletes, moves, or merely
/// logs what would happen (see `Runner::new_removal_fn`).
type RemovalFn = Box<dyn Fn(PathBuf) -> io::Result<()>>;
/// Drives a set of [`Cleaner`]s over the files found below the current
/// working directory and applies the resulting removal decisions.
pub struct Runner {
// Cleaners to run; each one is moved into its own spawned task by `run`.
cleaners: Vec<Box<dyn Cleaner + Send>>,
// What to do with a path once it has been decided that it must go.
removal_fn: RemovalFn,
}
impl Runner {
/// Builds a runner configured from the parsed command-line arguments.
///
/// The set of cleaners is currently fixed to the [`Dso`] cleaner; `args`
/// only influences how removals are carried out.
pub fn new(args: Args) -> Self {
    Self {
        cleaners: vec![Box::new(Dso::new())],
        removal_fn: Self::new_removal_fn(&args),
    }
}
pub async fn run(self) -> Result<()> {
let input_tx = broadcast::Sender::new(100);
let (output_tx, output_rx) = mpsc::channel(100);
let mut tasks = JoinSet::new();
// Processors
for mut cleaner in self.cleaners {
let input_rx = input_tx.subscribe();
let output_tx_clone = output_tx.clone();
tasks.spawn(async move { cleaner.run(input_rx, output_tx_clone).await });
}
drop(output_tx);
// Producer of inputs (note that this needs to happen
// after all channels have been created)
tasks.spawn(Self::input_producer(input_tx));
// Output consumer
Self::output_consumer(self.removal_fn, output_rx).await;
while let Some(task) = tasks.join_next().await {
if let Err(err) = task? {
log::error!("{}", err);
}
}
Ok(())
}
/// Walks the current working directory and broadcasts every entry that is
/// not a directory to the cleaners.
///
/// Entries that cannot be accessed are reported with a warning and skipped.
///
/// # Errors
///
/// Fails only if a path cannot be sent, which happens when no receiver is
/// subscribed any more.
async fn input_producer(input_tx: broadcast::Sender<PathBuf>) -> Result<()> {
    // Stop feeding the channel once this many paths are still queued, well
    // below the channel capacity chosen by the caller.
    const HIGH_WATER_MARK: usize = 75;
    // How long to wait before checking the queue length again.
    const BACKOFF: std::time::Duration = std::time::Duration::from_millis(100);

    for entry in WalkDir::new(".") {
        match entry {
            Ok(e) if !e.file_type().is_dir() => {
                // Keep waiting until the receivers have caught up: a single
                // sleep is not enough, since sending into a full broadcast
                // channel evicts the oldest values and makes slow receivers
                // lag. If every receiver is gone there is nobody to wait
                // for, so fall through and let `send` report the error.
                // TODO: replace the fixed delay with a proper backoff.
                while input_tx.len() >= HIGH_WATER_MARK && input_tx.receiver_count() > 0 {
                    tokio::time::sleep(BACKOFF).await;
                }
                input_tx.send(e.into_path())?;
            }
            Ok(_) => continue,
            Err(err) => log::warn!("unable to access path: {}", err),
        }
    }

    // we handle errors by warning the user, otherwise we always succeed
    Ok(())
}
async fn output_consumer(removal_fn: RemovalFn, mut output_rx: mpsc::Receiver<Decision>) {
let mut to_remove = HashMap::new();
while let Some(decision) = output_rx.recv().await {
match to_remove.get_mut(&decision.path) {
Some(prev_action) => {
if decision.action == Action::Keep {
*prev_action = Action::Keep;
}
}
None => {
to_remove.insert(decision.path, decision.action);
}
}
}
for (file, action) in to_remove {
if action == Action::Remove {
if let Err(err) = (removal_fn)(file) {
log::error!("{}", err);
}
}
}
}
/// Selects the removal strategy matching the command-line arguments.
///
/// | `split_to` | `dry_run` | effect                                   |
/// |------------|-----------|------------------------------------------|
/// | set        | yes       | log the move that would happen           |
/// | set        | no        | move the file below the given directory  |
/// | unset      | yes       | log the removal that would happen        |
/// | unset      | no        | delete the file                          |
fn new_removal_fn(args: &Args) -> RemovalFn {
    match (args.split_to.clone(), args.dry_run) {
        (Some(dest), true) => Box::new(move |path: PathBuf| {
            log::info!(
                "(dry-run) would move {} to {}",
                path.display(),
                dest.display()
            );
            Ok(())
        }),
        (Some(dest), false) => Box::new(move |path: PathBuf| {
            log::info!("moving {} to {}", path.display(), dest.display());
            Self::move_preserve(&path, &dest)
        }),
        (None, true) => Box::new(|path: PathBuf| {
            log::info!("(dry-run) would remove {}", path.display());
            Ok(())
        }),
        (None, false) => Box::new(|path: PathBuf| {
            log::info!("removing {}", path.display());
            std::fs::remove_file(&path)
        }),
    }
}
/// Moves `src` below `dest`, recreating the relative directory structure of
/// `src` inside `dest`.
///
/// A plain rename is attempted first; if source and destination live on
/// different filesystems (`EXDEV`) the file is copied and the original is
/// removed afterwards.
///
/// # Errors
///
/// Returns `io::ErrorKind::InvalidInput` if `src` is not a relative path,
/// since joining an absolute path would discard `dest` entirely. Any error
/// from creating the parent directories, renaming, copying or removing is
/// returned unchanged.
fn move_preserve(src: &Path, dest: &Path) -> io::Result<()> {
    // Report the problem instead of panicking: the caller logs failures per
    // file and carries on with the remaining ones.
    if !src.is_relative() {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            format!(
                "cannot move {}: expected a path relative to the sysroot",
                src.display()
            ),
        ));
    }

    let abs_dest = dest.join(src);
    if let Some(parent) = abs_dest.parent() {
        std::fs::create_dir_all(parent)?;
    }

    match std::fs::rename(src, &abs_dest) {
        Err(err) if err.raw_os_error() == Some(EXDEV) => {
            log::trace!(
                "different filesystems, falling back to copying {} to {}",
                src.display(),
                abs_dest.display()
            );
            // Only drop the original once the copy has succeeded.
            std::fs::copy(src, &abs_dest)?;
            std::fs::remove_file(src)
        }
        other => other,
    }
}
}

40
src/cleaners/dso.rs Normal file
View File

@ -0,0 +1,40 @@
// SPDX-FileCopyrightText: Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
use super::Cleaner;
use crate::decision::{Action, Decision};
use anyhow::Result;
use async_trait::async_trait;
use std::path::PathBuf;
use tokio::sync::{broadcast, mpsc};
/// Cleans up unused shared libraries
/// and warns about broken dependencies as well
#[derive(Debug, Default)]
pub struct Dso {}

impl Dso {
    /// Creates a new cleaner; equivalent to `Dso::default()`.
    pub fn new() -> Self {
        Self::default()
    }
}
#[async_trait]
impl Cleaner for Dso {
async fn run(
&mut self,
mut files: broadcast::Receiver<PathBuf>,
decisions: mpsc::Sender<Decision>,
) -> Result<()> {
while let Ok(file) = files.recv().await {
// TODO: handle Lagged?
decisions
.send(Decision {
path: file,
action: Action::Remove,
})
.await?;
}
Ok(())
}
}

16
src/decision.rs Normal file
View File

@ -0,0 +1,16 @@
// SPDX-FileCopyrightText: Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
use std::path::PathBuf;
/// What should happen to a file once all cleaners have been heard.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Action {
    /// The file must stay in place.
    Keep,
    /// The file may be removed.
    Remove,
}

/// The verdict of a single cleaner about a single path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Decision {
    /// The path the verdict applies to.
    pub path: PathBuf,
    /// What the cleaner wants to happen to `path`.
    pub action: Action,
    // TODO: add `pub reason: Option<String>` to explain the verdict.
}

View File

@ -2,16 +2,24 @@
// SPDX-License-Identifier: EUPL-1.2 // SPDX-License-Identifier: EUPL-1.2
mod args; mod args;
mod cleaners;
mod decision;
use anyhow::Result; use anyhow::Result;
use args::Args; use args::Args;
use clap::Parser as _; use clap::Parser as _;
use cleaners::Runner;
use env_logger::Env; use env_logger::Env;
fn main() -> Result<()> { #[tokio::main]
let logging_env = Env::default().default_filter_or("info"); async fn main() -> Result<()> {
let logging_env = Env::default().filter_or("LOG_LEVEL", "warn");
env_logger::Builder::from_env(logging_env).init(); env_logger::Builder::from_env(logging_env).init();
let _args = Args::try_parse()?; let args = Args::try_parse()?;
std::env::set_current_dir(&args.sysroot_location)?;
let runner = Runner::new(args);
runner.run().await?;
Ok(()) Ok(())
} }