diff --git a/Cargo.lock b/Cargo.lock index a5522c6..ee980ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -120,6 +120,16 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +[[package]] +name = "bstr" +version = "1.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bytes" version = "1.9.0" @@ -184,6 +194,31 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "env_filter" version = "0.1.3" @@ -225,6 +260,19 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +[[package]] +name = "globset" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + [[package]] name = "goblin" version = "0.9.3" @@ -254,6 +302,22 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "ignore" +version = "0.4.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d89fd380afde86567dfba715db065673989d6253f42b88179abd3eae47bda4b" +dependencies = [ + "crossbeam-deque", + "globset", + "log", + "memchr", + "regex-automata", + "same-file", + "walkdir", + "winapi-util", +] + [[package]] name = "indexmap" version = "2.7.1" @@ -496,6 +560,26 @@ dependencies = [ "syn", ] +[[package]] +name = "serde" +version = "1.0.217" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.217" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "signal-hook-registry" version = "1.4.2" @@ -547,6 +631,7 @@ dependencies = [ "clap", "env_logger", "goblin", + "ignore", "log", "memmap2", "nix", diff --git a/Cargo.toml b/Cargo.toml index 7f0527e..932350b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ anyhow = { version = "1.0" } async-trait = { version = "0.1" } clap = { version = "4.5", features = ["derive"] } env_logger = { version = "0.11" } +ignore = { version = "0.4" } goblin = { version = "0.9" } log = { version = "0.4" } memmap2 = { version = "0.9" } diff --git a/src/args.rs b/src/args.rs index c70ecf8..78e4e4a 100644 --- a/src/args.rs +++ b/src/args.rs @@ -18,6 +18,15 @@ pub struct Args { #[arg(long)] pub split_to: Option, + /// An allowlist of files to keep, in .gitignore format. + /// Note: this will take precedence over all other removal decisions. + #[arg(long)] + pub allowlist: Option, + + /// A blocklist of files to remove, in .gitignore format. + #[arg(long)] + pub blocklist: Option, + /// The location of the sysroot to clean up pub sysroot_location: PathBuf, } diff --git a/src/cleaners.rs b/src/cleaners.rs index bd58fdc..3b5a895 100644 --- a/src/cleaners.rs +++ b/src/cleaners.rs @@ -2,6 +2,7 @@ // SPDX-License-Identifier: EUPL-1.2 mod dso; +mod list; use crate::{ args::Args, @@ -10,6 +11,7 @@ use crate::{ use anyhow::Result; use async_trait::async_trait; use dso::DsoCleaner; +use list::ListCleaner; use nix::libc::EXDEV; use std::{ collections::HashMap, @@ -26,15 +28,16 @@ use walkdir::{DirEntry, WalkDir}; pub trait Cleaner { async fn run( &mut self, - files: broadcast::Receiver, + mut files: broadcast::Receiver, decisions: mpsc::Sender, ) -> Result<()>; } -type RemovalFn = Box io::Result<()>>; +type Cleaners = Vec>; +type RemovalFn = Box io::Result<()>>; pub struct Runner { - cleaners: Vec>, + cleaners: Cleaners, removal_fn: RemovalFn, } @@ -44,8 +47,18 @@ const CHANNEL_MAX_LOAD: usize = CHANNEL_SIZE * 3 / 4; impl Runner { pub fn new(args: Args) -> Self { let removal_fn = Self::new_removal_fn(&args); + let mut cleaners: Cleaners = vec![Box::new(DsoCleaner::default())]; + + if let Some(wl) = args.allowlist { + cleaners.push(Box::new(ListCleaner::new(Action::Keep, wl))); + } + + if let Some(bl) = args.blocklist { + cleaners.push(Box::new(ListCleaner::new(Action::Remove, bl))); + } + Self { - cleaners: vec![Box::new(DsoCleaner::default())], + cleaners, removal_fn, } } @@ -137,8 +150,8 @@ impl Runner { for (file, action) in to_remove { if action == Action::Remove { - if let Err(err) = (removal_fn)(file) { - log::error!("{}", err); + if let Err(err) = (removal_fn)(&file) { + log::error!("{}: {}", file.display(), err); } } } diff --git a/src/cleaners/dso.rs b/src/cleaners/dso.rs index ac962d1..49ff7e8 100644 --- a/src/cleaners/dso.rs +++ b/src/cleaners/dso.rs @@ -101,7 +101,7 @@ impl DsoCleaner { let mut hdr = [0u8; 4]; if let Err(e) = f.read_exact(&mut hdr) { if e.kind() != ErrorKind::UnexpectedEof { - anyhow::bail!(e) + anyhow::bail!("{}: {}", path.display(), e) } return Ok(()); // not ELF, ignore @@ -135,7 +135,7 @@ impl DsoCleaner { let dst = nix::sys::stat::stat(path)?; if src.st_dev != dst.st_dev { log::warn!( - "{} points outside of the sysroot filesystem, check if this is intended", + "dso: {} points outside of the sysroot filesystem, check if this is intended", path.display() ); return Ok(()); diff --git a/src/cleaners/list.rs b/src/cleaners/list.rs new file mode 100644 index 0000000..fbfda83 --- /dev/null +++ b/src/cleaners/list.rs @@ -0,0 +1,71 @@ +// SPDX-FileCopyrightText: Matteo Settenvini +// SPDX-License-Identifier: EUPL-1.2 + +use super::Cleaner; +use crate::decision::{Action, Decision}; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use std::path::PathBuf; +use tokio::sync::{ + broadcast::{self, error::RecvError}, + mpsc, +}; + +pub struct ListCleaner { + action_type: Action, + list: PathBuf, +} + +impl ListCleaner { + pub fn new(action_type: Action, list: PathBuf) -> Self { + Self { action_type, list } + } +} + +#[async_trait] +impl Cleaner for ListCleaner { + async fn run( + &mut self, + mut files: broadcast::Receiver, + decisions: mpsc::Sender, + ) -> Result<()> { + let current_dir = std::env::current_dir()?; + + // We use a gitignore builder because it has the syntax and + // expressivity we need. Checking if a file "is ignored" + // will tell us if it matches against the allow-/block-list + let mut matcher_builder = ignore::gitignore::GitignoreBuilder::new(current_dir); + if let Some(err) = matcher_builder.add(&self.list) { + anyhow::bail!("list: error parsing {}: {}", self.list.display(), err); + } + + let matcher = matcher_builder + .build() + .with_context(|| format!("list: cannot build matcher from {}", self.list.display()))?; + + let action_name = match self.action_type { + Action::Keep => "allow", + Action::Remove => "block", + }; + + loop { + match files.recv().await { + Ok(path) => { + if matcher.matched(&path, false).is_ignore() { + log::info!("{}list: marking {}", action_name, path.display()); + decisions + .send(Decision { + action: self.action_type, + path: path, + }) + .await?; + } + } + Err(RecvError::Closed) => break, + Err(e) => Err(anyhow::anyhow!(e)).context("allowlist: recv error")?, + } + } + + Ok(()) + } +} diff --git a/src/decision.rs b/src/decision.rs index e0e0895..f59f2c8 100644 --- a/src/decision.rs +++ b/src/decision.rs @@ -3,7 +3,7 @@ use std::path::PathBuf; -#[derive(PartialEq, Eq)] +#[derive(PartialEq, Eq, Clone, Copy)] pub enum Action { Keep, Remove,