feat(list): add allowlist and blocklist handling

This uses the .gitignore format to identify
which files should be allowed / blocked.

The allowlist takes precedence over the blocklist
if both are specified.
This commit is contained in:
Matteo Settenvini 2025-01-26 14:53:39 +01:00
parent 54075012aa
commit 344e16cf0f
Signed by: matteo
GPG Key ID: 1C1B12600D81DE05
7 changed files with 188 additions and 9 deletions

85
Cargo.lock generated
View File

@ -120,6 +120,16 @@ version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36"
[[package]]
name = "bstr"
version = "1.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0"
dependencies = [
"memchr",
"serde",
]
[[package]] [[package]]
name = "bytes" name = "bytes"
version = "1.9.0" version = "1.9.0"
@ -184,6 +194,31 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]] [[package]]
name = "env_filter" name = "env_filter"
version = "0.1.3" version = "0.1.3"
@ -225,6 +260,19 @@ version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]]
name = "globset"
version = "0.4.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19"
dependencies = [
"aho-corasick",
"bstr",
"log",
"regex-automata",
"regex-syntax",
]
[[package]] [[package]]
name = "goblin" name = "goblin"
version = "0.9.3" version = "0.9.3"
@ -254,6 +302,22 @@ version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "ignore"
version = "0.4.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d89fd380afde86567dfba715db065673989d6253f42b88179abd3eae47bda4b"
dependencies = [
"crossbeam-deque",
"globset",
"log",
"memchr",
"regex-automata",
"same-file",
"walkdir",
"winapi-util",
]
[[package]] [[package]]
name = "indexmap" name = "indexmap"
version = "2.7.1" version = "2.7.1"
@ -496,6 +560,26 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "serde"
version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "signal-hook-registry" name = "signal-hook-registry"
version = "1.4.2" version = "1.4.2"
@ -547,6 +631,7 @@ dependencies = [
"clap", "clap",
"env_logger", "env_logger",
"goblin", "goblin",
"ignore",
"log", "log",
"memmap2", "memmap2",
"nix", "nix",

View File

@ -14,6 +14,7 @@ anyhow = { version = "1.0" }
async-trait = { version = "0.1" } async-trait = { version = "0.1" }
clap = { version = "4.5", features = ["derive"] } clap = { version = "4.5", features = ["derive"] }
env_logger = { version = "0.11" } env_logger = { version = "0.11" }
ignore = { version = "0.4" }
goblin = { version = "0.9" } goblin = { version = "0.9" }
log = { version = "0.4" } log = { version = "0.4" }
memmap2 = { version = "0.9" } memmap2 = { version = "0.9" }

View File

@ -18,6 +18,15 @@ pub struct Args {
#[arg(long)] #[arg(long)]
pub split_to: Option<PathBuf>, pub split_to: Option<PathBuf>,
/// An allowlist of files to keep, in .gitignore format.
/// Note: this will take precedence over all other removal decisions.
#[arg(long)]
pub allowlist: Option<PathBuf>,
/// A blocklist of files to remove, in .gitignore format.
#[arg(long)]
pub blocklist: Option<PathBuf>,
/// The location of the sysroot to clean up /// The location of the sysroot to clean up
pub sysroot_location: PathBuf, pub sysroot_location: PathBuf,
} }

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: EUPL-1.2 // SPDX-License-Identifier: EUPL-1.2
mod dso; mod dso;
mod list;
use crate::{ use crate::{
args::Args, args::Args,
@ -10,6 +11,7 @@ use crate::{
use anyhow::Result; use anyhow::Result;
use async_trait::async_trait; use async_trait::async_trait;
use dso::DsoCleaner; use dso::DsoCleaner;
use list::ListCleaner;
use nix::libc::EXDEV; use nix::libc::EXDEV;
use std::{ use std::{
collections::HashMap, collections::HashMap,
@ -26,15 +28,16 @@ use walkdir::{DirEntry, WalkDir};
pub trait Cleaner { pub trait Cleaner {
async fn run( async fn run(
&mut self, &mut self,
files: broadcast::Receiver<PathBuf>, mut files: broadcast::Receiver<PathBuf>,
decisions: mpsc::Sender<Decision>, decisions: mpsc::Sender<Decision>,
) -> Result<()>; ) -> Result<()>;
} }
type RemovalFn = Box<dyn Fn(PathBuf) -> io::Result<()>>; type Cleaners = Vec<Box<dyn Cleaner + Send>>;
type RemovalFn = Box<dyn Fn(&Path) -> io::Result<()>>;
pub struct Runner { pub struct Runner {
cleaners: Vec<Box<dyn Cleaner + Send>>, cleaners: Cleaners,
removal_fn: RemovalFn, removal_fn: RemovalFn,
} }
@ -44,8 +47,18 @@ const CHANNEL_MAX_LOAD: usize = CHANNEL_SIZE * 3 / 4;
impl Runner { impl Runner {
pub fn new(args: Args) -> Self { pub fn new(args: Args) -> Self {
let removal_fn = Self::new_removal_fn(&args); let removal_fn = Self::new_removal_fn(&args);
let mut cleaners: Cleaners = vec![Box::new(DsoCleaner::default())];
if let Some(wl) = args.allowlist {
cleaners.push(Box::new(ListCleaner::new(Action::Keep, wl)));
}
if let Some(bl) = args.blocklist {
cleaners.push(Box::new(ListCleaner::new(Action::Remove, bl)));
}
Self { Self {
cleaners: vec![Box::new(DsoCleaner::default())], cleaners,
removal_fn, removal_fn,
} }
} }
@ -137,8 +150,8 @@ impl Runner {
for (file, action) in to_remove { for (file, action) in to_remove {
if action == Action::Remove { if action == Action::Remove {
if let Err(err) = (removal_fn)(file) { if let Err(err) = (removal_fn)(&file) {
log::error!("{}", err); log::error!("{}: {}", file.display(), err);
} }
} }
} }

View File

@ -101,7 +101,7 @@ impl DsoCleaner {
let mut hdr = [0u8; 4]; let mut hdr = [0u8; 4];
if let Err(e) = f.read_exact(&mut hdr) { if let Err(e) = f.read_exact(&mut hdr) {
if e.kind() != ErrorKind::UnexpectedEof { if e.kind() != ErrorKind::UnexpectedEof {
anyhow::bail!(e) anyhow::bail!("{}: {}", path.display(), e)
} }
return Ok(()); // not ELF, ignore return Ok(()); // not ELF, ignore
@ -135,7 +135,7 @@ impl DsoCleaner {
let dst = nix::sys::stat::stat(path)?; let dst = nix::sys::stat::stat(path)?;
if src.st_dev != dst.st_dev { if src.st_dev != dst.st_dev {
log::warn!( log::warn!(
"{} points outside of the sysroot filesystem, check if this is intended", "dso: {} points outside of the sysroot filesystem, check if this is intended",
path.display() path.display()
); );
return Ok(()); return Ok(());

71
src/cleaners/list.rs Normal file
View File

@ -0,0 +1,71 @@
// SPDX-FileCopyrightText: Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
use super::Cleaner;
use crate::decision::{Action, Decision};
use anyhow::{Context, Result};
use async_trait::async_trait;
use std::path::PathBuf;
use tokio::sync::{
broadcast::{self, error::RecvError},
mpsc,
};
/// A [`Cleaner`] that attaches one fixed [`Action`] to every file whose path
/// matches a pattern list.
pub struct ListCleaner {
/// The action placed in each [`Decision`] sent for a matching path.
action_type: Action,
/// Path of the pattern file, parsed with the `ignore` crate's gitignore builder.
list: PathBuf,
}
impl ListCleaner {
pub fn new(action_type: Action, list: PathBuf) -> Self {
Self { action_type, list }
}
}
#[async_trait]
impl Cleaner for ListCleaner {
async fn run(
&mut self,
mut files: broadcast::Receiver<PathBuf>,
decisions: mpsc::Sender<Decision>,
) -> Result<()> {
let current_dir = std::env::current_dir()?;
// We use a gitignore builder because it has the syntax and
// expressivity we need. Checking if a file "is ignored"
// will tell us if it matches against the allow-/block-list
let mut matcher_builder = ignore::gitignore::GitignoreBuilder::new(current_dir);
if let Some(err) = matcher_builder.add(&self.list) {
anyhow::bail!("list: error parsing {}: {}", self.list.display(), err);
}
let matcher = matcher_builder
.build()
.with_context(|| format!("list: cannot build matcher from {}", self.list.display()))?;
let action_name = match self.action_type {
Action::Keep => "allow",
Action::Remove => "block",
};
loop {
match files.recv().await {
Ok(path) => {
if matcher.matched(&path, false).is_ignore() {
log::info!("{}list: marking {}", action_name, path.display());
decisions
.send(Decision {
action: self.action_type,
path: path,
})
.await?;
}
}
Err(RecvError::Closed) => break,
Err(e) => Err(anyhow::anyhow!(e)).context("allowlist: recv error")?,
}
}
Ok(())
}
}

View File

@ -3,7 +3,7 @@
use std::path::PathBuf; use std::path::PathBuf;
#[derive(PartialEq, Eq)] #[derive(PartialEq, Eq, Clone, Copy)]
pub enum Action { pub enum Action {
Keep, Keep,
Remove, Remove,