feat(list): add allowlist and blocklist handling

This uses the .gitignore format to identify
which files should be allowed / blocked.

The allowlist gets precedence over the blocklist
if both are specified.
This commit is contained in:
Matteo Settenvini 2025-01-26 14:53:39 +01:00
parent 54075012aa
commit 344e16cf0f
Signed by: matteo
GPG key ID: 1C1B12600D81DE05
7 changed files with 188 additions and 9 deletions

View file

@ -18,6 +18,15 @@ pub struct Args {
#[arg(long)]
pub split_to: Option<PathBuf>,
/// An allowlist of files to keep, in .gitignore format.
/// Note: this will take precedence over all other removal decisions.
#[arg(long)]
pub allowlist: Option<PathBuf>,
/// A blocklist of files to remove, in .gitignore format.
#[arg(long)]
pub blocklist: Option<PathBuf>,
/// The location of the sysroot to clean up
pub sysroot_location: PathBuf,
}

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: EUPL-1.2
mod dso;
mod list;
use crate::{
args::Args,
@ -10,6 +11,7 @@ use crate::{
use anyhow::Result;
use async_trait::async_trait;
use dso::DsoCleaner;
use list::ListCleaner;
use nix::libc::EXDEV;
use std::{
collections::HashMap,
@ -26,15 +28,16 @@ use walkdir::{DirEntry, WalkDir};
pub trait Cleaner {
async fn run(
&mut self,
files: broadcast::Receiver<PathBuf>,
mut files: broadcast::Receiver<PathBuf>,
decisions: mpsc::Sender<Decision>,
) -> Result<()>;
}
type RemovalFn = Box<dyn Fn(PathBuf) -> io::Result<()>>;
type Cleaners = Vec<Box<dyn Cleaner + Send>>;
type RemovalFn = Box<dyn Fn(&Path) -> io::Result<()>>;
pub struct Runner {
cleaners: Vec<Box<dyn Cleaner + Send>>,
cleaners: Cleaners,
removal_fn: RemovalFn,
}
@ -44,8 +47,18 @@ const CHANNEL_MAX_LOAD: usize = CHANNEL_SIZE * 3 / 4;
impl Runner {
pub fn new(args: Args) -> Self {
let removal_fn = Self::new_removal_fn(&args);
let mut cleaners: Cleaners = vec![Box::new(DsoCleaner::default())];
if let Some(wl) = args.allowlist {
cleaners.push(Box::new(ListCleaner::new(Action::Keep, wl)));
}
if let Some(bl) = args.blocklist {
cleaners.push(Box::new(ListCleaner::new(Action::Remove, bl)));
}
Self {
cleaners: vec![Box::new(DsoCleaner::default())],
cleaners,
removal_fn,
}
}
@ -137,8 +150,8 @@ impl Runner {
for (file, action) in to_remove {
if action == Action::Remove {
if let Err(err) = (removal_fn)(file) {
log::error!("{}", err);
if let Err(err) = (removal_fn)(&file) {
log::error!("{}: {}", file.display(), err);
}
}
}

View file

@ -101,7 +101,7 @@ impl DsoCleaner {
let mut hdr = [0u8; 4];
if let Err(e) = f.read_exact(&mut hdr) {
if e.kind() != ErrorKind::UnexpectedEof {
anyhow::bail!(e)
anyhow::bail!("{}: {}", path.display(), e)
}
return Ok(()); // not ELF, ignore
@ -135,7 +135,7 @@ impl DsoCleaner {
let dst = nix::sys::stat::stat(path)?;
if src.st_dev != dst.st_dev {
log::warn!(
"{} points outside of the sysroot filesystem, check if this is intended",
"dso: {} points outside of the sysroot filesystem, check if this is intended",
path.display()
);
return Ok(());

71
src/cleaners/list.rs Normal file
View file

@ -0,0 +1,71 @@
// SPDX-FileCopyrightText: Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
use super::Cleaner;
use crate::decision::{Action, Decision};
use anyhow::{Context, Result};
use async_trait::async_trait;
use std::path::PathBuf;
use tokio::sync::{
broadcast::{self, error::RecvError},
mpsc,
};
/// A cleaner that marks files matching a gitignore-style list file.
///
/// The same type serves as allowlist or blocklist, depending on the
/// `action_type` it was constructed with.
pub struct ListCleaner {
/// The action attached to every decision emitted for a matching path.
action_type: Action,
/// Location of the list file, parsed with gitignore syntax when the cleaner runs.
list: PathBuf,
}
impl ListCleaner {
pub fn new(action_type: Action, list: PathBuf) -> Self {
Self { action_type, list }
}
}
#[async_trait]
impl Cleaner for ListCleaner {
    /// Marks every received path that matches the configured list file.
    ///
    /// The file at `self.list` is parsed with gitignore syntax, using the
    /// current working directory as the matcher root. For each path received
    /// on `files` that the matcher reports as "ignored", a [`Decision`]
    /// carrying `self.action_type` is sent on `decisions`.
    ///
    /// Returns `Ok(())` once the `files` channel has been closed.
    ///
    /// # Errors
    ///
    /// Fails if the current directory cannot be determined, if the list file
    /// cannot be parsed or turned into a matcher, if a decision cannot be
    /// sent, or if receiving from `files` fails for any reason other than
    /// the channel being closed.
    async fn run(
        &mut self,
        mut files: broadcast::Receiver<PathBuf>,
        decisions: mpsc::Sender<Decision>,
    ) -> Result<()> {
        let current_dir = std::env::current_dir()?;

        // We use a gitignore builder because it has the syntax and
        // expressivity we need. Checking if a file "is ignored"
        // will tell us if it matches against the allow-/block-list
        let mut matcher_builder = ignore::gitignore::GitignoreBuilder::new(current_dir);
        if let Some(err) = matcher_builder.add(&self.list) {
            anyhow::bail!("list: error parsing {}: {}", self.list.display(), err);
        }

        let matcher = matcher_builder
            .build()
            .with_context(|| format!("list: cannot build matcher from {}", self.list.display()))?;

        // Prefix used in log and error messages, so that diagnostics name the
        // list kind (allow/block) this cleaner was actually configured with.
        let action_name = match self.action_type {
            Action::Keep => "allow",
            Action::Remove => "block",
        };

        loop {
            match files.recv().await {
                Ok(path) => {
                    // Every path is matched as a non-directory entry.
                    if matcher.matched(&path, false).is_ignore() {
                        log::info!("{}list: marking {}", action_name, path.display());
                        decisions
                            .send(Decision {
                                action: self.action_type,
                                path,
                            })
                            .await?;
                    }
                }
                // The sender side is gone: no more files will arrive.
                Err(RecvError::Closed) => break,
                // Any other receive failure is fatal; report it under the
                // right list name rather than always blaming the allowlist.
                Err(e) => {
                    return Err(e)
                        .with_context(|| format!("{}list: recv error", action_name));
                }
            }
        }

        Ok(())
    }
}

View file

@ -3,7 +3,7 @@
use std::path::PathBuf;
#[derive(PartialEq, Eq)]
#[derive(PartialEq, Eq, Clone, Copy)]
pub enum Action {
Keep,
Remove,