feat(list): add allowlist and blocklist handling
This uses the .gitignore format to identify which files should be allowed / blocked. The allowlist gets precedence over the blocklist if both are specified.
This commit is contained in:
parent
54075012aa
commit
344e16cf0f
|
@ -120,6 +120,16 @@ version = "2.8.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36"
|
||||
|
||||
[[package]]
|
||||
name = "bstr"
|
||||
version = "1.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bytes"
|
||||
version = "1.9.0"
|
||||
|
@ -184,6 +194,31 @@ version = "1.0.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-deque"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
|
||||
dependencies = [
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.9.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
||||
|
||||
[[package]]
|
||||
name = "env_filter"
|
||||
version = "0.1.3"
|
||||
|
@ -225,6 +260,19 @@ version = "0.31.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
|
||||
|
||||
[[package]]
|
||||
name = "globset"
|
||||
version = "0.4.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"bstr",
|
||||
"log",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "goblin"
|
||||
version = "0.9.3"
|
||||
|
@ -254,6 +302,22 @@ version = "2.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
|
||||
|
||||
[[package]]
|
||||
name = "ignore"
|
||||
version = "0.4.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d89fd380afde86567dfba715db065673989d6253f42b88179abd3eae47bda4b"
|
||||
dependencies = [
|
||||
"crossbeam-deque",
|
||||
"globset",
|
||||
"log",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"same-file",
|
||||
"walkdir",
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.7.1"
|
||||
|
@ -496,6 +560,26 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.217"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.217"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-registry"
|
||||
version = "1.4.2"
|
||||
|
@ -547,6 +631,7 @@ dependencies = [
|
|||
"clap",
|
||||
"env_logger",
|
||||
"goblin",
|
||||
"ignore",
|
||||
"log",
|
||||
"memmap2",
|
||||
"nix",
|
||||
|
|
|
@ -14,6 +14,7 @@ anyhow = { version = "1.0" }
|
|||
async-trait = { version = "0.1" }
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
env_logger = { version = "0.11" }
|
||||
ignore = { version = "0.4" }
|
||||
goblin = { version = "0.9" }
|
||||
log = { version = "0.4" }
|
||||
memmap2 = { version = "0.9" }
|
||||
|
|
|
@ -18,6 +18,15 @@ pub struct Args {
|
|||
#[arg(long)]
|
||||
pub split_to: Option<PathBuf>,
|
||||
|
||||
/// An allowlist of files to keep, in .gitignore format.
|
||||
/// Note: this will take precedence over all other removal decisions.
|
||||
#[arg(long)]
|
||||
pub allowlist: Option<PathBuf>,
|
||||
|
||||
/// A blocklist of files to remove, in .gitignore format.
|
||||
#[arg(long)]
|
||||
pub blocklist: Option<PathBuf>,
|
||||
|
||||
/// The location of the sysroot to clean up
|
||||
pub sysroot_location: PathBuf,
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
// SPDX-License-Identifier: EUPL-1.2
|
||||
|
||||
mod dso;
|
||||
mod list;
|
||||
|
||||
use crate::{
|
||||
args::Args,
|
||||
|
@ -10,6 +11,7 @@ use crate::{
|
|||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use dso::DsoCleaner;
|
||||
use list::ListCleaner;
|
||||
use nix::libc::EXDEV;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
|
@ -26,15 +28,16 @@ use walkdir::{DirEntry, WalkDir};
|
|||
pub trait Cleaner {
|
||||
async fn run(
|
||||
&mut self,
|
||||
files: broadcast::Receiver<PathBuf>,
|
||||
mut files: broadcast::Receiver<PathBuf>,
|
||||
decisions: mpsc::Sender<Decision>,
|
||||
) -> Result<()>;
|
||||
}
|
||||
|
||||
type RemovalFn = Box<dyn Fn(PathBuf) -> io::Result<()>>;
|
||||
type Cleaners = Vec<Box<dyn Cleaner + Send>>;
|
||||
type RemovalFn = Box<dyn Fn(&Path) -> io::Result<()>>;
|
||||
|
||||
pub struct Runner {
|
||||
cleaners: Vec<Box<dyn Cleaner + Send>>,
|
||||
cleaners: Cleaners,
|
||||
removal_fn: RemovalFn,
|
||||
}
|
||||
|
||||
|
@ -44,8 +47,18 @@ const CHANNEL_MAX_LOAD: usize = CHANNEL_SIZE * 3 / 4;
|
|||
impl Runner {
|
||||
pub fn new(args: Args) -> Self {
|
||||
let removal_fn = Self::new_removal_fn(&args);
|
||||
let mut cleaners: Cleaners = vec![Box::new(DsoCleaner::default())];
|
||||
|
||||
if let Some(wl) = args.allowlist {
|
||||
cleaners.push(Box::new(ListCleaner::new(Action::Keep, wl)));
|
||||
}
|
||||
|
||||
if let Some(bl) = args.blocklist {
|
||||
cleaners.push(Box::new(ListCleaner::new(Action::Remove, bl)));
|
||||
}
|
||||
|
||||
Self {
|
||||
cleaners: vec![Box::new(DsoCleaner::default())],
|
||||
cleaners,
|
||||
removal_fn,
|
||||
}
|
||||
}
|
||||
|
@ -137,8 +150,8 @@ impl Runner {
|
|||
|
||||
for (file, action) in to_remove {
|
||||
if action == Action::Remove {
|
||||
if let Err(err) = (removal_fn)(file) {
|
||||
log::error!("{}", err);
|
||||
if let Err(err) = (removal_fn)(&file) {
|
||||
log::error!("{}: {}", file.display(), err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -101,7 +101,7 @@ impl DsoCleaner {
|
|||
let mut hdr = [0u8; 4];
|
||||
if let Err(e) = f.read_exact(&mut hdr) {
|
||||
if e.kind() != ErrorKind::UnexpectedEof {
|
||||
anyhow::bail!(e)
|
||||
anyhow::bail!("{}: {}", path.display(), e)
|
||||
}
|
||||
|
||||
return Ok(()); // not ELF, ignore
|
||||
|
@ -135,7 +135,7 @@ impl DsoCleaner {
|
|||
let dst = nix::sys::stat::stat(path)?;
|
||||
if src.st_dev != dst.st_dev {
|
||||
log::warn!(
|
||||
"{} points outside of the sysroot filesystem, check if this is intended",
|
||||
"dso: {} points outside of the sysroot filesystem, check if this is intended",
|
||||
path.display()
|
||||
);
|
||||
return Ok(());
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
// SPDX-FileCopyrightText: Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
|
||||
// SPDX-License-Identifier: EUPL-1.2
|
||||
|
||||
use super::Cleaner;
|
||||
use crate::decision::{Action, Decision};
|
||||
use anyhow::{Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use std::path::PathBuf;
|
||||
use tokio::sync::{
|
||||
broadcast::{self, error::RecvError},
|
||||
mpsc,
|
||||
};
|
||||
|
||||
pub struct ListCleaner {
|
||||
action_type: Action,
|
||||
list: PathBuf,
|
||||
}
|
||||
|
||||
impl ListCleaner {
|
||||
pub fn new(action_type: Action, list: PathBuf) -> Self {
|
||||
Self { action_type, list }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Cleaner for ListCleaner {
|
||||
async fn run(
|
||||
&mut self,
|
||||
mut files: broadcast::Receiver<PathBuf>,
|
||||
decisions: mpsc::Sender<Decision>,
|
||||
) -> Result<()> {
|
||||
let current_dir = std::env::current_dir()?;
|
||||
|
||||
// We use a gitignore builder because it has the syntax and
|
||||
// expressivity we need. Checking if a file "is ignored"
|
||||
// will tell us if it matches against the allow-/block-list
|
||||
let mut matcher_builder = ignore::gitignore::GitignoreBuilder::new(current_dir);
|
||||
if let Some(err) = matcher_builder.add(&self.list) {
|
||||
anyhow::bail!("list: error parsing {}: {}", self.list.display(), err);
|
||||
}
|
||||
|
||||
let matcher = matcher_builder
|
||||
.build()
|
||||
.with_context(|| format!("list: cannot build matcher from {}", self.list.display()))?;
|
||||
|
||||
let action_name = match self.action_type {
|
||||
Action::Keep => "allow",
|
||||
Action::Remove => "block",
|
||||
};
|
||||
|
||||
loop {
|
||||
match files.recv().await {
|
||||
Ok(path) => {
|
||||
if matcher.matched(&path, false).is_ignore() {
|
||||
log::info!("{}list: marking {}", action_name, path.display());
|
||||
decisions
|
||||
.send(Decision {
|
||||
action: self.action_type,
|
||||
path: path,
|
||||
})
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
Err(RecvError::Closed) => break,
|
||||
Err(e) => Err(anyhow::anyhow!(e)).context("allowlist: recv error")?,
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(PartialEq, Eq)]
|
||||
#[derive(PartialEq, Eq, Clone, Copy)]
|
||||
pub enum Action {
|
||||
Keep,
|
||||
Remove,
|
||||
|
|
Loading…
Reference in New Issue