feat(list): add allowlist and blocklist handling
This uses the .gitignore format to identify which files should be allowed / blocked. The allowlist gets precedence over the blocklist if both are specified.
This commit is contained in:
parent
54075012aa
commit
344e16cf0f
|
@ -120,6 +120,16 @@ version = "2.8.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36"
|
checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bstr"
|
||||||
|
version = "1.11.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bytes"
|
name = "bytes"
|
||||||
version = "1.9.0"
|
version = "1.9.0"
|
||||||
|
@ -184,6 +194,31 @@ version = "1.0.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
|
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-deque"
|
||||||
|
version = "0.8.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-epoch",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-epoch"
|
||||||
|
version = "0.9.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-utils"
|
||||||
|
version = "0.8.21"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "env_filter"
|
name = "env_filter"
|
||||||
version = "0.1.3"
|
version = "0.1.3"
|
||||||
|
@ -225,6 +260,19 @@ version = "0.31.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
|
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "globset"
|
||||||
|
version = "0.4.15"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"bstr",
|
||||||
|
"log",
|
||||||
|
"regex-automata",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "goblin"
|
name = "goblin"
|
||||||
version = "0.9.3"
|
version = "0.9.3"
|
||||||
|
@ -254,6 +302,22 @@ version = "2.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
|
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ignore"
|
||||||
|
version = "0.4.23"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6d89fd380afde86567dfba715db065673989d6253f42b88179abd3eae47bda4b"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-deque",
|
||||||
|
"globset",
|
||||||
|
"log",
|
||||||
|
"memchr",
|
||||||
|
"regex-automata",
|
||||||
|
"same-file",
|
||||||
|
"walkdir",
|
||||||
|
"winapi-util",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "indexmap"
|
name = "indexmap"
|
||||||
version = "2.7.1"
|
version = "2.7.1"
|
||||||
|
@ -496,6 +560,26 @@ dependencies = [
|
||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde"
|
||||||
|
version = "1.0.217"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
|
||||||
|
dependencies = [
|
||||||
|
"serde_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_derive"
|
||||||
|
version = "1.0.217"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "signal-hook-registry"
|
name = "signal-hook-registry"
|
||||||
version = "1.4.2"
|
version = "1.4.2"
|
||||||
|
@ -547,6 +631,7 @@ dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"goblin",
|
"goblin",
|
||||||
|
"ignore",
|
||||||
"log",
|
"log",
|
||||||
"memmap2",
|
"memmap2",
|
||||||
"nix",
|
"nix",
|
||||||
|
|
|
@ -14,6 +14,7 @@ anyhow = { version = "1.0" }
|
||||||
async-trait = { version = "0.1" }
|
async-trait = { version = "0.1" }
|
||||||
clap = { version = "4.5", features = ["derive"] }
|
clap = { version = "4.5", features = ["derive"] }
|
||||||
env_logger = { version = "0.11" }
|
env_logger = { version = "0.11" }
|
||||||
|
ignore = { version = "0.4" }
|
||||||
goblin = { version = "0.9" }
|
goblin = { version = "0.9" }
|
||||||
log = { version = "0.4" }
|
log = { version = "0.4" }
|
||||||
memmap2 = { version = "0.9" }
|
memmap2 = { version = "0.9" }
|
||||||
|
|
|
@ -18,6 +18,15 @@ pub struct Args {
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub split_to: Option<PathBuf>,
|
pub split_to: Option<PathBuf>,
|
||||||
|
|
||||||
|
/// An allowlist of files to keep, in .gitignore format.
|
||||||
|
/// Note: this will take precedence over all other removal decisions.
|
||||||
|
#[arg(long)]
|
||||||
|
pub allowlist: Option<PathBuf>,
|
||||||
|
|
||||||
|
/// A blocklist of files to remove, in .gitignore format.
|
||||||
|
#[arg(long)]
|
||||||
|
pub blocklist: Option<PathBuf>,
|
||||||
|
|
||||||
/// The location of the sysroot to clean up
|
/// The location of the sysroot to clean up
|
||||||
pub sysroot_location: PathBuf,
|
pub sysroot_location: PathBuf,
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
// SPDX-License-Identifier: EUPL-1.2
|
// SPDX-License-Identifier: EUPL-1.2
|
||||||
|
|
||||||
mod dso;
|
mod dso;
|
||||||
|
mod list;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
args::Args,
|
args::Args,
|
||||||
|
@ -10,6 +11,7 @@ use crate::{
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use dso::DsoCleaner;
|
use dso::DsoCleaner;
|
||||||
|
use list::ListCleaner;
|
||||||
use nix::libc::EXDEV;
|
use nix::libc::EXDEV;
|
||||||
use std::{
|
use std::{
|
||||||
collections::HashMap,
|
collections::HashMap,
|
||||||
|
@ -26,15 +28,16 @@ use walkdir::{DirEntry, WalkDir};
|
||||||
pub trait Cleaner {
|
pub trait Cleaner {
|
||||||
async fn run(
|
async fn run(
|
||||||
&mut self,
|
&mut self,
|
||||||
files: broadcast::Receiver<PathBuf>,
|
mut files: broadcast::Receiver<PathBuf>,
|
||||||
decisions: mpsc::Sender<Decision>,
|
decisions: mpsc::Sender<Decision>,
|
||||||
) -> Result<()>;
|
) -> Result<()>;
|
||||||
}
|
}
|
||||||
|
|
||||||
type RemovalFn = Box<dyn Fn(PathBuf) -> io::Result<()>>;
|
type Cleaners = Vec<Box<dyn Cleaner + Send>>;
|
||||||
|
type RemovalFn = Box<dyn Fn(&Path) -> io::Result<()>>;
|
||||||
|
|
||||||
pub struct Runner {
|
pub struct Runner {
|
||||||
cleaners: Vec<Box<dyn Cleaner + Send>>,
|
cleaners: Cleaners,
|
||||||
removal_fn: RemovalFn,
|
removal_fn: RemovalFn,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,8 +47,18 @@ const CHANNEL_MAX_LOAD: usize = CHANNEL_SIZE * 3 / 4;
|
||||||
impl Runner {
|
impl Runner {
|
||||||
pub fn new(args: Args) -> Self {
|
pub fn new(args: Args) -> Self {
|
||||||
let removal_fn = Self::new_removal_fn(&args);
|
let removal_fn = Self::new_removal_fn(&args);
|
||||||
|
let mut cleaners: Cleaners = vec![Box::new(DsoCleaner::default())];
|
||||||
|
|
||||||
|
if let Some(wl) = args.allowlist {
|
||||||
|
cleaners.push(Box::new(ListCleaner::new(Action::Keep, wl)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(bl) = args.blocklist {
|
||||||
|
cleaners.push(Box::new(ListCleaner::new(Action::Remove, bl)));
|
||||||
|
}
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
cleaners: vec![Box::new(DsoCleaner::default())],
|
cleaners,
|
||||||
removal_fn,
|
removal_fn,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -137,8 +150,8 @@ impl Runner {
|
||||||
|
|
||||||
for (file, action) in to_remove {
|
for (file, action) in to_remove {
|
||||||
if action == Action::Remove {
|
if action == Action::Remove {
|
||||||
if let Err(err) = (removal_fn)(file) {
|
if let Err(err) = (removal_fn)(&file) {
|
||||||
log::error!("{}", err);
|
log::error!("{}: {}", file.display(), err);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -101,7 +101,7 @@ impl DsoCleaner {
|
||||||
let mut hdr = [0u8; 4];
|
let mut hdr = [0u8; 4];
|
||||||
if let Err(e) = f.read_exact(&mut hdr) {
|
if let Err(e) = f.read_exact(&mut hdr) {
|
||||||
if e.kind() != ErrorKind::UnexpectedEof {
|
if e.kind() != ErrorKind::UnexpectedEof {
|
||||||
anyhow::bail!(e)
|
anyhow::bail!("{}: {}", path.display(), e)
|
||||||
}
|
}
|
||||||
|
|
||||||
return Ok(()); // not ELF, ignore
|
return Ok(()); // not ELF, ignore
|
||||||
|
@ -135,7 +135,7 @@ impl DsoCleaner {
|
||||||
let dst = nix::sys::stat::stat(path)?;
|
let dst = nix::sys::stat::stat(path)?;
|
||||||
if src.st_dev != dst.st_dev {
|
if src.st_dev != dst.st_dev {
|
||||||
log::warn!(
|
log::warn!(
|
||||||
"{} points outside of the sysroot filesystem, check if this is intended",
|
"dso: {} points outside of the sysroot filesystem, check if this is intended",
|
||||||
path.display()
|
path.display()
|
||||||
);
|
);
|
||||||
return Ok(());
|
return Ok(());
|
||||||
|
|
|
@ -0,0 +1,71 @@
|
||||||
|
// SPDX-FileCopyrightText: Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
|
||||||
|
// SPDX-License-Identifier: EUPL-1.2
|
||||||
|
|
||||||
|
use super::Cleaner;
|
||||||
|
use crate::decision::{Action, Decision};
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use tokio::sync::{
|
||||||
|
broadcast::{self, error::RecvError},
|
||||||
|
mpsc,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub struct ListCleaner {
|
||||||
|
action_type: Action,
|
||||||
|
list: PathBuf,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ListCleaner {
|
||||||
|
pub fn new(action_type: Action, list: PathBuf) -> Self {
|
||||||
|
Self { action_type, list }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Cleaner for ListCleaner {
|
||||||
|
async fn run(
|
||||||
|
&mut self,
|
||||||
|
mut files: broadcast::Receiver<PathBuf>,
|
||||||
|
decisions: mpsc::Sender<Decision>,
|
||||||
|
) -> Result<()> {
|
||||||
|
let current_dir = std::env::current_dir()?;
|
||||||
|
|
||||||
|
// We use a gitignore builder because it has the syntax and
|
||||||
|
// expressivity we need. Checking if a file "is ignored"
|
||||||
|
// will tell us if it matches against the allow-/block-list
|
||||||
|
let mut matcher_builder = ignore::gitignore::GitignoreBuilder::new(current_dir);
|
||||||
|
if let Some(err) = matcher_builder.add(&self.list) {
|
||||||
|
anyhow::bail!("list: error parsing {}: {}", self.list.display(), err);
|
||||||
|
}
|
||||||
|
|
||||||
|
let matcher = matcher_builder
|
||||||
|
.build()
|
||||||
|
.with_context(|| format!("list: cannot build matcher from {}", self.list.display()))?;
|
||||||
|
|
||||||
|
let action_name = match self.action_type {
|
||||||
|
Action::Keep => "allow",
|
||||||
|
Action::Remove => "block",
|
||||||
|
};
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match files.recv().await {
|
||||||
|
Ok(path) => {
|
||||||
|
if matcher.matched(&path, false).is_ignore() {
|
||||||
|
log::info!("{}list: marking {}", action_name, path.display());
|
||||||
|
decisions
|
||||||
|
.send(Decision {
|
||||||
|
action: self.action_type,
|
||||||
|
path: path,
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(RecvError::Closed) => break,
|
||||||
|
Err(e) => Err(anyhow::anyhow!(e)).context("allowlist: recv error")?,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
#[derive(PartialEq, Eq)]
|
#[derive(PartialEq, Eq, Clone, Copy)]
|
||||||
pub enum Action {
|
pub enum Action {
|
||||||
Keep,
|
Keep,
|
||||||
Remove,
|
Remove,
|
||||||
|
|
Loading…
Reference in New Issue