Fix listing of S3 prefixes not terminated by a slash #3

Merged
matteo merged 5 commits from eay/serves3:hackathon_slash_fix_erik_and_eren into main 2024-06-12 12:05:38 +02:00
13 changed files with 2122 additions and 619 deletions
Showing only changes of commit 4defbcec1f - Show all commits

2
.gitignore vendored
View File

@ -7,4 +7,4 @@
/build
/target
/Settings.toml
/serves3.toml

View File

@ -23,7 +23,7 @@ repos:
name: Ensure no trailing spaces at the end of lines
- repo: https://github.com/Lucas-C/pre-commit-hooks.git
rev: v1.5.2
rev: v1.5.5
hooks:
- id: remove-crlf
name: Enforce LF instead of CRLF for newlines
@ -40,7 +40,7 @@ repos:
name: Check Rust code
- repo: https://github.com/fsfe/reuse-tool.git
rev: v2.1.0
rev: v3.0.2
hooks:
- id: reuse
name: Check copyright and license information

19
.vscode/launch.json vendored
View File

@ -42,6 +42,25 @@
},
"args": [],
"cwd": "${workspaceFolder}"
},
{
"type": "lldb",
"request": "launch",
"name": "Debug integration test 'integration'",
"cargo": {
"args": [
"test",
"--no-run",
"--test=integration",
"--package=serves3"
],
"filter": {
"name": "integration",
"kind": "test"
}
},
"args": [],
"cwd": "${workspaceFolder}"
}
]
}

2162
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -3,7 +3,7 @@
[package]
name = "serves3"
version = "1.0.0"
version = "1.1.0"
authors = ["Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>"]
description = "A very simple proxy to browse files from private S3 buckets"
@ -20,14 +20,26 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
config = "0.13"
anyhow = "1.0"
human-size = "0.4"
lazy_static = "1.4"
log = "0.4"
rocket = "0.5"
rocket_dyn_templates = { version = "0.1.0", features = ["tera"] }
rust-s3 = { version = "0.33", default-features = false, features = ["tokio-native-tls"] }
rocket_dyn_templates = { version = "0.2.0", features = ["tera"] }
rust-s3 = { version = "0.33", default-features = false, features = [
"tokio-native-tls",
] }
serde = { version = "1.0" }
tempfile = { version = "3.6" }
[dev-dependencies]
rstest = "0.19"
libc = "0.2"
futures = "0.3"
regex = "1.10"
rstest = "0.21"
reqwest = "0.12"
scraper = "0.19"
test-log = "0.2"
testcontainers = "0.17"
testcontainers-modules = { version = "0.5", features = ["minio"] }
tokio = { version = "1", features = ["process"] }

View File

@ -11,9 +11,9 @@ Also helpful to do a different TLS termination.
## Configuration
Copy `Settings.toml.example` to `Settings.toml` and adjust your settings.
Copy `serves3.toml.example` to `serves3.toml` and adjust your settings.
You can also add a `Rocket.toml` file to customize the server options. See the [Rocket documentation](https://rocket.rs/v0.5-rc/guide/configuration/#rockettoml).
You can also use the same file to customize the server options. See the [Rocket documentation](https://rocket.rs/v0.5-rc/guide/configuration/#rockettoml) for a list of understood values.
Then just configure Apache or NGINX to proxy to the given port. For example:
@ -73,3 +73,13 @@ cargo install --root /usr/local --path . # for instance
cd run-folder # folder with serves3.toml
serves3
```
# Changelog
## 1.1.0 Reworked configuration file logic
* **Breaking change**: configuration file renamed to `serves3.toml`. Please note that the format changed slightly; have a look at the provided `serves3.toml.example` file for reference.
## 1.0.0
* Initial release.

View File

@ -1,9 +1,10 @@
# SPDX-FileCopyrightText: Public domain.
# SPDX-License-Identifier: CC0-1.0
access_key_id = ""
secret_access_key = ""
bucket = ""
[default.s3_bucket]
name = ""
endpoint = "https://eu-central-1.linodeobjects.com"
region = "eu-central-1"
access_key_id = ""
secret_access_key = ""
path_style = false

View File

@ -1,72 +1,26 @@
// SPDX-FileCopyrightText: © Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
mod settings;
use {
anyhow::Result,
lazy_static::lazy_static,
rocket::response::Responder,
rocket::serde::Serialize,
rocket::{
fairing::AdHoc,
figment::{
providers::{Env, Format as _, Toml},
Profile,
},
response::Responder,
serde::Serialize,
State,
},
rocket_dyn_templates::{context, Template},
settings::Settings,
std::path::PathBuf,
};
struct Settings {
access_key_id: String,
secret_access_key: String,
bucket_name: String,
endpoint: String,
region: String,
}
lazy_static! {
static ref SETTINGS: Settings = {
let settings = config::Config::builder()
.add_source(config::File::with_name("Settings.toml"))
.add_source(config::Environment::with_prefix("SERVES3"))
.build()
.unwrap();
Settings {
access_key_id: settings
.get_string("access_key_id")
.expect("Missing configuration key access_key_id"),
secret_access_key: settings
.get_string("secret_access_key")
.expect("Missing configuration key secret_access_key"),
bucket_name: settings
.get_string("bucket")
.expect("Missing configuration key bucket"),
region: settings
.get_string("region")
.expect("Missing configuration key region"),
endpoint: settings
.get_string("endpoint")
.expect("Missing configuration key endpoint"),
}
};
static ref BUCKET: s3::bucket::Bucket = {
let region = s3::Region::Custom {
region: SETTINGS.region.clone(),
endpoint: SETTINGS.endpoint.clone(),
};
let credentials = s3::creds::Credentials::new(
Some(&SETTINGS.access_key_id),
Some(&SETTINGS.secret_access_key),
None,
None,
None,
)
.expect("Wrong server S3 configuration");
s3::bucket::Bucket::new(&SETTINGS.bucket_name, region, credentials)
.expect("Cannot find or authenticate to S3 bucket")
};
static ref FILEVIEW_TEMPLATE: &'static str = std::include_str!("../templates/index.html.tera");
// Workaround for https://github.com/SergioBenitez/Rocket/issues/1792
static ref EMPTY_DIR: tempfile::TempDir = tempfile::tempdir()
.expect("Unable to create an empty temporary folder, is the whole FS read-only?");
}
#[derive(Responder)]
enum FileView {
#[response(content_type = "text/html")]
@ -94,7 +48,7 @@ enum Error {
}
#[rocket::get("/<path..>")]
async fn index(path: PathBuf) -> Result<FileView, Error> {
async fn index(path: PathBuf, state: &State<Settings>) -> Result<FileView, Error> {
/*
The way things work in S3, the following holds for us:
- we need to use a slash as separator
@ -107,10 +61,10 @@ async fn index(path: PathBuf) -> Result<FileView, Error> {
we fallback to retrieving the equivalent folder.
*/
if let Ok(result) = s3_serve_file(&path).await {
if let Ok(result) = s3_serve_file(&path, &state).await {
Ok(result)
} else {
let objects = s3_fileview(&path).await?;
let objects = s3_fileview(&path, &state).await?;
let rendered = Template::render(
"index",
context! {
@ -122,7 +76,7 @@ async fn index(path: PathBuf) -> Result<FileView, Error> {
}
}
async fn s3_serve_file(path: &PathBuf) -> Result<FileView, Error> {
async fn s3_serve_file(path: &PathBuf, settings: &Settings) -> Result<FileView, Error> {
let is_root_prefix = path.as_os_str().is_empty();
if is_root_prefix {
return Err(Error::NotFound("Root prefix is not a file".into()));
@ -130,7 +84,8 @@ async fn s3_serve_file(path: &PathBuf) -> Result<FileView, Error> {
// FIXME: this can be big, we should use streaming,
// not loading in memory!
let response = BUCKET
let response = settings
.s3_bucket
.get_object(format!("{}", path.display()))
.await
.map_err(|_| Error::UnknownError("Unable to connect to S3 bucket".into()))?;
@ -145,7 +100,7 @@ async fn s3_serve_file(path: &PathBuf) -> Result<FileView, Error> {
}
}
async fn s3_fileview(path: &PathBuf) -> Result<Vec<FileViewItem>, Error> {
async fn s3_fileview(path: &PathBuf, settings: &Settings) -> Result<Vec<FileViewItem>, Error> {
/*
if listing a folder:
- folders will be under 'common_prefixes'
@ -158,8 +113,9 @@ async fn s3_fileview(path: &PathBuf) -> Result<Vec<FileViewItem>, Error> {
None => "".into(),
};
let s3_objects = BUCKET
.list(s3_folder_path.clone(), Some("/".into()))
let s3_objects = settings
.s3_bucket
.list(s3_folder_path, Some("/".into()))
.await
.map_err(|_| Error::NotFound("Object not found".into()))?;
@ -223,27 +179,35 @@ fn size_bytes_to_human(bytes: u64) -> String {
)
}
lazy_static! {
// Workaround for https://github.com/SergioBenitez/Rocket/issues/1792
static ref EMPTY_DIR: tempfile::TempDir = tempfile::tempdir()
.expect("Unable to create an empty temporary folder, is the whole FS read-only?");
}
#[rocket::launch]
fn rocket() -> _ {
eprintln!("Proxying to {} for {}", BUCKET.host(), BUCKET.name());
let config_figment = rocket::Config::figment().merge(("template_dir", EMPTY_DIR.path())); // We compile the templates in anyway.
let config_figment = rocket::Config::figment()
matteo marked this conversation as resolved Outdated

The last usage of a string can simply be moved instead of cloned to avoid a needless copy.

The last usage of a string can simply be moved instead of cloned to avoid a needless copy.
.merge(Toml::file("serves3.toml").nested())
.merge(Env::prefixed("SERVES3_").global())
.merge(("template_dir", EMPTY_DIR.path())) // We compile the templates in anyway
.select(Profile::from_env_or("SERVES3_PROFILE", "default"));
rocket::custom(config_figment)
.mount("/", rocket::routes![index])
.attach(AdHoc::config::<Settings>())
.attach(Template::custom(|engines| {
engines
.tera
.add_raw_template("index", *FILEVIEW_TEMPLATE)
.add_raw_template("index", std::include_str!("../templates/index.html.tera"))
.unwrap()
}))
}
// Test section starts
// -------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
use rstest::rstest;
#[rstest]
@ -254,9 +218,7 @@ mod tests {
#[case(0, "0.000 B")]
#[case(u64::MAX, format!("{:.3} GB",u64::MAX as f64/(1_000_000_000.0)))]
#[case(u64::MIN, format!("{:.3} B",u64::MIN as f64))]
fn test_size_bytes_to_human(#[case] bytes: u64, #[case] expected: String) {
println!("{}", size_bytes_to_human(bytes));
assert_eq!(size_bytes_to_human(bytes), expected);
fn size_bytes_to_human(#[case] bytes: u64, #[case] expected: String) {
assert_eq!(super::size_bytes_to_human(bytes), expected);
}
}

67
src/settings.rs Normal file
View File

@ -0,0 +1,67 @@
// SPDX-FileCopyrightText: © Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
use {anyhow::anyhow, rocket::serde::Deserialize, serde::de::Error};
/// Application settings, deserialized from the server configuration.
#[derive(Deserialize)]
#[serde(crate = "rocket::serde")]
pub struct Settings {
/// Ready-to-use S3 bucket handle. It is not deserialized field by field:
/// `deserialize_s3_bucket` first reads an `S3Config` and then converts it.
#[serde(deserialize_with = "deserialize_s3_bucket")]
pub s3_bucket: s3::Bucket,
}
/// Serde hook for `Settings::s3_bucket`.
///
/// Reads an [`S3Config`] from `deserializer` and converts it into an
/// [`s3::Bucket`]. A failed conversion is reported as a custom
/// deserialization error carrying the conversion error's message.
fn deserialize_s3_bucket<'de, D>(deserializer: D) -> Result<s3::Bucket, D::Error>
where
    D: serde::Deserializer<'de>,
{
    let raw = S3Config::deserialize(deserializer)?;
    match TryInto::<s3::Bucket>::try_into(raw) {
        Ok(bucket) => Ok(bucket),
        Err(cause) => Err(D::Error::custom(cause)),
    }
}
/// Plain description of the S3 bucket to proxy, as written in the
/// configuration. It is turned into an `s3::Bucket` via `try_into()`.
#[derive(Deserialize)]
pub struct S3Config {
/// Name of the bucket.
pub name: String,
/// Endpoint of the S3 service, used for the custom region.
pub endpoint: String,
/// Region name, used for the custom region.
pub region: String,
/// Whether to switch the bucket to path-style addressing.
/// Falls back to `bool::default()` (`false`) when the key is absent.
#[serde(default)]
pub path_style: bool,
/// Access key passed to the S3 credentials.
pub access_key_id: String,
/// Secret key passed to the S3 credentials.
pub secret_access_key: String,
}
impl TryInto<s3::Bucket> for S3Config {
type Error = anyhow::Error;
fn try_into(self) -> Result<s3::Bucket, Self::Error> {
let region = s3::Region::Custom {
region: self.region,
endpoint: self.endpoint,
};
let credentials = s3::creds::Credentials::new(
Some(&self.access_key_id),
Some(&self.secret_access_key),
None,
None,
None,
)?;
log::info!(
"Serving contents from bucket {} at {}",
&self.name,
region.endpoint()
);
let bucket = s3::Bucket::new(&self.name, region, credentials).map_err(|e| anyhow!(e));
if self.path_style {
bucket.map(|mut b| {
b.set_path_style();
b
})
} else {
bucket
}
}
}

View File

@ -5,11 +5,6 @@
-->
<html>
<head>
<script type="text/javascript">
if (window.location.pathname.endsWith('/') === false) {
window.location.href = window.location + "/";
}
</script>
<style>
body {
font-family: monospace;

38
tests/common/minio.rs Normal file
View File

@ -0,0 +1,38 @@
// SPDX-FileCopyrightText: © Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
use {testcontainers::core::WaitFor, testcontainers::Image, testcontainers_modules::minio};
/// MinIO container image tag used by the integration tests.
const MINIO_IMAGE_TAG: &str = "RELEASE.2024-05-28T17-19-04Z";
/// Thin wrapper around the stock `testcontainers_modules` MinIO image.
///
/// The image name and environment variables are delegated to the wrapped
/// image, while the tag and the readiness condition are supplied here.
/// `Default` is derived: it simply default-constructs the wrapped image.
#[derive(Default)]
pub struct MinIO {
    inner: minio::MinIO,
}

impl Image for MinIO {
    type Args = minio::MinIOServerArgs;

    /// Same image name as the wrapped MinIO image.
    fn name(&self) -> String {
        self.inner.name()
    }

    /// The container counts as ready once `API:` shows up on its stderr.
    fn ready_conditions(&self) -> Vec<WaitFor> {
        vec![WaitFor::message_on_stderr("API:")]
    }

    /// Always the tag in `MINIO_IMAGE_TAG`.
    fn tag(&self) -> String {
        MINIO_IMAGE_TAG.into()
    }

    /// Same environment variables as the wrapped MinIO image.
    fn env_vars(&self) -> Box<dyn Iterator<Item = (&String, &String)> + '_> {
        self.inner.env_vars()
    }
}

123
tests/common/mod.rs Normal file
View File

@ -0,0 +1,123 @@
// SPDX-FileCopyrightText: © Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
mod minio;
use {
anyhow::{anyhow, Result},
reqwest::Url,
std::{ptr::null_mut, str::FromStr},
testcontainers::{runners::AsyncRunner, ContainerAsync},
tokio::io::AsyncBufReadExt as _,
};
/// Everything a single integration test needs; built by `Test::new`.
pub struct Test {
/// URL that the spawned `serves3` process reported on its standard output.
pub base_url: Url,
/// Handle to the bucket created inside the MinIO container.
pub bucket: s3::Bucket,
/// The `serves3` child process under test.
pub serves3: tokio::process::Child,
/// The running MinIO container.
pub minio: ContainerAsync<minio::MinIO>,
}
/// Upper bound on how long to wait for `serves3` to print its launch line.
const MAXIMUM_SERVES3_INIT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(5);
/// Name of the bucket created for the tests.
const BUCKET_NAME: &str = "integration-test-bucket";
/// Region name used for the custom S3 region.
const REGION: &str = "test-region";
/// Access key used both for the test bucket handle and for `serves3`.
const ACCESS_KEY: &str = "minioadmin";
/// Secret key used both for the test bucket handle and for `serves3`.
const SECRET_KEY: &str = "minioadmin";
impl Test {
    /// Brings up a complete environment for one integration test.
    ///
    /// Steps, in order:
    /// 1. start a MinIO container and derive its HTTP endpoint from the
    ///    mapped port 9000;
    /// 2. create the test bucket (path-style) on that endpoint;
    /// 3. spawn the `serves3` binary, configured purely through `SERVES3_*`
    ///    environment variables, with its stdout piped;
    /// 4. read the child's stdout line by line until the Rocket launch line
    ///    appears, and take the base URL from it.
    ///
    /// # Errors
    ///
    /// Fails if the container cannot be started, the bucket cannot be
    /// created, the binary cannot be spawned, or the launch line does not
    /// appear within `MAXIMUM_SERVES3_INIT_TIMEOUT`.
    pub async fn new() -> Result<Self> {
        // NOTE: right now there is a bug in bollard
        // that makes testcontainers work in Docker only and
        // not podman (it is not able to fetch exposed ports).
        // If this test fails make sure you are using docker.
        std::env::remove_var("DOCKER_HOST");

        let image = minio::MinIO::default();
        let container = image.start().await?;
        let endpoint = format!(
            "http://{host}:{port}",
            host = container.get_host().await?,
            port = container.get_host_port_ipv4(9000).await?
        );

        // The constants are already `&str`; no extra borrow is needed.
        let credentials = s3::creds::Credentials::new(
            Some(ACCESS_KEY),
            Some(SECRET_KEY),
            None,
            None,
            Some("test"),
        )?;
        let bucket = s3::Bucket::create_with_path_style(
            BUCKET_NAME,
            s3::Region::Custom {
                region: REGION.into(),
                endpoint: endpoint.clone(),
            },
            credentials,
            s3::BucketConfiguration::private(),
        )
        .await?
        .bucket;

        let bin = std::env!("CARGO_BIN_EXE_serves3");
        let mut child = tokio::process::Command::new(bin)
            .env("SERVES3_ADDRESS", "127.0.0.1")
            // Port 0: the actual port is read back from the launch line below.
            .env("SERVES3_PORT", "0")
            .env("SERVES3_LOG_LEVEL", "debug")
            .env(
                "SERVES3_S3_BUCKET",
                format!(
                    r#"{{
name = "{name}",
endpoint = "{endpoint}",
region = "{region}",
access_key_id = "{user}",
secret_access_key = "{secret}",
path_style = true
}}"#,
                    name = BUCKET_NAME,
                    endpoint = endpoint,
                    region = REGION,
                    user = ACCESS_KEY,
                    secret = SECRET_KEY
                ),
            )
            .stdout(std::process::Stdio::piped())
            .spawn()?;

        // Outer `?` handles the timeout, inner `?` the result of the scan itself.
        let base_url = tokio::time::timeout(MAXIMUM_SERVES3_INIT_TIMEOUT, async {
            let stdout = child.stdout.as_mut().unwrap();
            let mut lines = tokio::io::BufReader::new(stdout).lines();
            let re = regex::Regex::new("^Rocket has launched from (http://.+)$").unwrap();
            while let Some(line) = lines.next_line().await? {
                println!("{}", &line);
                if let Some(captures) = re.captures(&line) {
                    let url = captures.get(1).unwrap().as_str();
                    return Ok(Url::from_str(url)?);
                }
            }

            Err(anyhow!("Rocket did not print that it has started"))
        })
        .await??;

        Ok(Self {
            base_url,
            bucket,
            serves3: child,
            minio: container,
        })
    }
}
impl Drop for Test {
    /// Asks the spawned `serves3` process to terminate (SIGTERM) and waits
    /// for it to exit, so no server process outlives the test.
    fn drop(&mut self) {
        // Per tokio's documentation, `id()` returns `None` once the child has
        // been polled to completion. In that case there is nothing left to
        // signal, and panicking inside `drop` must be avoided. A pid that does
        // not fit `pid_t` is skipped as well instead of being truncated.
        let pid = self
            .serves3
            .id()
            .and_then(|raw| libc::pid_t::try_from(raw).ok());

        if let Some(pid) = pid {
            // SAFETY: both calls only take plain integer arguments; the status
            // pointer given to `waitpid` is null, which tells it not to store
            // an exit status. `pid` was just reported for our own child.
            unsafe {
                libc::kill(pid, libc::SIGTERM);
                libc::waitpid(pid, null_mut(), 0);
            }
        }
    }
}

158
tests/integration.rs Normal file
View File

@ -0,0 +1,158 @@
// SPDX-FileCopyrightText: © Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
mod common;
use scraper::{Html, Selector};
/// Objects at the top level and below a prefix are served back verbatim.
#[test_log::test(tokio::test)]
async fn serves_files() -> anyhow::Result<()> {
    let test = common::Test::new().await?;

    let fixtures = [
        ("file.txt", "I am a file"),
        ("folder/file.txt", "I am a file in a folder"),
    ];

    // Upload everything first, then fetch each object through the proxy.
    for (key, body) in fixtures {
        test.bucket.put_object(key, body.as_bytes()).await?;
    }

    for (key, body) in fixtures {
        let resp = reqwest::get(test.base_url.join(key)?).await?;
        assert_eq!(resp.bytes().await?, body);
    }

    Ok(())
}
/// The root listing shows the folder entry first and the file second.
#[test_log::test(tokio::test)]
async fn serves_top_level_folder() -> anyhow::Result<()> {
    let test = common::Test::new().await?;

    for (key, body) in [
        ("file.txt", "I am a file"),
        ("folder/file.txt", "I am a file in a folder"),
    ] {
        test.bucket.put_object(key, body.as_bytes()).await?;
    }

    // Check that a file in the toplevel is listed:
    let resp = reqwest::get(test.base_url.clone()).await?;
    let status = resp.status();
    assert!(status.is_success(), "Request failed with {}", status);

    let text = resp.text().await?;
    println!("{}", &text);
    let document = Html::parse_document(&text);

    // Page heading.
    let heading = Selector::parse(r#"h1"#).unwrap();
    document.select(&heading).for_each(|title| {
        assert_eq!(title.inner_html(), "/", "title doesn't match");
    });

    // First table row: the folder.
    let first_row =
        Selector::parse(r#"table > tbody > tr:nth-child(1) > td:first-child > a"#).unwrap();
    document.select(&first_row).for_each(|item| {
        assert_eq!(item.attr("href"), Some("folder/"));
        assert_eq!(item.text().next(), Some("folder/"));
    });

    // Second table row: the file.
    let second_row =
        Selector::parse(r#"table > tbody > tr:nth-child(2) > td:first-child > a"#).unwrap();
    document.select(&second_row).for_each(|item| {
        assert_eq!(item.attr("href"), Some("file.txt"));
        assert_eq!(item.text().next(), Some("file.txt"));
    });

    Ok(())
}
/// Listing `folder/` shows the parent link first and the contained file second.
#[test_log::test(tokio::test)]
async fn serves_second_level_folder() -> anyhow::Result<()> {
    let test = common::Test::new().await?;

    for (key, body) in [
        ("file.txt", "I am a file"),
        ("folder/file.txt", "I am a file in a folder"),
    ] {
        test.bucket.put_object(key, body.as_bytes()).await?;
    }

    // Check that a file in the second level is listed:
    let resp = reqwest::get(test.base_url.join("folder/")?).await?;
    let status = resp.status();
    assert!(status.is_success(), "Request failed with {}", status);

    let text = resp.text().await?;
    println!("{}", &text);
    let document = Html::parse_document(&text);

    // Page heading.
    let heading = Selector::parse(r#"h1"#).unwrap();
    document.select(&heading).for_each(|title| {
        assert_eq!(title.inner_html(), "folder/", "title doesn't match");
    });

    // First table row: link back to the parent.
    let first_row =
        Selector::parse(r#"table > tbody > tr:nth-child(1) > td:first-child > a"#).unwrap();
    document.select(&first_row).for_each(|item| {
        assert_eq!(item.attr("href"), Some("../"));
        assert_eq!(item.inner_html(), "..");
    });

    // Second table row: the file inside the folder.
    let second_row =
        Selector::parse(r#"table > tbody > tr:nth-child(2) > td:first-child > a"#).unwrap();
    document.select(&second_row).for_each(|item| {
        assert_eq!(item.attr("href"), Some("file.txt"));
        assert_eq!(item.inner_html(), "file.txt");
    });

    Ok(())
}
/// Requesting `folder` (no trailing slash) yields the same listing as `folder/`.
#[test_log::test(tokio::test)]
async fn serves_second_level_folder_without_ending_slash() -> anyhow::Result<()> {
    let test = common::Test::new().await?;

    for (key, body) in [
        ("file.txt", "I am a file"),
        ("folder/file.txt", "I am a file in a folder"),
    ] {
        test.bucket.put_object(key, body.as_bytes()).await?;
    }

    // Check that a file in the second level is listed even without an ending slash:
    let resp = reqwest::get(test.base_url.join("folder")?).await?;
    let status = resp.status();
    assert!(status.is_success(), "Request failed with {}", status);

    let text = resp.text().await?;
    println!("{}", &text);
    let document = Html::parse_document(&text);

    // Page heading.
    let heading = Selector::parse(r#"h1"#).unwrap();
    document.select(&heading).for_each(|title| {
        assert_eq!(title.inner_html(), "folder/", "title doesn't match");
    });

    // First table row: link back to the parent.
    let first_row =
        Selector::parse(r#"table > tbody > tr:nth-child(1) > td:first-child > a"#).unwrap();
    document.select(&first_row).for_each(|item| {
        assert_eq!(item.attr("href"), Some("../"));
        assert_eq!(item.inner_html(), "..");
    });

    // Second table row: the file inside the folder.
    let second_row =
        Selector::parse(r#"table > tbody > tr:nth-child(2) > td:first-child > a"#).unwrap();
    document.select(&second_row).for_each(|item| {
        assert_eq!(item.attr("href"), Some("file.txt"));
        assert_eq!(item.inner_html(), "file.txt");
    });

    Ok(())
}