Compare commits

..

No commits in common. "main" and "main" have entirely different histories.
main ... main

21 changed files with 1004 additions and 3437 deletions

2
.gitignore vendored
View file

@ -7,4 +7,4 @@
/build
/target
/serves3.toml
/Settings.toml

View file

@ -3,7 +3,7 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
rev: v4.5.0
hooks:
- id: check-yaml
name: Check YAML files syntax
@ -23,7 +23,7 @@ repos:
name: Ensure no trailing spaces at the end of lines
- repo: https://github.com/Lucas-C/pre-commit-hooks.git
rev: v1.5.5
rev: v1.5.2
hooks:
- id: remove-crlf
name: Enforce LF instead of CRLF for newlines
@ -40,7 +40,7 @@ repos:
name: Check Rust code
- repo: https://github.com/fsfe/reuse-tool.git
rev: v5.0.2
rev: v2.1.0
hooks:
- id: reuse
name: Check copyright and license information
@ -56,6 +56,6 @@ repos:
- id: trufflehog
name: TruffleHog
description: Detect secrets in your data.
entry: bash -c 'podman run -v "$(pwd):/workdir" --rm docker.io/trufflesecurity/trufflehog:latest git file:///workdir' --only-verified
entry: bash -c 'podman run -v "$(pwd):/workdir" --rm docker.io/trufflesecurity/trufflehog:latest git file:///workdir'
language: system
stages: ["pre-commit", "pre-push"]
stages: ["commit", "push"]

View file

@ -2,10 +2,10 @@
// SPDX-License-Identifier: CC0-1.0
{
"recommendations": [
"fill-labs.dependi",
"tamasfe.even-better-toml",
"ritwickdey.LiveServer",
"rust-lang.rust-analyzer",
"serayuzgur.crates",
"vadimcn.vscode-lldb",
"zaaack.markdown-editor",
]

19
.vscode/launch.json vendored
View file

@ -42,25 +42,6 @@
},
"args": [],
"cwd": "${workspaceFolder}"
},
{
"type": "lldb",
"request": "launch",
"name": "Debug integration test 'integration'",
"cargo": {
"args": [
"test",
"--no-run",
"--test=integration",
"--package=serves3"
],
"filter": {
"name": "integration",
"kind": "test"
}
},
"args": [],
"cwd": "${workspaceFolder}"
}
]
}

View file

@ -2,5 +2,4 @@
// SPDX-License-Identifier: CC0-1.0
{
"liveServer.settings.port": 8001,
"cmake.configureOnOpen": true,
}

View file

@ -3,15 +3,13 @@
cmake_minimum_required(VERSION 3.25)
project(serves3 VERSION 1.1.0 LANGUAGES C)
include(GNUInstallDirs)
project(serves3 VERSION 1.0.0 LANGUAGES NONE)
include(FetchContent)
FetchContent_Declare(
Corrosion
GIT_REPOSITORY https://github.com/corrosion-rs/corrosion.git
GIT_TAG v0.5.0
GIT_TAG v0.4.1
)
FetchContent_MakeAvailable(Corrosion)
@ -22,7 +20,3 @@ corrosion_import_crate(
message(STATUS "Imported crates: ${imported_crates}")
install(IMPORTED_RUNTIME_ARTIFACTS serves3)
install(FILES serves3.toml.example
DESTINATION ${CMAKE_INSTALL_DOCDIR})
install(FILES serves3@.service
DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/systemd/system)

3240
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -3,7 +3,7 @@
[package]
name = "serves3"
version = "1.2.0"
version = "1.0.0"
authors = ["Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>"]
description = "A very simple proxy to browse files from private S3 buckets"
@ -15,33 +15,16 @@ readme = "README.md"
keywords = ["s3", "proxy", "bucket"]
categories = ["command-line-utilities", "web-programming::http-server"]
edition = "2024"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0"
bytes = "1.10"
futures = "0.3"
config = "0.13"
human-size = "0.4"
lazy_static = "1.4"
log = "0.4"
rocket = "0.5"
rocket_dyn_templates = { version = "0.2.0", features = ["tera"] }
object_store = { version = "0.12", features = ["aws"] }
serde = "1.0"
tempfile = "3.21"
[dev-dependencies]
delegate = "0.13"
futures = "0.3"
libc = "0.2"
minio = "0.3"
regex = "1.11"
reqwest = "0.12"
rstest = "0.26"
scraper = "0.24"
test-log = "0.2"
testcontainers = "0.24"
testcontainers-modules = { version = "0.12", features = ["minio"] }
tokio = { version = "1", features = ["process", "rt-multi-thread"] }
rocket_dyn_templates = { version = "0.1.0", features = ["tera"] }
rust-s3 = { version = "0.33", default-features = false, features = ["tokio-native-tls"] }
serde = { version = "1.0" }
tempfile = { version = "3.6" }

View file

@ -1,21 +0,0 @@
# SPDX-FileCopyrightText: © Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
# SPDX-License-Identifier: EUPL-1.2
FROM docker.io/rust:1.88-alpine3.20
RUN apk --no-cache add musl-dev && \
cargo install --path .
FROM docker.io/alpine:3.20
COPY --from=rust /usr/local/cargo/bin/serves3 /usr/bin/serves3
EXPOSE 8000
ENV ROCKET_PROFILE=production
# This env variable should be in the format:
# {endpoint=<endpoint>,region=<region>,access_key_id=<access_key_id>,secret_access_key=<secret_access_key>}
ENV ROCKET_S3_BUCKET={endpoint=,region=,access_key_id=,secret_access_key=}
CMD ["serves3"]

View file

@ -1,8 +1,5 @@
<!--
SPDX-FileCopyrightText: © Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
SPDX-License-Identifier: EUPL-1.2
-->
[//]: # SPDX-FileCopyrightText: © Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
[//]: # SPDX-License-Identifier: EUPL-1.2
# serves3
@ -14,39 +11,9 @@ Also helpful to do a different TLS termination.
## Configuration
Copy `serves3.toml.example` to `serves3.toml` from this project's sources and adjust your settings. If the project was built and installed via CMake, a copy of the example settings file is in `/usr/share/doc/serves3`.
Copy `Settings.toml.example` to `Settings.toml` and adjust your settings.
For instance:
```toml
# apply this configuration to Rocket's "default" profile
[default.s3_bucket]
# the bucket name
name = ""
# the API endpoint address
endpoint = "https://eu-central-1.linodeobjects.com"
# the bucket region
region = "eu-central-1"
# the access key ID
access_key_id = ""
# the access key secret
secret_access_key = ""
# whether to use path_style S3 URLs, see
# https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#path-style-access
path_style = false
# Here you can add any other rocket options, see
# https://rocket.rs/guide/v0.5/configuration/
[default]
[debug]
[release]
```
You can also use the same file to customize the server options. See the [Rocket documentation](https://rocket.rs/v0.5-rc/guide/configuration/#rockettoml) for a list of understood values.
You can also add a `Rocket.toml` file to customize the server options. See the [Rocket documentation](https://rocket.rs/v0.5-rc/guide/configuration/#rockettoml).
Then just configure Apache or NGINX to proxy to the given port. For example:
@ -92,10 +59,10 @@ Then, e.g. for running on port 8000, you would put the corresponding configurati
If you want more granular control on installation options, use CMake:
```bash
cmake -DCMAKE_INSTALL_PREFIX=/usr -B build .
cmake -B build .
cmake --build build
sudo cmake --install build
cd run-folder # folder with serves3.toml
cmake --install build
cd run-folder # folder with Settings.toml
serves3
```
@ -103,31 +70,6 @@ Else you can simply rely on `cargo`:
```bash
cargo install --root /usr/local --path . # for instance
cd run-folder # folder with serves3.toml
cd run-folder # folder with Settings.toml
serves3
```
# Changelog
## 1.2.0
* Migrate to [object_store](https://crates.io/crates/object_store). This should
allow streaming of bigger files.
* Bump dependencies
## 1.1.2
* Bump dependencies, adopt Rust 2024 edition
## 1.1.1
* Bump dependencies
## 1.1.0 Reworked configuration file logic
* **Breaking change**: configuration file renamed to `serves3.toml`. Please note that the format changed slightly; have a look at the provided `serves3.toml.example` file for reference.
* Fixes #2: URLs to directories not ending with a slash are not redirected properly
## 1.0.0
* Initial release.

9
Settings.toml.example Normal file
View file

@ -0,0 +1,9 @@
# SPDX-FileCopyrightText: Public domain.
# SPDX-License-Identifier: CC0-1.0
access_key_id = ""
secret_access_key = ""
bucket = ""
endpoint = "https://eu-central-1.linodeobjects.com"
region = "eu-central-1"

178
deny.toml
View file

@ -12,11 +12,6 @@
# The values provided in this template are the default values that will be used
# when any section or field is not specified in your own configuration
# Root options
# The graph table configures how the dependency graph is constructed and thus
# which crates the checks are performed against
[graph]
# If 1 or more target triples (and optionally, target_features) are specified,
# only the specified targets will be checked when running `cargo deny check`.
# This means, if a particular package is only ever used as a target specific
@ -28,57 +23,46 @@
targets = [
# The triple can be any string, but only the target triples built in to
# rustc (as of 1.40) can be checked against actual config expressions
#"x86_64-unknown-linux-musl",
#{ triple = "x86_64-unknown-linux-musl" },
# You can also specify which target_features you promise are enabled for a
# particular target. target_features are currently not validated against
# the actual valid features supported by the target architecture.
#{ triple = "wasm32-unknown-unknown", features = ["atomics"] },
]
# When creating the dependency graph used as the source of truth when checks are
# executed, this field can be used to prune crates from the graph, removing them
# from the view of cargo-deny. This is an extremely heavy hammer, as if a crate
# is pruned from the graph, all of its dependencies will also be pruned unless
# they are connected to another crate in the graph that hasn't been pruned,
# so it should be used with care. The identifiers are [Package ID Specifications]
# (https://doc.rust-lang.org/cargo/reference/pkgid-spec.html)
#exclude = []
# If true, metadata will be collected with `--all-features`. Note that this can't
# be toggled off if true, if you want to conditionally enable `--all-features` it
# is recommended to pass `--all-features` on the cmd line instead
all-features = true
# If true, metadata will be collected with `--no-default-features`. The same
# caveat with `all-features` applies
no-default-features = false
# If set, these feature will be enabled when collecting metadata. If `--features`
# is specified on the cmd line they will take precedence over this option.
#features = []
# The output table provides options for how/if diagnostics are outputted
[output]
# When outputting inclusion graphs in diagnostics that include features, this
# option can be used to specify the depth at which feature edges will be added.
# This option is included since the graphs can be quite large and the addition
# of features from the crate(s) to all of the graph roots can be far too verbose.
# This option can be overridden via `--feature-depth` on the cmd line
feature-depth = 1
# This section is considered when running `cargo deny check advisories`
# More documentation for the advisories section can be found here:
# https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html
[advisories]
# The path where the advisory databases are cloned/fetched into
#db-path = "$CARGO_HOME/advisory-dbs"
# The path where the advisory database is cloned/fetched into
db-path = "~/.cargo/advisory-db"
# The url(s) of the advisory databases to use
#db-urls = ["https://github.com/rustsec/advisory-db"]
db-urls = ["https://github.com/rustsec/advisory-db"]
# The lint level for security vulnerabilities
vulnerability = "deny"
# The lint level for unmaintained crates
unmaintained = "warn"
# The lint level for crates that have been yanked from their source registry
yanked = "warn"
# The lint level for crates with security notices. Note that as of
# 2019-12-17 there are no security notice advisories in
# https://github.com/rustsec/advisory-db
notice = "warn"
# A list of advisory IDs to ignore. Note that ignored advisories will still
# output a note when they are encountered.
ignore = [
#"RUSTSEC-0000-0000",
#{ id = "RUSTSEC-0000-0000", reason = "you can specify a reason the advisory is ignored" },
#"a-crate-that-is-yanked@0.1.1", # you can also ignore yanked crate versions if you wish
#{ crate = "a-crate-that-is-yanked@0.1.1", reason = "you can specify why you are ignoring the yanked crate" },
{ id = "RUSTSEC-2024-0388", reason = "derivative is used indirectly through minio, which is used only for testing" },
]
# Threshold for security vulnerabilities, any vulnerability with a CVSS score
# lower than the range specified will be ignored. Note that ignored advisories
# will still output a note when they are encountered.
# * None - CVSS Score 0.0
# * Low - CVSS Score 0.1 - 3.9
# * Medium - CVSS Score 4.0 - 6.9
# * High - CVSS Score 7.0 - 8.9
# * Critical - CVSS Score 9.0 - 10.0
#severity-threshold =
# If this is true, then cargo deny will use the git executable to fetch advisory database.
# If this is false, then it uses a built-in git library.
# Setting this to true can be helpful if you have special authentication requirements that cargo-deny does not support.
@ -89,19 +73,36 @@ ignore = [
# More documentation for the licenses section can be found here:
# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html
[licenses]
# The lint level for crates which do not have a detectable license
unlicensed = "deny"
# List of explicitly allowed licenses
# See https://spdx.org/licenses/ for list of possible licenses
# [possible values: any SPDX 3.11 short identifier (+ optional exception)].
allow = [
"EUPL-1.2",
"BSD-3-Clause",
"CC0-1.0",
"ISC",
"MIT",
"Apache-2.0",
"Unicode-3.0",
#"MIT",
#"Apache-2.0",
#"Apache-2.0 WITH LLVM-exception",
]
# List of explicitly disallowed licenses
# See https://spdx.org/licenses/ for list of possible licenses
# [possible values: any SPDX 3.11 short identifier (+ optional exception)].
deny = [
#"Nokia",
]
# Lint level for licenses considered copyleft
copyleft = "allow"
# Blanket approval or denial for OSI-approved or FSF Free/Libre licenses
# * both - The license will be approved if it is both OSI-approved *AND* FSF
# * either - The license will be approved if it is either OSI-approved *OR* FSF
# * osi-only - The license will be approved if is OSI-approved *AND NOT* FSF
# * fsf-only - The license will be approved if is FSF *AND NOT* OSI-approved
# * neither - This predicate is ignored and the default lint level is used
allow-osi-fsf-free = "either"
# Lint level used when no other predicates are matched
# 1. License isn't in the allow or deny lists
# 2. License isn't copyleft
# 3. License isn't OSI/FSF, or allow-osi-fsf-free = "neither"
default = "deny"
# The confidence threshold for detecting a license from license text.
# The higher the value, the more closely the license text must be to the
# canonical license text of a valid SPDX license file.
@ -112,26 +113,28 @@ confidence-threshold = 0.8
exceptions = [
# Each entry is the crate and version constraint, and its specific allow
# list
#{ allow = ["Zlib"], crate = "adler32" },
#{ allow = ["Zlib"], name = "adler32", version = "*" },
]
# Some crates don't have (easily) machine readable licensing information,
# adding a clarification entry for it allows you to manually specify the
# licensing information
#[[licenses.clarify]]
# The package spec the clarification applies to
#crate = "ring"
[[licenses.clarify]]
# The name of the crate the clarification applies to
name = "ring"
# The optional version constraint for the crate
version = "*"
# The SPDX expression for the license requirements of the crate
#expression = "MIT AND ISC AND OpenSSL"
expression = "MIT AND ISC AND OpenSSL"
# One or more files in the crate's source used as the "source of truth" for
# the license expression. If the contents match, the clarification will be used
# when running the license check, otherwise the clarification will be ignored
# and the crate will be checked normally, which may produce warnings or errors
# depending on the rest of your configuration
#license-files = [
# Each entry is a crate relative path, and the (opaque) hash of its contents
#{ path = "LICENSE", hash = 0xbd0eed23 }
#]
license-files = [
# Each entry is a crate relative path, and the (opaque) hash of its contents
{ path = "LICENSE", hash = 0xbd0eed23 }
]
[licenses.private]
# If true, ignores workspace crates that aren't published, or are only
@ -160,63 +163,30 @@ wildcards = "allow"
# * simplest-path - The path to the version with the fewest edges is highlighted
# * all - Both lowest-version and simplest-path are used
highlight = "all"
# The default lint level for `default` features for crates that are members of
# the workspace that is being checked. This can be overridden by allowing/denying
# `default` on a crate-by-crate basis if desired.
workspace-default-features = "allow"
# The default lint level for `default` features for external crates that are not
# members of the workspace. This can be overridden by allowing/denying `default`
# on a crate-by-crate basis if desired.
external-default-features = "allow"
# List of crates that are allowed. Use with care!
allow = [
#"ansi_term@0.11.0",
#{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is allowed" },
#{ name = "ansi_term", version = "=0.11.0" },
]
# List of crates to deny
deny = [
#"ansi_term@0.11.0",
#{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is banned" },
# Each entry the name of a crate and a version range. If version is
# not specified, all versions will be matched.
#{ name = "ansi_term", version = "=0.11.0" },
#
# Wrapper crates can optionally be specified to allow the crate when it
# is a direct dependency of the otherwise banned crate
#{ crate = "ansi_term@0.11.0", wrappers = ["this-crate-directly-depends-on-ansi_term"] },
#{ name = "ansi_term", version = "=0.11.0", wrappers = [] },
]
# List of features to allow/deny
# Each entry the name of a crate and a version range. If version is
# not specified, all versions will be matched.
#[[bans.features]]
#crate = "reqwest"
# Features to not allow
#deny = ["json"]
# Features to allow
#allow = [
# "rustls",
# "__rustls",
# "__tls",
# "hyper-rustls",
# "rustls",
# "rustls-pemfile",
# "rustls-tls-webpki-roots",
# "tokio-rustls",
# "webpki-roots",
#]
# If true, the allowed features must exactly match the enabled feature set. If
# this is set there is no point setting `deny`
#exact = true
# Certain crates/versions that will be skipped when doing duplicate detection.
skip = [
#"ansi_term@0.11.0",
#{ crate = "ansi_term@0.11.0", reason = "you can specify a reason why it can't be updated/removed" },
#{ name = "ansi_term", version = "=0.11.0" },
]
# Similarly to `skip` allows you to skip certain crates during duplicate
# detection. Unlike skip, it also includes the entire tree of transitive
# dependencies starting at the specified crate, up to a certain depth, which is
# by default infinite.
# by default infinite
skip-tree = [
#"ansi_term@0.11.0", # will be skipped along with _all_ of its direct and transitive dependencies
#{ crate = "ansi_term@0.11.0", depth = 20 },
#{ name = "ansi_term", version = "=0.11.0", depth = 20 },
]
# This section is considered when running `cargo deny check sources`.
@ -236,9 +206,9 @@ allow-registry = ["https://github.com/rust-lang/crates.io-index"]
allow-git = []
[sources.allow-org]
# github.com organizations to allow git sources for
github = []
# gitlab.com organizations to allow git sources for
gitlab = []
# bitbucket.org organizations to allow git sources for
bitbucket = []
# 1 or more github.com organizations to allow git sources for
#github = [""]
# 1 or more gitlab.com organizations to allow git sources for
#gitlab = [""]
# 1 or more bitbucket.org organizations to allow git sources for
#bitbucket = [""]

View file

@ -1,28 +0,0 @@
# SPDX-FileCopyrightText: Public domain.
# SPDX-License-Identifier: CC0-1.0
# apply this configuration to Rocket's "default" profile
[default.s3_bucket]
# the bucket name
name = ""
# the API endpoint address
endpoint = "https://eu-central-1.linodeobjects.com"
# the bucket region
region = "eu-central-1"
# the access key ID
access_key_id = ""
# the access key secret
secret_access_key = ""
# whether to use path_style S3 URLs, see
# https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#path-style-access
path_style = false
# Here you can add any other rocket options, see
# https://rocket.rs/guide/v0.5/configuration/
[default]
[debug]
[release]

View file

@ -1,19 +0,0 @@
# SPDX-FileCopyrightText: © Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
# SPDX-License-Identifier: CC0-1.0
[Unit]
Description=ServeS3, a S3 proxy
StartLimitInterval=100
StartLimitBurst=10
[Service]
Type=simple
ExecStart=/usr/local/bin/serves3
WorkingDirectory=/etc/serves3/%i/
Environment=ROCKET_PORT=%i
Restart=always
RestartSec=5s
[Install]
WantedBy=multi-user.target

View file

@ -1,48 +1,79 @@
// SPDX-FileCopyrightText: © Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
mod settings;
mod sizes;
use {
anyhow::Result,
bytes::Bytes,
futures::{StreamExt, stream::BoxStream},
lazy_static::lazy_static,
object_store::path::Path as ObjectStorePath,
rocket::{
Request, State,
fairing::AdHoc,
figment::{
Profile,
providers::{Env, Format as _, Toml},
},
http::{ContentType, uri::Origin},
response::{self, Redirect, Responder, stream::ByteStream},
serde::Serialize,
},
rocket_dyn_templates::{Template, context},
settings::Settings,
rocket::response::Responder,
rocket::serde::Serialize,
rocket_dyn_templates::{context, Template},
std::path::PathBuf,
};
enum FileView {
Folder(Template),
Redirect(Redirect),
File(ByteStream<BoxStream<'static, Bytes>>),
struct Settings {
access_key_id: String,
secret_access_key: String,
bucket_name: String,
endpoint: String,
region: String,
}
impl<'r> Responder<'r, 'r> for FileView {
fn respond_to(self, req: &'r Request<'_>) -> response::Result<'r> {
match self {
Self::Folder(template) => template.respond_to(req).map(|mut r| {
r.set_header(ContentType::HTML);
r
}),
Self::Redirect(redirect) => redirect.respond_to(req),
Self::File(stream) => stream.respond_to(req),
lazy_static! {
static ref SETTINGS: Settings = {
let settings = config::Config::builder()
.add_source(config::File::with_name("Settings.toml"))
.add_source(config::Environment::with_prefix("SERVES3"))
.build()
.unwrap();
Settings {
access_key_id: settings
.get_string("access_key_id")
.expect("Missing configuration key access_key_id"),
secret_access_key: settings
.get_string("secret_access_key")
.expect("Missing configuration key secret_access_key"),
bucket_name: settings
.get_string("bucket")
.expect("Missing configuration key bucket"),
region: settings
.get_string("region")
.expect("Missing configuration key region"),
endpoint: settings
.get_string("endpoint")
.expect("Missing configuration key endpoint"),
}
}
};
static ref BUCKET: s3::bucket::Bucket = {
let region = s3::Region::Custom {
region: SETTINGS.region.clone(),
endpoint: SETTINGS.endpoint.clone(),
};
let credentials = s3::creds::Credentials::new(
Some(&SETTINGS.access_key_id),
Some(&SETTINGS.secret_access_key),
None,
None,
None,
)
.expect("Wrong server S3 configuration");
s3::bucket::Bucket::new(&SETTINGS.bucket_name, region, credentials)
.expect("Cannot find or authenticate to S3 bucket")
};
static ref FILEVIEW_TEMPLATE: &'static str = std::include_str!("../templates/index.html.tera");
// Workaround for https://github.com/SergioBenitez/Rocket/issues/1792
static ref EMPTY_DIR: tempfile::TempDir = tempfile::tempdir()
.expect("Unable to create an empty temporary folder, is the whole FS read-only?");
}
#[derive(Responder)]
enum FileView {
#[response(content_type = "text/html")]
Folder(Template),
#[response(content_type = "application/octet-stream")]
File(Vec<u8>),
}
#[derive(Serialize)]
@ -58,175 +89,152 @@ enum Error {
#[response(status = 404)]
NotFound(String),
#[response(status = 400)]
InvalidRequest(String),
#[response(status = 500)]
UnknownError(String),
}
impl From<object_store::Error> for Error {
fn from(value: object_store::Error) -> Self {
match value {
object_store::Error::NotFound { path, source: _ } => {
Self::NotFound(format!("object not found at {}", path))
}
err => Error::UnknownError(err.to_string()),
}
}
}
#[rocket::get("/")]
async fn index_root(uri: &Origin<'_>, state: &State<Settings>) -> Result<FileView, Error> {
index(None, uri, state).await
}
#[rocket::get("/<path..>")]
async fn index(
path: Option<PathBuf>,
uri: &Origin<'_>,
state: &State<Settings>,
) -> Result<FileView, Error> {
let object_path = if let Some(url_path) = path.as_ref() {
let s = url_path.to_str().ok_or(Error::InvalidRequest(
"Path cannot be converted to UTF-8".into(),
))?;
async fn index(path: PathBuf) -> Result<FileView, Error> {
/*
The way things work in S3, the following holds for us:
- we need to use a slash as separator
- folders need to be queried ending with a slash
- getting the bucket address (empty prefix) will
return an XML file with all properties; we don't
want that.
Some(ObjectStorePath::from(s))
We try first to retrieve list an object as a file. If we fail,
we fallback to retrieving the equivalent folder.
*/
if let Ok(result) = s3_serve_file(&path).await {
Ok(result)
} else {
None
};
// We try first to retrieve list an object as a file.
if let Some(object_path) = &object_path
&& object_exists(object_path, &state).await?
{
log::info!("serving S3 object at {}", &object_path);
return serve_object(&object_path, &state).await;
}
// If we fail, we fallback to retrieving the equivalent folder.
// For hyperlinks in the generated HTML to work properly, let's
// normalize the path to end with a slash.
if !uri.path().ends_with("/") {
// If the path does not end with a slash, we redirect to
// the normalized path with a slash appended.
let redirect = uri
.map_path(|p| format!("{}/", p))
.expect("cannot append slash to origin URL, this should never happen!");
return Ok(FileView::Redirect(Redirect::permanent(
redirect.to_string(),
)));
}
// We can now assume we have a full path to a folder,
// ending with a slash.
let path = path.unwrap_or_default();
log::info!("listing S3 objects at {}", path.display());
let objects = file_view(object_path, &state).await?;
let rendered = Template::render(
"index",
context! {
path: format!("{}/", path.display()),
objects
},
);
Ok(FileView::Folder(rendered))
}
async fn object_exists(s3_path: &ObjectStorePath, settings: &Settings) -> Result<bool, Error> {
log::debug!("checking existence of S3 object at {}", s3_path);
match settings.s3_bucket.head(s3_path).await {
Ok(_metadata) => Ok(true),
Err(object_store::Error::NotFound { path: _, source: _ }) => Ok(false),
Err(e) => Err(Error::UnknownError(e.to_string())),
let objects = s3_fileview(&path).await?;
let rendered = Template::render(
"index",
context! {
path: format!("{}/", path.display()),
objects
},
);
Ok(FileView::Folder(rendered))
}
}
async fn serve_object(s3_path: &ObjectStorePath, settings: &Settings) -> Result<FileView, Error> {
let object_stream = settings
.s3_bucket
.get(&s3_path)
async fn s3_serve_file(path: &PathBuf) -> Result<FileView, Error> {
let is_root_prefix = path.as_os_str().is_empty();
if is_root_prefix {
return Err(Error::NotFound("Root prefix is not a file".into()));
}
// FIXME: this can be big, we should use streaming,
// not loading in memory!
let response = BUCKET
.get_object(format!("{}", path.display()))
.await
.map_err(Error::from)?
.into_stream();
.map_err(|_| Error::UnknownError("Unable to connect to S3 bucket".into()))?;
let s3_path = s3_path.clone();
let stream = object_stream
.map(move |chunk| match chunk {
Ok(bytes) => bytes,
Err(err) => {
log::error!("connection error while reading {}: {}", s3_path, err);
Bytes::new() // Forces end of stream
}
})
.boxed();
// TODO: unfortunately Rocket does not have a ByteStream with a Result per chunk,
// meaning that if there is a failure in the middle of the stream, the best we can do is...
// nothing? Panic? All options are bad.
Ok(FileView::File(ByteStream::from(stream)))
match response.status_code() {
200 | 204 => {
let bytes = response.bytes().to_vec();
Ok(FileView::File(bytes))
}
404 => Err(Error::NotFound("Object not found".into())),
_ => Err(Error::UnknownError("Unknown S3 error".into())),
}
}
async fn file_view(
s3_folder_path: Option<ObjectStorePath>,
settings: &Settings,
) -> Result<Vec<FileViewItem>, Error> {
async fn s3_fileview(path: &PathBuf) -> Result<Vec<FileViewItem>, Error> {
/*
if listing a folder:
- folders will be under 'common_prefixes'
- files will be under the 'contents' property
*/
let s3_objects = settings
.s3_bucket
.list_with_delimiter(s3_folder_path.as_ref())
let parent = path.parent();
let s3_folder_path = match parent {
Some(_) => format!("{}/", path.display()),
None => "".into(),
};
let s3_objects = BUCKET
.list(s3_folder_path, Some("/".into()))
.await
.map_err(Error::from)?;
.map_err(|_| Error::NotFound("Object not found".into()))?;
let folders = s3_objects.common_prefixes.into_iter().map(|dir| {
let dirname = dir.parts().last().unwrap();
FileViewItem {
path: format!("{}/", dirname.as_ref().to_string()),
size_bytes: 0,
size: "[DIR]".to_owned(),
last_modification: String::default(),
}
});
let objects = s3_objects
.iter()
.flat_map(|list| -> Vec<Option<FileViewItem>> {
let prefix = if let Some(p) = &list.prefix {
p.as_str()
} else {
""
};
let files = s3_objects.objects.into_iter().map(|obj| FileViewItem {
path: obj.location.filename().unwrap().into(),
size_bytes: obj.size,
size: sizes::bytes_to_human(obj.size),
last_modification: obj.last_modified.to_rfc3339(),
});
let folders = list.common_prefixes.iter().flatten().map(|dir| {
let path = dir.prefix.strip_prefix(&prefix);
path.map(|path| FileViewItem {
path: path.to_owned(),
size_bytes: 0,
size: "[DIR]".to_owned(),
last_modification: String::default(),
})
});
Ok(folders.chain(files).collect())
let files = list.contents.iter().map(|obj| {
let path = obj.key.strip_prefix(&prefix);
path.map(|path| FileViewItem {
path: path.to_owned(),
size_bytes: obj.size,
size: size_bytes_to_human(obj.size),
last_modification: obj.last_modified.clone(),
})
});
folders.chain(files).collect()
})
.flatten()
.collect();
Ok(objects)
}
lazy_static! {
// Workaround for https://github.com/SergioBenitez/Rocket/issues/1792
static ref EMPTY_DIR: tempfile::TempDir = tempfile::tempdir()
.expect("unable to create an empty temporary folder, is the whole FS read-only?");
fn size_bytes_to_human(bytes: u64) -> String {
use human_size::{Any, SpecificSize};
let size: f64 = bytes as f64;
let digits = size.log10().floor() as u32;
let mut order = digits / 3;
let unit = match order {
0 => Any::Byte,
1 => Any::Kilobyte,
2 => Any::Megabyte,
_ => {
order = 3; // Let's stop here.
Any::Gigabyte
}
};
format!(
"{:.3}",
SpecificSize::new(size / 10u64.pow(order * 3) as f64, unit)
.unwrap_or(SpecificSize::new(0., Any::Byte).unwrap())
)
}
#[rocket::launch]
fn rocket() -> _ {
let config_figment = rocket::Config::figment()
.merge(Toml::file("serves3.toml").nested())
.merge(Env::prefixed("SERVES3_").global())
.merge(("template_dir", EMPTY_DIR.path())) // We compile the templates in anyway
.select(Profile::from_env_or("SERVES3_PROFILE", "default"));
eprintln!("Proxying to {} for {}", BUCKET.host(), BUCKET.name());
let config_figment = rocket::Config::figment().merge(("template_dir", EMPTY_DIR.path())); // We compile the templates in anyway.
rocket::custom(config_figment)
.mount("/", rocket::routes![index_root, index])
.attach(AdHoc::config::<Settings>())
.mount("/", rocket::routes![index])
.attach(Template::custom(|engines| {
engines
.tera
.add_raw_template("index", std::include_str!("../templates/index.html.tera"))
.add_raw_template("index", *FILEVIEW_TEMPLATE)
.unwrap()
}))
}

View file

@ -1,67 +0,0 @@
// SPDX-FileCopyrightText: © Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
use {
object_store::{BackoffConfig, ObjectStore, RetryConfig, aws},
rocket::serde::Deserialize,
serde::de::Error,
std::time::Duration,
};
#[derive(Deserialize)]
#[serde(crate = "rocket::serde")]
pub struct Settings {
#[serde(deserialize_with = "deserialize_s3_bucket")]
pub s3_bucket: Box<dyn ObjectStore>,
}
fn deserialize_s3_bucket<'de, D>(deserializer: D) -> Result<Box<dyn ObjectStore>, D::Error>
where
D: serde::Deserializer<'de>,
{
let config = S3Config::deserialize(deserializer)?;
config.try_into().map_err(D::Error::custom)
}
#[derive(Deserialize)]
pub struct S3Config {
pub name: String,
pub endpoint: String,
pub region: String,
#[serde(default)]
pub path_style: bool,
pub access_key_id: String,
pub secret_access_key: String,
}
impl TryInto<Box<dyn ObjectStore>> for S3Config {
type Error = anyhow::Error;
fn try_into(self) -> Result<Box<dyn ObjectStore>, Self::Error> {
// TODO: support object stores other than than AWS
let object_store = aws::AmazonS3Builder::new()
.with_region(self.region)
.with_endpoint(&self.endpoint)
.with_bucket_name(&self.name)
.with_access_key_id(self.access_key_id)
.with_secret_access_key(self.secret_access_key)
.with_virtual_hosted_style_request(!self.path_style)
.with_allow_http(true)
.with_retry(RetryConfig {
max_retries: 1,
backoff: BackoffConfig::default(),
retry_timeout: Duration::from_millis(500),
})
.build()?;
log::info!(
"Serving contents from bucket {} at {}",
self.endpoint,
self.name,
);
Ok(Box::new(object_store))
}
}

View file

@ -1,44 +0,0 @@
// SPDX-FileCopyrightText: © Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
pub fn bytes_to_human(bytes: u64) -> String {
use human_size::{Any, SpecificSize};
let size: f64 = bytes as f64;
let digits = size.log10().floor() as u32;
let mut order = digits / 3;
let unit = match order {
0 => Any::Byte,
1 => Any::Kilobyte,
2 => Any::Megabyte,
_ => {
order = 3; // Let's stop here.
Any::Gigabyte
}
};
format!(
"{:.3}",
SpecificSize::new(size / 10u64.pow(order * 3) as f64, unit)
.unwrap_or(SpecificSize::new(0., Any::Byte).unwrap())
)
}
// -------------------------------------------------------------
#[cfg(test)]
mod tests {
use rstest::rstest;
#[rstest]
#[case(1024, "1.024 kB")]
#[case(10240, "10.240 kB")]
#[case(1024*1024, "1.049 MB")]
#[case(1024*1024*1024, "1.074 GB")]
#[case(0, "0.000 B")]
#[case(u64::MAX, format!("{:.3} GB",u64::MAX as f64/(1_000_000_000.0)))]
#[case(u64::MIN, format!("{:.3} B",u64::MIN as f64))]
fn bytes_to_human(#[case] bytes: u64, #[case] expected: String) {
assert_eq!(super::bytes_to_human(bytes), expected);
}
}

View file

@ -5,7 +5,6 @@
-->
<html>
<head>
<link rel="icon" href="data:image/png;base64,iVBORw0KGgo=">
<style>
body {
font-family: monospace;

View file

@ -1,54 +0,0 @@
// SPDX-FileCopyrightText: © Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
use {
delegate::delegate,
std::borrow::Cow,
testcontainers::{
Image,
core::{ContainerPort, WaitFor},
},
testcontainers_modules::minio,
};
const MINIO_IMAGE_TAG: &'static str = "RELEASE.2025-07-23T15-54-02Z";
pub struct MinIO {
inner: minio::MinIO,
}
impl Image for MinIO {
fn tag(&self) -> &str {
MINIO_IMAGE_TAG.into()
}
fn ready_conditions(&self) -> Vec<WaitFor> {
vec![WaitFor::message_on_stderr("API:")]
}
delegate! {
to self.inner {
fn name(&self) -> &str;
fn expose_ports(&self) -> &[ContainerPort];
fn env_vars(
&self,
) -> impl IntoIterator<Item = (impl Into<Cow<'_, str>>, impl Into<Cow<'_, str>>)>;
fn mounts(&self) -> impl IntoIterator<Item = &testcontainers::core::Mount>;
fn copy_to_sources(&self) -> impl IntoIterator<Item = &testcontainers::CopyToContainer>;
fn entrypoint(&self) -> Option<&str>;
fn cmd(&self) -> impl IntoIterator<Item = impl Into<std::borrow::Cow<'_, str>>>;
fn exec_after_start(
&self,
cs: testcontainers::core::ContainerState,
) -> Result<Vec<testcontainers::core::ExecCommand>, testcontainers::TestcontainersError>;
}
}
}
impl Default for MinIO {
fn default() -> Self {
Self {
inner: Default::default(),
}
}
}

View file

@ -1,124 +0,0 @@
// SPDX-FileCopyrightText: © Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
mod minio;
use {
::minio::s3::{
client::Client as MinIOClient, creds::StaticProvider, http::BaseUrl, types::S3Api,
},
anyhow::{Result, anyhow},
object_store::{ObjectStore, aws::AmazonS3Builder},
reqwest::Url,
std::{ptr::null_mut, str::FromStr},
testcontainers::{ContainerAsync, runners::AsyncRunner},
tokio::io::AsyncBufReadExt as _,
};
pub struct Test {
pub base_url: Url,
pub bucket: Box<dyn ObjectStore>,
pub serves3: tokio::process::Child,
pub _minio: ContainerAsync<minio::MinIO>,
}
const MAXIMUM_SERVES3_INIT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(5);
const BUCKET_NAME: &'static str = "integration-test-bucket";
const REGION: &'static str = "test-region";
const ACCESS_KEY: &'static str = "minioadmin";
const SECRET_KEY: &'static str = "minioadmin";
impl Test {
pub async fn new() -> Result<Self> {
// NOTE: this testsuite was setup to work
// against a recent version of podman,
// which correctly distinguishes between
// stdout and stderr of the running container.
let image = minio::MinIO::default();
let container = image.start().await?;
let endpoint = format!(
"http://{host}:{port}",
host = container.get_host().await?,
port = container.get_host_port_ipv4(9000).await?
);
// We need to create the bucket
let minio_client = MinIOClient::new(
BaseUrl::from_str(&endpoint).unwrap(),
Some(Box::new(StaticProvider::new(ACCESS_KEY, SECRET_KEY, None))),
None,
None,
)?;
minio_client.create_bucket(BUCKET_NAME).send().await?;
let bucket = AmazonS3Builder::new()
.with_endpoint(&endpoint)
.with_access_key_id(ACCESS_KEY)
.with_secret_access_key(SECRET_KEY)
.with_bucket_name(BUCKET_NAME)
.with_allow_http(true)
.build()?;
let bin = std::env!("CARGO_BIN_EXE_serves3");
let mut child = tokio::process::Command::new(bin)
.env("SERVES3_ADDRESS", "127.0.0.1")
.env("SERVES3_PORT", "0")
.env("SERVES3_LOG_LEVEL", "debug")
.env(
"SERVES3_S3_BUCKET",
format!(
r#"{{
name = "{name}",
endpoint = "{endpoint}",
region = "{region}",
access_key_id = "{user}",
secret_access_key = "{secret}",
path_style = true
}}"#,
name = BUCKET_NAME,
endpoint = endpoint,
region = &REGION,
user = ACCESS_KEY,
secret = SECRET_KEY
),
)
.stdout(std::process::Stdio::piped())
.spawn()?;
let base_url = tokio::time::timeout(MAXIMUM_SERVES3_INIT_TIMEOUT, async {
let stdout = child.stdout.as_mut().unwrap();
let mut lines = tokio::io::BufReader::new(stdout).lines();
let re = regex::Regex::new("^Rocket has launched from (http://.+)$").unwrap();
while let Some(line) = lines.next_line().await? {
println!("{}", &line);
if let Some(captures) = re.captures(&line) {
let url = captures.get(1).unwrap().as_str();
return Ok(Url::from_str(url)?);
}
}
Err(anyhow!("Rocket did not print that it has started"))
})
.await??;
Ok(Self {
base_url,
bucket: Box::new(bucket),
serves3: child,
_minio: container,
})
}
}
impl Drop for Test {
fn drop(&mut self) {
unsafe {
let pid = self.serves3.id().unwrap() as i32;
libc::kill(pid, libc::SIGTERM);
libc::waitpid(pid, null_mut(), 0);
}
}
}

View file

@ -1,163 +0,0 @@
// SPDX-FileCopyrightText: © Matteo Settenvini <matteo.settenvini@montecristosoftware.eu>
// SPDX-License-Identifier: EUPL-1.2
mod common;
use {
object_store::{PutPayload, path::Path as ObjectStorePath},
scraper::{Html, Selector},
};
async fn create_sample_files(test: &common::Test) -> anyhow::Result<()> {
test.bucket
.put(
&ObjectStorePath::from("file.txt"),
PutPayload::from_static("I am a file".as_bytes()),
)
.await?;
test.bucket
.put(
&ObjectStorePath::from("folder/file.txt"),
PutPayload::from_static("I am a file in a folder".as_bytes()),
)
.await?;
Ok(())
}
#[test_log::test(tokio::test(flavor = "multi_thread"))]
async fn serves_files() -> anyhow::Result<()> {
let test = common::Test::new().await?;
create_sample_files(&test).await?;
let resp = reqwest::get(test.base_url.join("file.txt")?).await?;
assert_eq!(resp.bytes().await?, "I am a file");
let resp = reqwest::get(test.base_url.join("folder/file.txt")?).await?;
assert_eq!(resp.bytes().await?, "I am a file in a folder");
Ok(())
}
#[test_log::test(tokio::test(flavor = "multi_thread"))]
async fn serves_top_level_folder() -> anyhow::Result<()> {
let test = common::Test::new().await?;
create_sample_files(&test).await?;
// Check that a file in the toplevel is listed:
let resp = reqwest::get(test.base_url.clone()).await?;
assert!(
resp.status().is_success(),
"Request failed with {}",
resp.status()
);
let text = resp.text().await?;
println!("{}", &text);
let document = Html::parse_document(&text);
let selector = Selector::parse(r#"h1"#).unwrap();
for title in document.select(&selector) {
assert_eq!(title.inner_html(), "/", "title doesn't match");
}
let selector =
Selector::parse(r#"table > tbody > tr:nth-child(1) > td:first-child > a"#).unwrap();
for item in document.select(&selector) {
// Folders should be listed ending with a slash,
// or HTTP gets confused. This is also due to the
// normalization we do on the path in the main program.
assert_eq!(item.attr("href"), Some("folder/"));
assert_eq!(item.text().next(), Some("folder/"));
}
let selector =
Selector::parse(r#"table > tbody > tr:nth-child(2) > td:first-child > a"#).unwrap();
for item in document.select(&selector) {
assert_eq!(item.attr("href"), Some("file.txt"));
assert_eq!(item.text().next(), Some("file.txt"));
}
Ok(())
}
#[test_log::test(tokio::test(flavor = "multi_thread"))]
async fn serves_second_level_folder() -> anyhow::Result<()> {
let test = common::Test::new().await?;
create_sample_files(&test).await?;
// Check that a file in the second level is listed:
let resp = reqwest::get(test.base_url.join("folder/")?).await?;
assert!(
resp.status().is_success(),
"Request failed with {}",
resp.status()
);
let text = resp.text().await?;
println!("{}", &text);
let document = Html::parse_document(&text);
let selector = Selector::parse(r#"h1"#).unwrap();
for title in document.select(&selector) {
assert_eq!(title.inner_html(), "folder/", "title doesn't match");
}
let selector =
Selector::parse(r#"table > tbody > tr:nth-child(1) > td:first-child > a"#).unwrap();
for item in document.select(&selector) {
assert_eq!(item.attr("href"), Some("../"));
assert_eq!(item.inner_html(), "..");
}
let selector =
Selector::parse(r#"table > tbody > tr:nth-child(2) > td:first-child > a"#).unwrap();
for item in document.select(&selector) {
assert_eq!(item.attr("href"), Some("file.txt"));
assert_eq!(item.inner_html(), "file.txt");
}
Ok(())
}
#[test_log::test(tokio::test(flavor = "multi_thread"))]
async fn serves_second_level_folder_without_ending_slash() -> anyhow::Result<()> {
let test = common::Test::new().await?;
create_sample_files(&test).await?;
// Check that a file in the second level is listed even without an ending slash:
let resp = reqwest::get(test.base_url.join("folder")?).await?;
assert!(
resp.status().is_success(),
"Request failed with {}",
resp.status()
);
// Ensure we were redirected to a URL ending with a slash
assert!(resp.url().path().ends_with("/"));
let text = resp.text().await?;
println!("{}", &text);
let document = Html::parse_document(&text);
let selector = Selector::parse(r#"h1"#).unwrap();
for title in document.select(&selector) {
assert_eq!(title.inner_html(), "folder/", "title doesn't match");
}
let selector =
Selector::parse(r#"table > tbody > tr:nth-child(1) > td:first-child > a"#).unwrap();
for item in document.select(&selector) {
assert_eq!(item.attr("href"), Some("../"));
assert_eq!(item.inner_html(), "..");
}
let selector =
Selector::parse(r#"table > tbody > tr:nth-child(2) > td:first-child > a"#).unwrap();
for item in document.select(&selector) {
assert_eq!(item.attr("href"), Some("file.txt"));
assert_eq!(item.inner_html(), "file.txt");
}
Ok(())
}