implement ssd-friendly keep index to clean up removed entries on fof/upload sync; drop external rsync option

This commit is contained in:
yggverse 2026-03-30 11:40:04 +03:00
parent e75f973de4
commit 18762c6b74
5 changed files with 71 additions and 104 deletions

26
Cargo.lock generated
View file

@ -96,9 +96,9 @@ checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.2.57" version = "1.2.58"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1"
dependencies = [ dependencies = [
"find-msvc-tools", "find-msvc-tools",
"shlex", "shlex",
@ -195,7 +195,7 @@ checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
[[package]] [[package]]
name = "flarumdown" name = "flarumdown"
version = "0.3.1" version = "0.4.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"chrono", "chrono",
@ -279,9 +279,9 @@ checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
[[package]] [[package]]
name = "js-sys" name = "js-sys"
version = "0.3.91" version = "0.3.92"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" checksum = "cc4c90f45aa2e6eacbe8645f77fdea542ac97a494bcd117a67df9ff4d611f995"
dependencies = [ dependencies = [
"once_cell", "once_cell",
"wasm-bindgen", "wasm-bindgen",
@ -617,9 +617,9 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]] [[package]]
name = "wasm-bindgen" name = "wasm-bindgen"
version = "0.2.114" version = "0.2.115"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" checksum = "6523d69017b7633e396a89c5efab138161ed5aafcbc8d3e5c5a42ae38f50495a"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"once_cell", "once_cell",
@ -630,9 +630,9 @@ dependencies = [
[[package]] [[package]]
name = "wasm-bindgen-macro" name = "wasm-bindgen-macro"
version = "0.2.114" version = "0.2.115"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" checksum = "4e3a6c758eb2f701ed3d052ff5737f5bfe6614326ea7f3bbac7156192dc32e67"
dependencies = [ dependencies = [
"quote", "quote",
"wasm-bindgen-macro-support", "wasm-bindgen-macro-support",
@ -640,9 +640,9 @@ dependencies = [
[[package]] [[package]]
name = "wasm-bindgen-macro-support" name = "wasm-bindgen-macro-support"
version = "0.2.114" version = "0.2.115"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" checksum = "921de2737904886b52bcbb237301552d05969a6f9c40d261eb0533c8b055fedf"
dependencies = [ dependencies = [
"bumpalo", "bumpalo",
"proc-macro2", "proc-macro2",
@ -653,9 +653,9 @@ dependencies = [
[[package]] [[package]]
name = "wasm-bindgen-shared" name = "wasm-bindgen-shared"
version = "0.2.114" version = "0.2.115"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" checksum = "a93e946af942b58934c604527337bad9ae33ba1d5c6900bbb41c2c07c2364a93"
dependencies = [ dependencies = [
"unicode-ident", "unicode-ident",
] ]

View file

@ -1,6 +1,6 @@
[package] [package]
name = "flarumdown" name = "flarumdown"
version = "0.3.1" version = "0.4.0"
edition = "2024" edition = "2024"
license = "MIT" license = "MIT"
readme = "README.md" readme = "README.md"

View file

@ -26,45 +26,10 @@ cargo install flarumdown
``` bash ``` bash
RUST_LOG=warn flarumdown -s '/path/to/flarum.sqlite' \ RUST_LOG=warn flarumdown -s '/path/to/flarum.sqlite' \
-t '/path/to/target' \ -t '/path/to/target' \
-p '/path/to/public' \
-i index \ -i index \
-r http://[202:68d0:f0d5:b88d:1d1a:555e:2f6b:3148] \ -r http://[202:68d0:f0d5:b88d:1d1a:555e:2f6b:3148] \
-r http://[505:6847:c778:61a1:5c6d:e802:d291:8191] \ -r http://[505:6847:c778:61a1:5c6d:e802:d291:8191] \
-r http://hc3fycfadz7fkapp62fqi6llioe46fvis6wuswfobl5ghc2u7snq.b32.i2p \ -r http://hc3fycfadz7fkapp62fqi6llioe46fvis6wuswfobl5ghc2u7snq.b32.i2p \
-r http://w6vtcpbir5vvokwdqqbqlrdtnzwyfc4iyqn6owxuyjeppszuydutqwqd.onion -r http://w6vtcpbir5vvokwdqqbqlrdtnzwyfc4iyqn6owxuyjeppszuydutqwqd.onion
``` ```
### rsync
Optionally, delegate `-u` to `rsync` by using `crontab -e`:
``` bash
#!/bin/bash
mkdir -p /var/www/flarum/public/flarumdown/dump/assets/files
# collect FoF/upload files
readonly RSYNC_FILTER_P="*-thumb.webp"
readonly RSYNC_TARGET_D="/var/www/flarum/public/flarumdown/dump/assets"
find "$RSYNC_TARGET_D" -name "$RSYNC_FILTER_P" -type f -delete # rsync has --filter, cleanup
/usr/bin/rsync -av --delete --filter="-p $RSYNC_FILTER_P" \
/var/www/flarum/public/assets/files \
$RSYNC_TARGET_D
# dump the DB
RUST_LOG=warn /usr/local/bin/flarumdown -s /var/www/flarum/flarum.sqlite \
-t /var/www/flarum/public/flarumdown/dump \
-i index \
-r http://[202:68d0:f0d5:b88d:1d1a:555e:2f6b:3148] \
-r http://[505:6847:c778:61a1:5c6d:e802:d291:8191] \
-r http://hc3fycfadz7fkapp62fqi6llioe46fvis6wuswfobl5ghc2u7snq.b32.i2p \
-r http://w6vtcpbir5vvokwdqqbqlrdtnzwyfc4iyqn6owxuyjeppszuydutqwqd.onion
# create .zip file to simply download for offline reading
readonly TARGET_DIR=/var/www/flarum/public/flarumdown/dump
cd "$TARGET_DIR"
if [ "$(pwd)" != "$TARGET_DIR" ]; then
echo "Unexpected path!"
exit 1
fi
zip -r -9 /var/www/flarum/public/flarumdown/dump.zip .
```

View file

@ -9,13 +9,12 @@ pub struct Config {
pub source: PathBuf, pub source: PathBuf,
/// Path to export FoF/upload files from /// Path to export FoF/upload files from
/// tips: /// * the root is path to the public dir (e.g. `/var/www/flarum/public`)
/// * root should be the path to `flarum/public` dir
/// * use rsync instead of this option for longer SSD life
#[arg(short, long)] #[arg(short, long)]
pub upload: Option<PathBuf>, pub public: PathBuf,
/// Path to export markdown /// Path to export markdown
/// * e.g. `/var/www/flarum/public/flarumdown/dump`
#[arg(short, long)] #[arg(short, long)]
pub target: PathBuf, pub target: PathBuf,

View file

@ -11,7 +11,7 @@ use regex::{Captures, Regex};
use std::{ use std::{
collections::{HashMap, HashSet}, collections::{HashMap, HashSet},
env::var, env::var,
fs::{File, copy, create_dir_all, read_dir, remove_dir_all, remove_file}, fs::{File, copy, create_dir_all, read_dir, remove_file},
io::Write, io::Write,
path::PathBuf, path::PathBuf,
}; };
@ -56,18 +56,10 @@ fn main() -> Result<()> {
if !config.target.exists() { if !config.target.exists() {
create_dir_all(&config.target)?; create_dir_all(&config.target)?;
} }
for entry in read_dir(&config.target)? {
let path = entry?.path();
if path.is_file() {
remove_file(path)?;
} else if path.is_dir() && config.upload.is_some() {
remove_dir_all(path)?;
}
}
let mut db = Database::connect(config.source)?; let mut db = Database::connect(config.source)?;
let mut keep = HashSet::with_capacity(1000); // @TODO count entries expected from the DB
let mut users = HashMap::new(); let mut users = HashMap::with_capacity(100); // @TODO count entries expected from the DB
for user in db.users()? { for user in db.users()? {
assert!( assert!(
users users
@ -81,7 +73,7 @@ fn main() -> Result<()> {
) )
} }
let mut tags = HashMap::new(); let mut tags = HashMap::with_capacity(100); // @TODO count entries expected from the DB
for tag in db.tags()? { for tag in db.tags()? {
if !config.filter_tag.is_empty() && !config.filter_tag.contains(&tag.slug) { if !config.filter_tag.is_empty() && !config.filter_tag.contains(&tag.slug) {
continue; continue;
@ -89,7 +81,7 @@ fn main() -> Result<()> {
assert!(tags.insert(tag.id, tag.slug).is_none()) assert!(tags.insert(tag.id, tag.slug).is_none())
} }
let mut discussions = Vec::new(); let mut discussions = Vec::with_capacity(1000); // @TODO count entries expected from the DB
for discussion in db.discussions()? { for discussion in db.discussions()? {
if !db if !db
.discussion_tag_ids(discussion.id)? .discussion_tag_ids(discussion.id)?
@ -100,7 +92,7 @@ fn main() -> Result<()> {
} }
assert!(users.contains_key(&discussion.user_id)); assert!(users.contains_key(&discussion.user_id));
let mut posts = Vec::new(); let mut posts = Vec::with_capacity(1000); // @TODO count entries expected from the DB
for post in db.posts(discussion.id)? { for post in db.posts(discussion.id)? {
posts.push(Post { posts.push(Post {
id: post.id, id: post.id,
@ -121,11 +113,13 @@ fn main() -> Result<()> {
} }
if let Some(index) = config.index { if let Some(index) = config.index {
let mut file = File::create_new({ let path = {
let mut path = PathBuf::from(&config.target); let mut path = PathBuf::from(&config.target);
path.push(format!("{}.md", index.trim_end_matches(".md"))); path.push(format!("{}.md", index.trim_end_matches(".md")));
path path
})?; };
let mut file = File::create(&path)?;
keep.insert(path);
for discussion in &discussions { for discussion in &discussions {
file.write_all( file.write_all(
format!( format!(
@ -151,11 +145,13 @@ fn main() -> Result<()> {
} }
for discussion in &discussions { for discussion in &discussions {
let mut file = File::create_new({ let path = {
let mut path = PathBuf::from(&config.target); let mut path = PathBuf::from(&config.target);
path.push(format!("{}.md", discussion.id)); path.push(format!("{}.md", discussion.id));
path path
})?; };
let mut file = File::create(&path)?;
keep.insert(path);
file.write_all( file.write_all(
{ {
let mut page = Vec::new(); let mut page = Vec::new();
@ -195,47 +191,42 @@ fn main() -> Result<()> {
post post
}); });
for upload in &uploads { for upload in &uploads {
let path_target = { let t = {
let mut p = PathBuf::from(&config.target); let mut p = PathBuf::from(&config.target);
p.push(upload); p.push(upload);
p p
}; };
match config.upload { let mut p = PathBuf::from(&config.public);
// upload option is active, p.push(upload);
// create files copy in the destinations match p.canonicalize() {
Some(ref upload_source) => { Ok(src) => {
let mut p = PathBuf::from(upload_source); if src.starts_with(&config.public) {
p.push(upload); if t.exists() {
match p.canonicalize() { debug!(
Ok(src) => { "Copied file `{}` for `{}` exists, skip overwrite",
if src.starts_with(upload_source) { t.to_string_lossy(),
if !path_target.exists() { src.to_string_lossy(),
create_dir_all(path_target.parent().unwrap())?; );
copy(src, path_target)?; } else {
} create_dir_all(t.parent().unwrap())?;
} else { copy(&src, &t)?;
warn!( debug!(
"Possible traversal injection: `{}` (post #{}, user #{})", "Copied file from `{}` to `{}`",
src.to_string_lossy(), src.to_string_lossy(),
post.id, t.to_string_lossy(),
post.user_id );
) keep.insert(t);
}
} }
Err(e) => error!("{e}: `{}` (post #{})", p.to_string_lossy(), post.id) } else {
}
},
// task delegated to rsync
// * manually pre-copied FoF/upload destinations must exist
None => {
if !path_target.exists() {
warn!( warn!(
"Referenced file does not exist: `{}` (post #{})", "Possible traversal injection: `{}` (post #{}, user #{})",
path_target.to_string_lossy(), src.to_string_lossy(),
post.id post.id,
post.user_id
) )
} }
} }
Err(e) => error!("{e}: `{}` (post #{})", p.to_string_lossy(), post.id)
} }
} }
content.push("\n---\n".into()) content.push("\n---\n".into())
@ -255,7 +246,19 @@ fn main() -> Result<()> {
.as_bytes(), .as_bytes(),
)? )?
} }
cleanup(&config.target, &keep)
}
/// Recursively removes entries that not exists in the `keep` registry
/// * empty directories cleanup yet not implemented @TODO
fn cleanup(target: &PathBuf, keep: &HashSet<PathBuf>) -> Result<()> {
for entry in read_dir(target)? {
let p = entry?.path();
if p.is_file() && !keep.contains(&p) {
remove_file(&p)?;
debug!("Cleanup file `{}`", p.to_string_lossy());
}
}
Ok(()) Ok(())
} }