implement SSD-friendly keep index to clean up removed entries on FoF/upload sync; drop external rsync option

This commit is contained in:
yggverse 2026-03-30 11:40:04 +03:00
parent e75f973de4
commit 18762c6b74
5 changed files with 71 additions and 104 deletions

26
Cargo.lock generated
View file

@ -96,9 +96,9 @@ checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
[[package]]
name = "cc"
version = "1.2.57"
version = "1.2.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423"
checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1"
dependencies = [
"find-msvc-tools",
"shlex",
@ -195,7 +195,7 @@ checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
[[package]]
name = "flarumdown"
version = "0.3.1"
version = "0.4.0"
dependencies = [
"anyhow",
"chrono",
@ -279,9 +279,9 @@ checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
[[package]]
name = "js-sys"
version = "0.3.91"
version = "0.3.92"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c"
checksum = "cc4c90f45aa2e6eacbe8645f77fdea542ac97a494bcd117a67df9ff4d611f995"
dependencies = [
"once_cell",
"wasm-bindgen",
@ -617,9 +617,9 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "wasm-bindgen"
version = "0.2.114"
version = "0.2.115"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e"
checksum = "6523d69017b7633e396a89c5efab138161ed5aafcbc8d3e5c5a42ae38f50495a"
dependencies = [
"cfg-if",
"once_cell",
@ -630,9 +630,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.114"
version = "0.2.115"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6"
checksum = "4e3a6c758eb2f701ed3d052ff5737f5bfe6614326ea7f3bbac7156192dc32e67"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@ -640,9 +640,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.114"
version = "0.2.115"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3"
checksum = "921de2737904886b52bcbb237301552d05969a6f9c40d261eb0533c8b055fedf"
dependencies = [
"bumpalo",
"proc-macro2",
@ -653,9 +653,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.114"
version = "0.2.115"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16"
checksum = "a93e946af942b58934c604527337bad9ae33ba1d5c6900bbb41c2c07c2364a93"
dependencies = [
"unicode-ident",
]

View file

@ -1,6 +1,6 @@
[package]
name = "flarumdown"
version = "0.3.1"
version = "0.4.0"
edition = "2024"
license = "MIT"
readme = "README.md"

View file

@ -26,45 +26,10 @@ cargo install flarumdown
``` bash
RUST_LOG=warn flarumdown -s '/path/to/flarum.sqlite' \
-t '/path/to/target' \
-p '/path/to/public' \
-i index \
-r http://[202:68d0:f0d5:b88d:1d1a:555e:2f6b:3148] \
-r http://[505:6847:c778:61a1:5c6d:e802:d291:8191] \
-r http://hc3fycfadz7fkapp62fqi6llioe46fvis6wuswfobl5ghc2u7snq.b32.i2p \
-r http://w6vtcpbir5vvokwdqqbqlrdtnzwyfc4iyqn6owxuyjeppszuydutqwqd.onion
```
### rsync
Optionally, delegate `-u` to `rsync` by using `crontab -e`:
``` bash
#!/bin/bash
mkdir -p /var/www/flarum/public/flarumdown/dump/assets/files
# collect FoF/upload files
readonly RSYNC_FILTER_P="*-thumb.webp"
readonly RSYNC_TARGET_D="/var/www/flarum/public/flarumdown/dump/assets"
find "$RSYNC_TARGET_D" -name "$RSYNC_FILTER_P" -type f -delete # rsync has --filter, cleanup
/usr/bin/rsync -av --delete --filter="-p $RSYNC_FILTER_P" \
/var/www/flarum/public/assets/files \
$RSYNC_TARGET_D
# dump the DB
RUST_LOG=warn /usr/local/bin/flarumdown -s /var/www/flarum/flarum.sqlite \
-t /var/www/flarum/public/flarumdown/dump \
-i index \
-r http://[202:68d0:f0d5:b88d:1d1a:555e:2f6b:3148] \
-r http://[505:6847:c778:61a1:5c6d:e802:d291:8191] \
-r http://hc3fycfadz7fkapp62fqi6llioe46fvis6wuswfobl5ghc2u7snq.b32.i2p \
-r http://w6vtcpbir5vvokwdqqbqlrdtnzwyfc4iyqn6owxuyjeppszuydutqwqd.onion
# create .zip file to simply download for offline reading
readonly TARGET_DIR=/var/www/flarum/public/flarumdown/dump
cd "$TARGET_DIR"
if [ "$(pwd)" != "$TARGET_DIR" ]; then
echo "Unexpected path!"
exit 1
fi
zip -r -9 /var/www/flarum/public/flarumdown/dump.zip .
```

View file

@ -9,13 +9,12 @@ pub struct Config {
pub source: PathBuf,
/// Path to export FoF/upload files from
/// tips:
/// * root should be the path to `flarum/public` dir
/// * use rsync instead of this option for longer SSD life
/// * the root is path to the public dir (e.g. `/var/www/flarum/public`)
#[arg(short, long)]
pub upload: Option<PathBuf>,
pub public: PathBuf,
/// Path to export markdown
/// * e.g. `/var/www/flarum/public/flarumdown/dump`
#[arg(short, long)]
pub target: PathBuf,

View file

@ -11,7 +11,7 @@ use regex::{Captures, Regex};
use std::{
collections::{HashMap, HashSet},
env::var,
fs::{File, copy, create_dir_all, read_dir, remove_dir_all, remove_file},
fs::{File, copy, create_dir_all, read_dir, remove_file},
io::Write,
path::PathBuf,
};
@ -56,18 +56,10 @@ fn main() -> Result<()> {
if !config.target.exists() {
create_dir_all(&config.target)?;
}
for entry in read_dir(&config.target)? {
let path = entry?.path();
if path.is_file() {
remove_file(path)?;
} else if path.is_dir() && config.upload.is_some() {
remove_dir_all(path)?;
}
}
let mut db = Database::connect(config.source)?;
let mut users = HashMap::new();
let mut keep = HashSet::with_capacity(1000); // @TODO count entries expected from the DB
let mut users = HashMap::with_capacity(100); // @TODO count entries expected from the DB
for user in db.users()? {
assert!(
users
@ -81,7 +73,7 @@ fn main() -> Result<()> {
)
}
let mut tags = HashMap::new();
let mut tags = HashMap::with_capacity(100); // @TODO count entries expected from the DB
for tag in db.tags()? {
if !config.filter_tag.is_empty() && !config.filter_tag.contains(&tag.slug) {
continue;
@ -89,7 +81,7 @@ fn main() -> Result<()> {
assert!(tags.insert(tag.id, tag.slug).is_none())
}
let mut discussions = Vec::new();
let mut discussions = Vec::with_capacity(1000); // @TODO count entries expected from the DB
for discussion in db.discussions()? {
if !db
.discussion_tag_ids(discussion.id)?
@ -100,7 +92,7 @@ fn main() -> Result<()> {
}
assert!(users.contains_key(&discussion.user_id));
let mut posts = Vec::new();
let mut posts = Vec::with_capacity(1000); // @TODO count entries expected from the DB
for post in db.posts(discussion.id)? {
posts.push(Post {
id: post.id,
@ -121,11 +113,13 @@ fn main() -> Result<()> {
}
if let Some(index) = config.index {
let mut file = File::create_new({
let path = {
let mut path = PathBuf::from(&config.target);
path.push(format!("{}.md", index.trim_end_matches(".md")));
path
})?;
};
let mut file = File::create(&path)?;
keep.insert(path);
for discussion in &discussions {
file.write_all(
format!(
@ -151,11 +145,13 @@ fn main() -> Result<()> {
}
for discussion in &discussions {
let mut file = File::create_new({
let path = {
let mut path = PathBuf::from(&config.target);
path.push(format!("{}.md", discussion.id));
path
})?;
};
let mut file = File::create(&path)?;
keep.insert(path);
file.write_all(
{
let mut page = Vec::new();
@ -195,23 +191,31 @@ fn main() -> Result<()> {
post
});
for upload in &uploads {
let path_target = {
let t = {
let mut p = PathBuf::from(&config.target);
p.push(upload);
p
};
match config.upload {
// upload option is active,
// create files copy in the destinations
Some(ref upload_source) => {
let mut p = PathBuf::from(upload_source);
let mut p = PathBuf::from(&config.public);
p.push(upload);
match p.canonicalize() {
Ok(src) => {
if src.starts_with(upload_source) {
if !path_target.exists() {
create_dir_all(path_target.parent().unwrap())?;
copy(src, path_target)?;
if src.starts_with(&config.public) {
if t.exists() {
debug!(
"Copied file `{}` for `{}` exists, skip overwrite",
t.to_string_lossy(),
src.to_string_lossy(),
);
} else {
create_dir_all(t.parent().unwrap())?;
copy(&src, &t)?;
debug!(
"Copied file from `{}` to `{}`",
src.to_string_lossy(),
t.to_string_lossy(),
);
keep.insert(t);
}
} else {
warn!(
@ -224,19 +228,6 @@ fn main() -> Result<()> {
}
Err(e) => error!("{e}: `{}` (post #{})", p.to_string_lossy(), post.id)
}
},
// task delegated to rsync
// * manually pre-copied FoF/upload destinations must exist
None => {
if !path_target.exists() {
warn!(
"Referenced file does not exist: `{}` (post #{})",
path_target.to_string_lossy(),
post.id
)
}
}
}
}
content.push("\n---\n".into())
}
@ -255,7 +246,19 @@ fn main() -> Result<()> {
.as_bytes(),
)?
}
cleanup(&config.target, &keep)
}
/// Recursively removes files under `target` that do not exist in the `keep` registry
/// * descends into sub-directories so stale FoF/upload copies (e.g. under
///   `assets/files/...`) are removed too, matching the documented contract
/// * empty directories cleanup yet not implemented @TODO
fn cleanup(target: &PathBuf, keep: &HashSet<PathBuf>) -> Result<()> {
    for entry in read_dir(target)? {
        let p = entry?.path();
        if p.is_dir() {
            // recurse first; the directory itself is intentionally kept (see @TODO)
            cleanup(&p, keep)?;
        } else if p.is_file() && !keep.contains(&p) {
            remove_file(&p)?;
            debug!("Cleanup file `{}`", p.to_string_lossy());
        }
    }
    Ok(())
}