mirror of
https://github.com/YGGverse/flarumdown.git
synced 2026-03-31 08:45:28 +00:00
implement ssd-friendly keep index to cleanup removed entries on fof/upload sync; drop external rsync option
This commit is contained in:
parent
e75f973de4
commit
18762c6b74
5 changed files with 71 additions and 104 deletions
26
Cargo.lock
generated
26
Cargo.lock
generated
|
|
@ -96,9 +96,9 @@ checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cc"
|
name = "cc"
|
||||||
version = "1.2.57"
|
version = "1.2.58"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423"
|
checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"find-msvc-tools",
|
"find-msvc-tools",
|
||||||
"shlex",
|
"shlex",
|
||||||
|
|
@ -195,7 +195,7 @@ checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "flarumdown"
|
name = "flarumdown"
|
||||||
version = "0.3.1"
|
version = "0.4.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"chrono",
|
"chrono",
|
||||||
|
|
@ -279,9 +279,9 @@ checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "js-sys"
|
name = "js-sys"
|
||||||
version = "0.3.91"
|
version = "0.3.92"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c"
|
checksum = "cc4c90f45aa2e6eacbe8645f77fdea542ac97a494bcd117a67df9ff4d611f995"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
|
|
@ -617,9 +617,9 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wasm-bindgen"
|
name = "wasm-bindgen"
|
||||||
version = "0.2.114"
|
version = "0.2.115"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e"
|
checksum = "6523d69017b7633e396a89c5efab138161ed5aafcbc8d3e5c5a42ae38f50495a"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
|
|
@ -630,9 +630,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wasm-bindgen-macro"
|
name = "wasm-bindgen-macro"
|
||||||
version = "0.2.114"
|
version = "0.2.115"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6"
|
checksum = "4e3a6c758eb2f701ed3d052ff5737f5bfe6614326ea7f3bbac7156192dc32e67"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"quote",
|
"quote",
|
||||||
"wasm-bindgen-macro-support",
|
"wasm-bindgen-macro-support",
|
||||||
|
|
@ -640,9 +640,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wasm-bindgen-macro-support"
|
name = "wasm-bindgen-macro-support"
|
||||||
version = "0.2.114"
|
version = "0.2.115"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3"
|
checksum = "921de2737904886b52bcbb237301552d05969a6f9c40d261eb0533c8b055fedf"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bumpalo",
|
"bumpalo",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
|
|
@ -653,9 +653,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wasm-bindgen-shared"
|
name = "wasm-bindgen-shared"
|
||||||
version = "0.2.114"
|
version = "0.2.115"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16"
|
checksum = "a93e946af942b58934c604527337bad9ae33ba1d5c6900bbb41c2c07c2364a93"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "flarumdown"
|
name = "flarumdown"
|
||||||
version = "0.3.1"
|
version = "0.4.0"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
|
|
||||||
37
README.md
37
README.md
|
|
@ -26,45 +26,10 @@ cargo install flarumdown
|
||||||
``` bash
|
``` bash
|
||||||
RUST_LOG=warn flarumdown -s '/path/to/flarum.sqlite' \
|
RUST_LOG=warn flarumdown -s '/path/to/flarum.sqlite' \
|
||||||
-t '/path/to/target' \
|
-t '/path/to/target' \
|
||||||
|
-p '/path/to/public' \
|
||||||
-i index \
|
-i index \
|
||||||
-r http://[202:68d0:f0d5:b88d:1d1a:555e:2f6b:3148] \
|
-r http://[202:68d0:f0d5:b88d:1d1a:555e:2f6b:3148] \
|
||||||
-r http://[505:6847:c778:61a1:5c6d:e802:d291:8191] \
|
-r http://[505:6847:c778:61a1:5c6d:e802:d291:8191] \
|
||||||
-r http://hc3fycfadz7fkapp62fqi6llioe46fvis6wuswfobl5ghc2u7snq.b32.i2p \
|
-r http://hc3fycfadz7fkapp62fqi6llioe46fvis6wuswfobl5ghc2u7snq.b32.i2p \
|
||||||
-r http://w6vtcpbir5vvokwdqqbqlrdtnzwyfc4iyqn6owxuyjeppszuydutqwqd.onion
|
-r http://w6vtcpbir5vvokwdqqbqlrdtnzwyfc4iyqn6owxuyjeppszuydutqwqd.onion
|
||||||
```
|
```
|
||||||
|
|
||||||
### rsync
|
|
||||||
|
|
||||||
Optionally, delegate `-u` to `rsync` by using `crontab -e`:
|
|
||||||
|
|
||||||
``` bash
|
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
mkdir -p /var/www/flarum/public/flarumdown/dump/assets/files
|
|
||||||
|
|
||||||
# collect FoF/upload files
|
|
||||||
readonly RSYNC_FILTER_P="*-thumb.webp"
|
|
||||||
readonly RSYNC_TARGET_D="/var/www/flarum/public/flarumdown/dump/assets"
|
|
||||||
find "$RSYNC_TARGET_D" -name "$RSYNC_FILTER_P" -type f -delete # rsync has --filter, cleanup
|
|
||||||
/usr/bin/rsync -av --delete --filter="-p $RSYNC_FILTER_P" \
|
|
||||||
/var/www/flarum/public/assets/files \
|
|
||||||
$RSYNC_TARGET_D
|
|
||||||
|
|
||||||
# dump the DB
|
|
||||||
RUST_LOG=warn /usr/local/bin/flarumdown -s /var/www/flarum/flarum.sqlite \
|
|
||||||
-t /var/www/flarum/public/flarumdown/dump \
|
|
||||||
-i index \
|
|
||||||
-r http://[202:68d0:f0d5:b88d:1d1a:555e:2f6b:3148] \
|
|
||||||
-r http://[505:6847:c778:61a1:5c6d:e802:d291:8191] \
|
|
||||||
-r http://hc3fycfadz7fkapp62fqi6llioe46fvis6wuswfobl5ghc2u7snq.b32.i2p \
|
|
||||||
-r http://w6vtcpbir5vvokwdqqbqlrdtnzwyfc4iyqn6owxuyjeppszuydutqwqd.onion
|
|
||||||
|
|
||||||
# create .zip file to simply download for offline reading
|
|
||||||
readonly TARGET_DIR=/var/www/flarum/public/flarumdown/dump
|
|
||||||
cd "$TARGET_DIR"
|
|
||||||
if [ "$(pwd)" != "$TARGET_DIR" ]; then
|
|
||||||
echo "Unexpected path!"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
zip -r -9 /var/www/flarum/public/flarumdown/dump.zip .
|
|
||||||
```
|
|
||||||
|
|
@ -9,13 +9,12 @@ pub struct Config {
|
||||||
pub source: PathBuf,
|
pub source: PathBuf,
|
||||||
|
|
||||||
/// Path to export FoF/upload files from
|
/// Path to export FoF/upload files from
|
||||||
/// tips:
|
/// * the root is path to the public dir (e.g. `/var/www/flarum/public`)
|
||||||
/// * root should be the path to `flarum/public` dir
|
|
||||||
/// * use rsync instead of this option for longer SSD life
|
|
||||||
#[arg(short, long)]
|
#[arg(short, long)]
|
||||||
pub upload: Option<PathBuf>,
|
pub public: PathBuf,
|
||||||
|
|
||||||
/// Path to export markdown
|
/// Path to export markdown
|
||||||
|
/// * e.g. `/var/www/flarum/public/flarumdown/dump`
|
||||||
#[arg(short, long)]
|
#[arg(short, long)]
|
||||||
pub target: PathBuf,
|
pub target: PathBuf,
|
||||||
|
|
||||||
|
|
|
||||||
85
src/main.rs
85
src/main.rs
|
|
@ -11,7 +11,7 @@ use regex::{Captures, Regex};
|
||||||
use std::{
|
use std::{
|
||||||
collections::{HashMap, HashSet},
|
collections::{HashMap, HashSet},
|
||||||
env::var,
|
env::var,
|
||||||
fs::{File, copy, create_dir_all, read_dir, remove_dir_all, remove_file},
|
fs::{File, copy, create_dir_all, read_dir, remove_file},
|
||||||
io::Write,
|
io::Write,
|
||||||
path::PathBuf,
|
path::PathBuf,
|
||||||
};
|
};
|
||||||
|
|
@ -56,18 +56,10 @@ fn main() -> Result<()> {
|
||||||
if !config.target.exists() {
|
if !config.target.exists() {
|
||||||
create_dir_all(&config.target)?;
|
create_dir_all(&config.target)?;
|
||||||
}
|
}
|
||||||
for entry in read_dir(&config.target)? {
|
|
||||||
let path = entry?.path();
|
|
||||||
if path.is_file() {
|
|
||||||
remove_file(path)?;
|
|
||||||
} else if path.is_dir() && config.upload.is_some() {
|
|
||||||
remove_dir_all(path)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut db = Database::connect(config.source)?;
|
let mut db = Database::connect(config.source)?;
|
||||||
|
let mut keep = HashSet::with_capacity(1000); // @TODO count entries expected from the DB
|
||||||
let mut users = HashMap::new();
|
let mut users = HashMap::with_capacity(100); // @TODO count entries expected from the DB
|
||||||
for user in db.users()? {
|
for user in db.users()? {
|
||||||
assert!(
|
assert!(
|
||||||
users
|
users
|
||||||
|
|
@ -81,7 +73,7 @@ fn main() -> Result<()> {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut tags = HashMap::new();
|
let mut tags = HashMap::with_capacity(100); // @TODO count entries expected from the DB
|
||||||
for tag in db.tags()? {
|
for tag in db.tags()? {
|
||||||
if !config.filter_tag.is_empty() && !config.filter_tag.contains(&tag.slug) {
|
if !config.filter_tag.is_empty() && !config.filter_tag.contains(&tag.slug) {
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -89,7 +81,7 @@ fn main() -> Result<()> {
|
||||||
assert!(tags.insert(tag.id, tag.slug).is_none())
|
assert!(tags.insert(tag.id, tag.slug).is_none())
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut discussions = Vec::new();
|
let mut discussions = Vec::with_capacity(1000); // @TODO count entries expected from the DB
|
||||||
for discussion in db.discussions()? {
|
for discussion in db.discussions()? {
|
||||||
if !db
|
if !db
|
||||||
.discussion_tag_ids(discussion.id)?
|
.discussion_tag_ids(discussion.id)?
|
||||||
|
|
@ -100,7 +92,7 @@ fn main() -> Result<()> {
|
||||||
}
|
}
|
||||||
assert!(users.contains_key(&discussion.user_id));
|
assert!(users.contains_key(&discussion.user_id));
|
||||||
|
|
||||||
let mut posts = Vec::new();
|
let mut posts = Vec::with_capacity(1000); // @TODO count entries expected from the DB
|
||||||
for post in db.posts(discussion.id)? {
|
for post in db.posts(discussion.id)? {
|
||||||
posts.push(Post {
|
posts.push(Post {
|
||||||
id: post.id,
|
id: post.id,
|
||||||
|
|
@ -121,11 +113,13 @@ fn main() -> Result<()> {
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(index) = config.index {
|
if let Some(index) = config.index {
|
||||||
let mut file = File::create_new({
|
let path = {
|
||||||
let mut path = PathBuf::from(&config.target);
|
let mut path = PathBuf::from(&config.target);
|
||||||
path.push(format!("{}.md", index.trim_end_matches(".md")));
|
path.push(format!("{}.md", index.trim_end_matches(".md")));
|
||||||
path
|
path
|
||||||
})?;
|
};
|
||||||
|
let mut file = File::create(&path)?;
|
||||||
|
keep.insert(path);
|
||||||
for discussion in &discussions {
|
for discussion in &discussions {
|
||||||
file.write_all(
|
file.write_all(
|
||||||
format!(
|
format!(
|
||||||
|
|
@ -151,11 +145,13 @@ fn main() -> Result<()> {
|
||||||
}
|
}
|
||||||
|
|
||||||
for discussion in &discussions {
|
for discussion in &discussions {
|
||||||
let mut file = File::create_new({
|
let path = {
|
||||||
let mut path = PathBuf::from(&config.target);
|
let mut path = PathBuf::from(&config.target);
|
||||||
path.push(format!("{}.md", discussion.id));
|
path.push(format!("{}.md", discussion.id));
|
||||||
path
|
path
|
||||||
})?;
|
};
|
||||||
|
let mut file = File::create(&path)?;
|
||||||
|
keep.insert(path);
|
||||||
file.write_all(
|
file.write_all(
|
||||||
{
|
{
|
||||||
let mut page = Vec::new();
|
let mut page = Vec::new();
|
||||||
|
|
@ -195,23 +191,31 @@ fn main() -> Result<()> {
|
||||||
post
|
post
|
||||||
});
|
});
|
||||||
for upload in &uploads {
|
for upload in &uploads {
|
||||||
let path_target = {
|
let t = {
|
||||||
let mut p = PathBuf::from(&config.target);
|
let mut p = PathBuf::from(&config.target);
|
||||||
p.push(upload);
|
p.push(upload);
|
||||||
p
|
p
|
||||||
};
|
};
|
||||||
match config.upload {
|
let mut p = PathBuf::from(&config.public);
|
||||||
// upload option is active,
|
|
||||||
// create files copy in the destinations
|
|
||||||
Some(ref upload_source) => {
|
|
||||||
let mut p = PathBuf::from(upload_source);
|
|
||||||
p.push(upload);
|
p.push(upload);
|
||||||
match p.canonicalize() {
|
match p.canonicalize() {
|
||||||
Ok(src) => {
|
Ok(src) => {
|
||||||
if src.starts_with(upload_source) {
|
if src.starts_with(&config.public) {
|
||||||
if !path_target.exists() {
|
if t.exists() {
|
||||||
create_dir_all(path_target.parent().unwrap())?;
|
debug!(
|
||||||
copy(src, path_target)?;
|
"Copied file `{}` for `{}` exists, skip overwrite",
|
||||||
|
t.to_string_lossy(),
|
||||||
|
src.to_string_lossy(),
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
create_dir_all(t.parent().unwrap())?;
|
||||||
|
copy(&src, &t)?;
|
||||||
|
debug!(
|
||||||
|
"Copied file from `{}` to `{}`",
|
||||||
|
src.to_string_lossy(),
|
||||||
|
t.to_string_lossy(),
|
||||||
|
);
|
||||||
|
keep.insert(t);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
warn!(
|
warn!(
|
||||||
|
|
@ -224,19 +228,6 @@ fn main() -> Result<()> {
|
||||||
}
|
}
|
||||||
Err(e) => error!("{e}: `{}` (post #{})", p.to_string_lossy(), post.id)
|
Err(e) => error!("{e}: `{}` (post #{})", p.to_string_lossy(), post.id)
|
||||||
}
|
}
|
||||||
},
|
|
||||||
// task delegated to rsync
|
|
||||||
// * manually pre-copied FoF/upload destinations must exist
|
|
||||||
None => {
|
|
||||||
if !path_target.exists() {
|
|
||||||
warn!(
|
|
||||||
"Referenced file does not exist: `{}` (post #{})",
|
|
||||||
path_target.to_string_lossy(),
|
|
||||||
post.id
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
content.push("\n---\n".into())
|
content.push("\n---\n".into())
|
||||||
}
|
}
|
||||||
|
|
@ -255,7 +246,19 @@ fn main() -> Result<()> {
|
||||||
.as_bytes(),
|
.as_bytes(),
|
||||||
)?
|
)?
|
||||||
}
|
}
|
||||||
|
cleanup(&config.target, &keep)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Removes files located directly in `target` that do not exist in the `keep` registry (subdirectories are not traversed yet)
|
||||||
|
/// * cleanup of empty directories is not implemented yet @TODO
|
||||||
|
fn cleanup(target: &PathBuf, keep: &HashSet<PathBuf>) -> Result<()> {
|
||||||
|
for entry in read_dir(target)? {
|
||||||
|
let p = entry?.path();
|
||||||
|
if p.is_file() && !keep.contains(&p) {
|
||||||
|
remove_file(&p)?;
|
||||||
|
debug!("Cleanup file `{}`", p.to_string_lossy());
|
||||||
|
}
|
||||||
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue