diff --git a/Cargo.lock b/Cargo.lock index bc574b7..6861785 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -96,9 +96,9 @@ checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "cc" -version = "1.2.57" +version = "1.2.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" +checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1" dependencies = [ "find-msvc-tools", "shlex", @@ -195,7 +195,7 @@ checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "flarumdown" -version = "0.3.1" +version = "0.4.0" dependencies = [ "anyhow", "chrono", @@ -279,9 +279,9 @@ checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] name = "js-sys" -version = "0.3.91" +version = "0.3.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +checksum = "cc4c90f45aa2e6eacbe8645f77fdea542ac97a494bcd117a67df9ff4d611f995" dependencies = [ "once_cell", "wasm-bindgen", @@ -617,9 +617,9 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "wasm-bindgen" -version = "0.2.114" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +checksum = "6523d69017b7633e396a89c5efab138161ed5aafcbc8d3e5c5a42ae38f50495a" dependencies = [ "cfg-if", "once_cell", @@ -630,9 +630,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.114" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +checksum = "4e3a6c758eb2f701ed3d052ff5737f5bfe6614326ea7f3bbac7156192dc32e67" dependencies = [ "quote", 
"wasm-bindgen-macro-support", @@ -640,9 +640,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.114" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +checksum = "921de2737904886b52bcbb237301552d05969a6f9c40d261eb0533c8b055fedf" dependencies = [ "bumpalo", "proc-macro2", @@ -653,9 +653,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.114" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +checksum = "a93e946af942b58934c604527337bad9ae33ba1d5c6900bbb41c2c07c2364a93" dependencies = [ "unicode-ident", ] diff --git a/Cargo.toml b/Cargo.toml index 90f3b83..0bc973c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "flarumdown" -version = "0.3.1" +version = "0.4.0" edition = "2024" license = "MIT" readme = "README.md" diff --git a/README.md b/README.md index 84ad57b..f8c155e 100644 --- a/README.md +++ b/README.md @@ -26,45 +26,10 @@ cargo install flarumdown ``` bash RUST_LOG=warn flarumdown -s '/path/to/flarum.sqlite' \ -t '/path/to/target' \ + -p '/path/to/public' \ -i index \ -r http://[202:68d0:f0d5:b88d:1d1a:555e:2f6b:3148] \ -r http://[505:6847:c778:61a1:5c6d:e802:d291:8191] \ -r http://hc3fycfadz7fkapp62fqi6llioe46fvis6wuswfobl5ghc2u7snq.b32.i2p \ -r http://w6vtcpbir5vvokwdqqbqlrdtnzwyfc4iyqn6owxuyjeppszuydutqwqd.onion -``` - -### rsync - -Optionally, delegate `-u` to `rsync` by using `crontab -e`: - -``` bash -#!/bin/bash - -mkdir -p /var/www/flarum/public/flarumdown/dump/assets/files - -# collect FoF/upload files -readonly RSYNC_FILTER_P="*-thumb.webp" -readonly RSYNC_TARGET_D="/var/www/flarum/public/flarumdown/dump/assets" -find "$RSYNC_TARGET_D" -name "$RSYNC_FILTER_P" -type f -delete # rsync has --filter, cleanup -/usr/bin/rsync -av --delete 
--filter="-p $RSYNC_FILTER_P" \ - /var/www/flarum/public/assets/files \ - $RSYNC_TARGET_D - -# dump the DB -RUST_LOG=warn /usr/local/bin/flarumdown -s /var/www/flarum/flarum.sqlite \ - -t /var/www/flarum/public/flarumdown/dump \ - -i index \ - -r http://[202:68d0:f0d5:b88d:1d1a:555e:2f6b:3148] \ - -r http://[505:6847:c778:61a1:5c6d:e802:d291:8191] \ - -r http://hc3fycfadz7fkapp62fqi6llioe46fvis6wuswfobl5ghc2u7snq.b32.i2p \ - -r http://w6vtcpbir5vvokwdqqbqlrdtnzwyfc4iyqn6owxuyjeppszuydutqwqd.onion - -# create .zip file to simply download for offline reading -readonly TARGET_DIR=/var/www/flarum/public/flarumdown/dump -cd "$TARGET_DIR" -if [ "$(pwd)" != "$TARGET_DIR" ]; then - echo "Unexpected path!" - exit 1 -fi -zip -r -9 /var/www/flarum/public/flarumdown/dump.zip . ``` \ No newline at end of file diff --git a/src/config.rs b/src/config.rs index d7b6d9d..3390228 100644 --- a/src/config.rs +++ b/src/config.rs @@ -9,13 +9,12 @@ pub struct Config { pub source: PathBuf, /// Path to export FoF/upload files from - /// tips: - /// * root should be the path to `flarum/public` dir - /// * use rsync instead of this option for longer SSD life + /// * the root is the path to the public dir (e.g. `/var/www/flarum/public`) #[arg(short, long)] - pub upload: Option, + pub public: PathBuf, /// Path to export markdown + /// * e.g. `/var/www/flarum/public/flarumdown/dump` #[arg(short, long)] pub target: PathBuf, diff --git a/src/main.rs b/src/main.rs index 47e4f1f..232ef79 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,7 +11,7 @@ use regex::{Captures, Regex}; use std::{ collections::{HashMap, HashSet}, env::var, - fs::{File, copy, create_dir_all, read_dir, remove_dir_all, remove_file}, + fs::{File, copy, create_dir_all, read_dir, remove_file}, io::Write, path::PathBuf, }; @@ -56,18 +56,10 @@ fn main() -> Result<()> { if !config.target.exists() { create_dir_all(&config.target)?; } - for entry in read_dir(&config.target)? 
{ - let path = entry?.path(); - if path.is_file() { - remove_file(path)?; - } else if path.is_dir() && config.upload.is_some() { - remove_dir_all(path)?; - } - } let mut db = Database::connect(config.source)?; - - let mut users = HashMap::new(); + let mut keep = HashSet::with_capacity(1000); // @TODO count entries expected from the DB + let mut users = HashMap::with_capacity(100); // @TODO count entries expected from the DB for user in db.users()? { assert!( users @@ -81,7 +73,7 @@ fn main() -> Result<()> { ) } - let mut tags = HashMap::new(); + let mut tags = HashMap::with_capacity(100); // @TODO count entries expected from the DB for tag in db.tags()? { if !config.filter_tag.is_empty() && !config.filter_tag.contains(&tag.slug) { continue; @@ -89,7 +81,7 @@ fn main() -> Result<()> { assert!(tags.insert(tag.id, tag.slug).is_none()) } - let mut discussions = Vec::new(); + let mut discussions = Vec::with_capacity(1000); // @TODO count entries expected from the DB for discussion in db.discussions()? { if !db .discussion_tag_ids(discussion.id)? @@ -100,7 +92,7 @@ fn main() -> Result<()> { } assert!(users.contains_key(&discussion.user_id)); - let mut posts = Vec::new(); + let mut posts = Vec::with_capacity(1000); // @TODO count entries expected from the DB for post in db.posts(discussion.id)? 
{ posts.push(Post { id: post.id, @@ -121,11 +113,13 @@ fn main() -> Result<()> { } if let Some(index) = config.index { - let mut file = File::create_new({ + let path = { let mut path = PathBuf::from(&config.target); path.push(format!("{}.md", index.trim_end_matches(".md"))); path - })?; + }; + let mut file = File::create(&path)?; + keep.insert(path); for discussion in &discussions { file.write_all( format!( @@ -151,11 +145,13 @@ fn main() -> Result<()> { } for discussion in &discussions { - let mut file = File::create_new({ + let path = { let mut path = PathBuf::from(&config.target); path.push(format!("{}.md", discussion.id)); path - })?; + }; + let mut file = File::create(&path)?; + keep.insert(path); file.write_all( { let mut page = Vec::new(); @@ -195,47 +191,42 @@ fn main() -> Result<()> { post }); for upload in &uploads { - let path_target = { + let t = { let mut p = PathBuf::from(&config.target); p.push(upload); p }; - match config.upload { - // upload option is active, - // create files copy in the destinations - Some(ref upload_source) => { - let mut p = PathBuf::from(upload_source); - p.push(upload); - match p.canonicalize() { - Ok(src) => { - if src.starts_with(upload_source) { - if !path_target.exists() { - create_dir_all(path_target.parent().unwrap())?; - copy(src, path_target)?; - } - } else { - warn!( - "Possible traversal injection: `{}` (post #{}, user #{})", - src.to_string_lossy(), - post.id, - post.user_id - ) - } + let mut p = PathBuf::from(&config.public); + p.push(upload); + match p.canonicalize() { + Ok(src) => { + if src.starts_with(&config.public) { + if t.exists() { + debug!( + "Copied file `{}` for `{}` exists, skip overwrite", + t.to_string_lossy(), + src.to_string_lossy(), + ); + } else { + create_dir_all(t.parent().unwrap())?; + copy(&src, &t)?; + debug!( + "Copied file from `{}` to `{}`", + src.to_string_lossy(), + t.to_string_lossy(), + ); + keep.insert(t); } - Err(e) => error!("{e}: `{}` (post #{})", p.to_string_lossy(), post.id) - 
} - }, - // task delegated to rsync - // * manually pre-copied FoF/upload destinations must exist - None => { - if !path_target.exists() { + } else { warn!( - "Referenced file does not exist: `{}` (post #{})", - path_target.to_string_lossy(), - post.id + "Possible traversal injection: `{}` (post #{}, user #{})", + src.to_string_lossy(), + post.id, + post.user_id ) } } + Err(e) => error!("{e}: `{}` (post #{})", p.to_string_lossy(), post.id) } } content.push("\n---\n".into()) @@ -255,7 +246,19 @@ fn main() -> Result<()> { .as_bytes(), )? } + cleanup(&config.target, &keep) +} +/// Removes files located directly in `target` that are not listed in the `keep` registry +/// * subdirectory traversal and empty directories cleanup are not implemented yet @TODO +fn cleanup(target: &PathBuf, keep: &HashSet) -> Result<()> { + for entry in read_dir(target)? { + let p = entry?.path(); + if p.is_file() && !keep.contains(&p) { + remove_file(&p)?; + debug!("Cleanup file `{}`", p.to_string_lossy()); + } + } Ok(()) }