diff --git a/Cargo.lock b/Cargo.lock index 08292d8..0224c9c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -273,8 +273,10 @@ dependencies = [ "chrono", "clap", "html-to-markdown-rs", + "log", "regex", "rusqlite", + "tracing-subscriber", ] [[package]] @@ -404,6 +406,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.183" @@ -455,6 +463,15 @@ dependencies = [ "web_atoms", ] +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + [[package]] name = "memchr" version = "2.8.0" @@ -467,6 +484,15 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -550,6 +576,12 @@ dependencies = [ "siphasher", ] +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + [[package]] name = "pkg-config" version = "0.3.32" @@ -699,6 +731,15 @@ dependencies = [ "zmij", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -800,6 +841,15 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "tiny-keccak" version = "2.0.2" @@ -809,6 +859,55 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + [[package]] name = "unicode-ident" version = "1.0.24" @@ -833,6 +932,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/Cargo.toml b/Cargo.toml index 1b0a0eb..9051e03 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,5 +14,7 @@ anyhow = "1.0.102" chrono = "0.4.44" clap = { version = "4.6.0", features = ["derive"] } html-to-markdown-rs = "2.28.2" +log = "0.4.29" regex = "1.12.3" rusqlite = { version = "0.39.0", features = ["chrono"]} +tracing-subscriber = { version = "0.3.23", features = ["env-filter"] } diff --git a/src/config.rs b/src/config.rs index 675c97b..371a901 100644 --- a/src/config.rs +++ b/src/config.rs @@ -8,6 +8,11 @@ pub struct Config { #[arg(short, long)] pub source: PathBuf, + /// Path to export FoF/upload tags from + /// * tip: root should be the path to `flarum/public` dir + #[arg(short, long)] + pub upload: PathBuf, + /// Path to export markdown #[arg(short, long)] pub target: PathBuf, diff --git a/src/main.rs b/src/main.rs index 311f5b4..aca7390 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,15 +2,17 @@ mod config; mod database; use anyhow::Result; -use chrono::{DateTime, Utc}; +use chrono::{DateTime, Local, Utc}; use clap::Parser; use config::Config; use database::Database; use html_to_markdown_rs::convert; -use regex::Regex; +use log::*; +use regex::{Captures, Regex}; use std::{ - collections::HashMap, - fs::{File, create_dir_all, remove_dir_all}, + collections::{HashMap, HashSet}, + env::var, + fs::{File, copy, create_dir_all, remove_dir_all}, io::Write, path::PathBuf, }; @@ -35,6 +37,20 @@ pub struct Discussion { } fn main() -> Result<()> { + if var("RUST_LOG").is_ok() { + use tracing_subscriber::{EnvFilter, fmt::*}; + struct T; + impl time::FormatTime for T { + fn format_time(&self, w: &mut format::Writer<'_>) -> std::fmt::Result { + write!(w, "{}", Local::now()) + } + } + fmt() + .with_timer(T) + .with_env_filter(EnvFilter::from_default_env()) + .init() + } + let config = Config::parse(); if config.target.exists() { @@ -128,10 +144,36 @@ fn main() -> Result<()> { .map(|edited_at| format!(" / {}", edited_at)) .unwrap_or_default() )); + let mut uploads = HashSet::new(); content.push(post_format(&convert( - pre_format(&post.content).trim(), + pre_format(&post.content, &mut uploads).trim(), None, )?)); + for upload in &uploads { + let path_source = { + let mut p = PathBuf::from(&config.upload); + p.push(upload); + p + }; + let path_target = { + let mut p = PathBuf::from(&config.target); + p.push(upload); + p + }; + let path_parent = path_target.parent().unwrap(); + + create_dir_all(path_parent)?; + if !path_target.exists() { + if path_source.exists() { + copy(path_source, path_target)?; + } else { + warn!( + "Source file does not exists: `{}`", + path_source.to_string_lossy() + ) + } + } + } content.push("---\n".into()) } content.join("\n") @@ -145,11 +187,28 @@ fn main() -> Result<()> { Ok(()) } -fn pre_format(data: &str) -> String { - let s = Regex::new(r"[^<]+").unwrap(); - let e = Regex::new(r"[^<]+").unwrap(); - - e.replace_all(&s.replace_all(data, ""), "") +fn pre_format(data: &str, uploads: &mut HashSet) -> String { + Regex::new(r"[^<]+") + .unwrap() + .replace_all( + &Regex::new(r"[^<]+").unwrap().replace_all( + &Regex::new(r"(?s)]+)>\[[^\]]+\]") + .unwrap() + .replace_all(data, |c: &Captures| { + uploads.insert( + Regex::new(r#"url="([^"]+)""#) + .unwrap() + .captures(&c[1]) + .unwrap()[1] + .trim_start_matches("/") + .into(), + ); + format!("", c[1].replace(" url=", " src=")) + }), + "", + ), + "", + ) .replace("", "") .replace("