diff --git a/Cargo.toml b/Cargo.toml index a7ffc54..406d57b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,3 +14,4 @@ chrono = "0.4.39" clap = { version = "4.5.28", features = ["derive"] } reqwest = { version = "0.12.12", features = ["blocking"] } rss = "2.0.11" +url = "2.5.4" diff --git a/src/main.rs b/src/main.rs index b81894e..f882470 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,7 @@ mod argument; mod output; mod path; +use chrono::{DateTime, FixedOffset}; use output::Output; use std::error::Error; @@ -15,13 +16,20 @@ fn main() -> Result<(), Box> { output.debug("crawler started"); + let mut status = None; + loop { - crawl(&argument.source, &argument.target, &output)?; + crawl(&argument.source, &argument.target, &output, &mut status)?; sleep(Duration::from_secs(argument.update)); } } -fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box> { +fn crawl( + source: &str, + target: &str, + output: &Output, + status: &mut Option>, +) -> Result<(), Box> { use path::Path; use reqwest::blocking::get; use rss::Channel; @@ -29,58 +37,106 @@ fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box { - let path = Path::build(target, pub_data, true)?; - if metadata(&path.absolute).is_ok() { - exist += 1; - continue; // skip existing records + match channel.pub_date() { + Some(pub_date) => { + // detect index file update required + let mut index_request = { + let remote = chrono::DateTime::parse_from_rfc2822(pub_date)?; + if status.is_some_and(|local| local != remote) || status.is_none() { + *status = Some(remote); + Some(( + File::create(Path::build(target, pub_date, true)?.index())?, + Vec::new(), + )) + } else { + None } - data.push(format!("# {pub_data}")); - path + }; + + // handle feed items + for item in channel.items().iter() { + total += 1; + + // handle item data + let mut data = Vec::new(); + + let path = match item.pub_date() { + Some(pub_date) => { + let path = Path::build(target, pub_date, true)?; + if metadata(path.filepath()).is_ok() { + exist += 1; + continue; // skip existing records + } + if let Some((_, ref mut index)) = index_request { + index.push(format!("=> {} {pub_date}", path.filename())); + } + data.push(format!("# {pub_date}")); + path + } + None => { + output.warning("item skipped as `pub_date` required by application"); + continue; + } + }; + + if let Some(description) = item.description() { + if let Some((_, ref mut index)) = index_request { + index.push(description.to_string()); + } + data.push(description.to_string()); + } + + if let Some(content) = item.content() { + if let Some((_, ref mut index)) = index_request { + index.push(content.to_string()); + } + data.push(content.to_string()); + } + + /* @TODO local storage + if let Some(enclosure) = item.enclosure() { + match enclosure.mime_type.as_str() { + "image/jpeg" => todo!(), + _ => todo!(), + } + } */ + + if let Some(link) = item.link() { + data.push(match Url::parse(link) { + Ok(url) => { + if let Some(host) = url.host_str() { + format!("=> {link} {host}") + } else { + format!("=> {link}") + } + } + Err(e) => { + output.warning(&e.to_string()); + format!("=> {link}") + } + }) + } + + // record new item file + File::create(path.filepath())?.write_all(data.join("\n\n").as_bytes())?; } - None => { - output.warning("item skipped as `pub_date` is required by application"); - continue; + + // update index file + if let Some((mut file, index)) = index_request { + file.write_all(index.join("\n\n").as_bytes())?; + output.debug("index file updated"); } - }; - - if let Some(description) = item.description() { - data.push(description.to_string()); } - - if let Some(content) = item.content() { - data.push(content.to_string()); - } - - /* @TODO local storage - if let Some(enclosure) = item.enclosure() { - match enclosure.mime_type.as_str() { - "image/jpeg" => todo!(), - _ => todo!(), - } - } */ - - if let Some(link) = item.link() { - data.push(format!("=> {link}")); - } - - // record item to static file - File::create(&path.absolute)?.write_all(data.join("\n\n").as_bytes())?; + None => output.warning("channel skipped as `pub_date` required by application"), } output.debug(&format!( diff --git a/src/path.rs b/src/path.rs index d987f15..a884e39 100644 --- a/src/path.rs +++ b/src/path.rs @@ -1,25 +1,19 @@ use std::error::Error; +use std::path::MAIN_SEPARATOR; pub struct Path { - pub absolute: String, - pub directory: String, - pub file: String, + file: String, + path: String, } impl Path { + // Constructors + pub fn build(base: &str, pub_date: &str, mkdir: bool) -> Result> { use chrono::{DateTime, Datelike, Timelike}; - use std::{fs::create_dir_all, path::MAIN_SEPARATOR}; let date_time = DateTime::parse_from_rfc2822(pub_date)?; - let directory = format!( - "{base}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}", - date_time.year(), - date_time.month(), - date_time.day() - ); - let file = format!( "{:02}-{:02}-{:02}.gmi", date_time.hour(), @@ -27,14 +21,31 @@ impl Path { date_time.second() ); + let path = format!( + "{base}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}", + date_time.year(), + date_time.month(), + date_time.day() + ); + if mkdir { - create_dir_all(&directory)?; + std::fs::create_dir_all(&path)?; } - Ok(Path { - absolute: format!("{directory}{MAIN_SEPARATOR}{file}"), - directory, - file, - }) + Ok(Path { file, path }) + } + + // Getters + + pub fn index(&self) -> String { + format!("{}{MAIN_SEPARATOR}index.gmi", self.path) + } + + pub fn filepath(&self) -> String { + format!("{}{MAIN_SEPARATOR}{}", self.path, self.file) + } + + pub fn filename(&self) -> &str { + &self.file } }