From 96baf214b74a86984143bd87cd42492144331c68 Mon Sep 17 00:00:00 2001 From: yggverse Date: Wed, 12 Feb 2025 04:43:59 +0200 Subject: [PATCH] change index update logic --- src/main.rs | 175 ++++++++++++++++++---------------------------------- src/path.rs | 4 +- 2 files changed, 64 insertions(+), 115 deletions(-) diff --git a/src/main.rs b/src/main.rs index 7564c20..7c68571 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,6 @@ mod argument; mod output; mod path; -use chrono::{DateTime, FixedOffset}; use output::Output; use std::error::Error; @@ -16,25 +15,18 @@ fn main() -> Result<(), Box> { output.debug("crawler started"); - let mut status = None; - loop { - crawl(&argument.source, &argument.target, &output, &mut status)?; + crawl(&argument.source, &argument.target, &output)?; sleep(Duration::from_secs(argument.update)); } } -fn crawl( - source: &str, - target: &str, - output: &Output, - status: &mut Option>, -) -> Result<(), Box> { +fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box> { use path::Path; use reqwest::blocking::get; use rss::Channel; use std::{ - fs::{metadata, File}, + fs::{metadata, File, OpenOptions}, io::Write, }; use url::Url; @@ -44,115 +36,70 @@ fn crawl( let mut total = 0; let mut exist = 0; - let channel = Channel::read_from(&get(source)?.bytes()?[..])?; + // handle feed items + for item in Channel::read_from(&get(source)?.bytes()?[..])? + .items() + .iter() + { + total += 1; - match channel.pub_date() { - Some(pub_date) => { - // update `index.gmi` on channel `pub_date` change - { - let remote_time = chrono::DateTime::parse_from_rfc2822(pub_date)?; + let mut data = Vec::new(); - if status.is_none() || status.is_some_and(|local_time| local_time != remote_time) { - // update global state to skip `index.gmi` overwrites without changes - *status = Some(remote_time); - - let index_path = Path::build(target, pub_date, true)?; - - // build `index.gmi` members - let (mut file, mut data) = (File::create(index_path.index())?, Vec::new()); - - // collect `index.gmi` data - for item in channel.items().iter() { - match item.pub_date() { - Some(pub_date) => { - let item_path = Path::build(target, pub_date, true)?; - - // skip not relevant records from `index.gmi` - if item_path.path != index_path.path { - continue; - } - - data.push(format!("=> {} {pub_date}", item_path.item)); - - if let Some(description) = item.description() { - data.push(description.to_string()); - } - - if let Some(content) = item.content() { - data.push(content.to_string()); - } - } - None => { - output.warning("item skipped as `pub_date` required by application") - } - } - } - // update `index.gmi` file with new version - file.write_all(data.join("\n\n").as_bytes())?; - output.debug("index file updated"); + let path = match item.pub_date() { + Some(pub_date) => { + let path = Path::build(target, pub_date, true)?; + if metadata(path.item()).is_ok() { + exist += 1; + continue; } + + data.push(format!("# {pub_date}")); + path } - - // handle feed items - for item in channel.items().iter() { - total += 1; - - // handle item data - let mut data = Vec::new(); - - let path = match item.pub_date() { - Some(pub_date) => { - let path = Path::build(target, pub_date, true)?; - if metadata(path.item()).is_ok() { - exist += 1; - continue; // skip existing records - } - data.push(format!("# {pub_date}")); - path - } - None => { - output.warning("item skipped as `pub_date` required by application"); - continue; - } - }; - - if let Some(description) = item.description() { - data.push(description.to_string()); - } - - if let Some(content) = item.content() { - data.push(content.to_string()); - } - - /* @TODO local storage - if let Some(enclosure) = item.enclosure() { - match enclosure.mime_type.as_str() { - "image/jpeg" => todo!(), - _ => todo!(), - } - } */ - - if let Some(link) = item.link() { - data.push(match Url::parse(link) { - Ok(url) => { - if let Some(host) = url.host_str() { - format!("=> {link} {host}") - } else { - format!("=> {link}") - } - } - Err(e) => { - output.warning(&e.to_string()); - format!("=> {link}") - } - }) - } - - // record new item file - File::create(path.item())?.write_all(data.join("\n\n").as_bytes())?; + None => { + output.warning("item skipped as `pub_date` required by application"); + continue; } + }; + + let mut index = match OpenOptions::new().append(true).open(path.index()) { + Ok(index) => index, + Err(_) => { + let mut index = File::create_new(path.index())?; + index.write_all(format!("# {}\n", path.time.to_rfc2822()).as_bytes())?; + index + } + }; + + index.write_all(format!("\n=> {} {}\n", path.item, path.time).as_bytes())?; + + if let Some(description) = item.description() { + index.write_all(format!("\n{description}\n").as_bytes())?; + data.push(description.to_string()); } - None => output.warning("channel skipped as `pub_date` required by application"), + + if let Some(content) = item.content() { + index.write_all(format!("\n{content}\n").as_bytes())?; + data.push(content.to_string()); + } + + if let Some(link) = item.link() { + data.push(match Url::parse(link) { + Ok(url) => { + if let Some(host) = url.host_str() { + format!("=> {link} {host}") + } else { + format!("=> {link}") + } + } + Err(e) => { + output.warning(&e.to_string()); + format!("=> {link}") + } + }) + } + + File::create(path.item())?.write_all(data.join("\n\n").as_bytes())?; } output.debug(&format!( diff --git a/src/path.rs b/src/path.rs index e69c892..d41937e 100644 --- a/src/path.rs +++ b/src/path.rs @@ -1,9 +1,11 @@ +use chrono::{DateTime, FixedOffset}; use std::error::Error; use std::path::MAIN_SEPARATOR; pub struct Path { pub item: String, pub path: String, + pub time: DateTime, } impl Path { @@ -32,7 +34,7 @@ impl Path { std::fs::create_dir_all(&path)?; } - Ok(Path { item, path }) + Ok(Path { item, path, time }) } // Getters