Update the `index.gmi` renewal method (now rebuilt from all collected items)

This commit is contained in:
yggverse 2025-02-12 18:12:29 +02:00
parent 571f5e529b
commit 5a69f81807
2 changed files with 50 additions and 37 deletions

View file

@ -1,17 +1,15 @@
use chrono::{DateTime, FixedOffset};
use std::error::Error; use std::error::Error;
use std::path::MAIN_SEPARATOR; use std::path::MAIN_SEPARATOR;
pub struct Path { pub struct Destination {
pub item: String, pub item: String,
pub path: String, pub path: String,
pub time: DateTime<FixedOffset>,
} }
impl Path { impl Destination {
// Constructors // Constructors
pub fn build(base: &str, pub_date: &str, mkdir: bool) -> Result<Path, Box<dyn Error>> { pub fn build(base: &str, pub_date: &str, mkdir: bool) -> Result<Self, Box<dyn Error>> {
use chrono::{DateTime, Datelike, Timelike}; use chrono::{DateTime, Datelike, Timelike};
let time = DateTime::parse_from_rfc2822(pub_date)?; let time = DateTime::parse_from_rfc2822(pub_date)?;
@ -24,7 +22,7 @@ impl Path {
); );
let path = format!( let path = format!(
"{base}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}", "{base}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}",
time.year(), time.year(),
time.month(), time.month(),
time.day() time.day()
@ -34,16 +32,12 @@ impl Path {
std::fs::create_dir_all(&path)?; std::fs::create_dir_all(&path)?;
} }
Ok(Path { item, path, time }) Ok(Self { item, path })
} }
// Getters // Getters
pub fn index(&self) -> String {
format!("{}{MAIN_SEPARATOR}index.gmi", self.path)
}
pub fn item(&self) -> String { pub fn item(&self) -> String {
format!("{}{MAIN_SEPARATOR}{}", self.path, self.item) format!("{}{}", self.path, self.item)
} }
} }

View file

@ -1,6 +1,6 @@
mod argument; mod argument;
mod destination;
mod output; mod output;
mod path;
use output::Output; use output::Output;
use std::error::Error; use std::error::Error;
@ -22,21 +22,23 @@ fn main() -> Result<(), Box<dyn Error>> {
} }
fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Error>> { fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Error>> {
use path::Path; use destination::Destination;
use reqwest::blocking::get; use reqwest::blocking::get;
use rss::Channel; use rss::Channel;
use std::{ use std::{
fs::{metadata, File, OpenOptions}, collections::HashSet,
io::Write, fs::{metadata, read_dir, File},
io::{Read, Write},
}; };
use url::Url; use url::Url;
output.debug("update begin"); output.debug("feed update begin");
let mut total = 0; let mut total = 0;
let mut exist = 0; let mut exist = 0;
let mut index = HashSet::new();
// handle feed items // collect feed items
for item in Channel::read_from(&get(source)?.bytes()?[..])? for item in Channel::read_from(&get(source)?.bytes()?[..])?
.items() .items()
.iter() .iter()
@ -45,16 +47,16 @@ fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Erro
let mut data = Vec::new(); let mut data = Vec::new();
let path = match item.pub_date() { let destination = match item.pub_date() {
Some(pub_date) => { Some(pub_date) => {
let path = Path::build(target, pub_date, true)?; let destination = Destination::build(target, pub_date, true)?;
if metadata(path.item()).is_ok() { if metadata(destination.item()).is_ok() {
exist += 1; exist += 1;
continue; continue;
} }
data.push(format!("# {pub_date}")); data.push(format!("# {pub_date}"));
path destination
} }
None => { None => {
output.warning("item skipped as `pub_date` required by application"); output.warning("item skipped as `pub_date` required by application");
@ -62,24 +64,11 @@ fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Erro
} }
}; };
let mut index = match OpenOptions::new().append(true).open(path.index()) {
Ok(index) => index,
Err(_) => {
let mut index = File::create_new(path.index())?;
index.write_all(format!("# {}\n", path.time.to_rfc2822()).as_bytes())?;
index
}
};
index.write_all(format!("\n=> {} {}\n", path.item, path.time).as_bytes())?;
if let Some(description) = item.description() { if let Some(description) = item.description() {
index.write_all(format!("\n{description}\n").as_bytes())?;
data.push(description.to_string()); data.push(description.to_string());
} }
if let Some(content) = item.content() { if let Some(content) = item.content() {
index.write_all(format!("\n{content}\n").as_bytes())?;
data.push(content.to_string()); data.push(content.to_string());
} }
@ -99,11 +88,41 @@ fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Erro
}) })
} }
File::create(path.item())?.write_all(data.join("\n\n").as_bytes())?; File::create(destination.item())?.write_all(data.join("\n\n").as_bytes())?;
index.insert(destination.path); // request `index.gmi` update
} }
// renew pending `index.gmi` files once the items crawl has completed
for path in index {
let subject = format!("{path}index.gmi");
let mut index = File::create(&subject)?;
let mut data = Vec::with_capacity(10); // @TODO
let mut total = 0;
for file in read_dir(&path)? {
let name = file?.file_name().into_string().unwrap();
if name == "index.gmi" {
continue;
}
let mut buffer = String::new();
File::open(&format!("{path}{name}"))?.read_to_string(&mut buffer)?;
data.push(buffer);
total += 1;
}
index.write_all(data.join("\n\n").as_bytes())?;
output.debug(&format!("renew `{subject}` (total: {total})"));
}
// print totals
output.debug(&format!( output.debug(&format!(
"update completed (added: {} / exist: {exist} / total: {total})", "feed update completed (added: {} / exist: {exist} / total: {total})",
total - exist total - exist
)); ));