implement index file builder

This commit is contained in:
yggverse 2025-02-11 23:34:57 +02:00
parent 120876fef3
commit 4d4c78a4fc
3 changed files with 128 additions and 60 deletions

View file

@ -2,6 +2,7 @@ mod argument;
mod output;
mod path;
use chrono::{DateTime, FixedOffset};
use output::Output;
use std::error::Error;
@ -15,13 +16,20 @@ fn main() -> Result<(), Box<dyn Error>> {
output.debug("crawler started");
let mut status = None;
loop {
crawl(&argument.source, &argument.target, &output)?;
crawl(&argument.source, &argument.target, &output, &mut status)?;
sleep(Duration::from_secs(argument.update));
}
}
fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Error>> {
fn crawl(
source: &str,
target: &str,
output: &Output,
status: &mut Option<DateTime<FixedOffset>>,
) -> Result<(), Box<dyn Error>> {
use path::Path;
use reqwest::blocking::get;
use rss::Channel;
@ -29,58 +37,106 @@ fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Erro
fs::{metadata, File},
io::Write,
};
use url::Url;
output.debug("update begin");
let mut total = 0;
let mut exist = 0;
for item in Channel::read_from(&get(source)?.bytes()?[..])?
.items()
.iter()
{
total += 1;
// handle item data
let mut data = Vec::new();
let channel = Channel::read_from(&get(source)?.bytes()?[..])?;
let path = match item.pub_date() {
Some(pub_data) => {
let path = Path::build(target, pub_data, true)?;
if metadata(&path.absolute).is_ok() {
exist += 1;
continue; // skip existing records
match channel.pub_date() {
Some(pub_date) => {
// detect index file update required
let mut index_request = {
let remote = chrono::DateTime::parse_from_rfc2822(pub_date)?;
if status.is_some_and(|local| local != remote) || status.is_none() {
*status = Some(remote);
Some((
File::create(Path::build(target, pub_date, true)?.index())?,
Vec::new(),
))
} else {
None
}
data.push(format!("# {pub_data}"));
path
};
// handle feed items
for item in channel.items().iter() {
total += 1;
// handle item data
let mut data = Vec::new();
let path = match item.pub_date() {
Some(pub_date) => {
let path = Path::build(target, pub_date, true)?;
if metadata(path.filepath()).is_ok() {
exist += 1;
continue; // skip existing records
}
if let Some((_, ref mut index)) = index_request {
index.push(format!("=> {} {pub_date}", path.filename()));
}
data.push(format!("# {pub_date}"));
path
}
None => {
output.warning("item skipped as `pub_date` required by application");
continue;
}
};
if let Some(description) = item.description() {
if let Some((_, ref mut index)) = index_request {
index.push(description.to_string());
}
data.push(description.to_string());
}
if let Some(content) = item.content() {
if let Some((_, ref mut index)) = index_request {
index.push(content.to_string());
}
data.push(content.to_string());
}
/* @TODO local storage
if let Some(enclosure) = item.enclosure() {
match enclosure.mime_type.as_str() {
"image/jpeg" => todo!(),
_ => todo!(),
}
} */
if let Some(link) = item.link() {
data.push(match Url::parse(link) {
Ok(url) => {
if let Some(host) = url.host_str() {
format!("=> {link} {host}")
} else {
format!("=> {link}")
}
}
Err(e) => {
output.warning(&e.to_string());
format!("=> {link}")
}
})
}
// record new item file
File::create(path.filepath())?.write_all(data.join("\n\n").as_bytes())?;
}
None => {
output.warning("item skipped as `pub_date` is required by application");
continue;
// update index file
if let Some((mut file, index)) = index_request {
file.write_all(index.join("\n\n").as_bytes())?;
output.debug("index file updated");
}
};
if let Some(description) = item.description() {
data.push(description.to_string());
}
if let Some(content) = item.content() {
data.push(content.to_string());
}
/* @TODO local storage
if let Some(enclosure) = item.enclosure() {
match enclosure.mime_type.as_str() {
"image/jpeg" => todo!(),
_ => todo!(),
}
} */
if let Some(link) = item.link() {
data.push(format!("=> {link}"));
}
// record item to static file
File::create(&path.absolute)?.write_all(data.join("\n\n").as_bytes())?;
None => output.warning("channel skipped as `pub_date` required by application"),
}
output.debug(&format!(

View file

@ -1,25 +1,19 @@
use std::error::Error;
use std::path::MAIN_SEPARATOR;
pub struct Path {
pub absolute: String,
pub directory: String,
pub file: String,
file: String,
path: String,
}
impl Path {
// Constructors
pub fn build(base: &str, pub_date: &str, mkdir: bool) -> Result<Path, Box<dyn Error>> {
use chrono::{DateTime, Datelike, Timelike};
use std::{fs::create_dir_all, path::MAIN_SEPARATOR};
let date_time = DateTime::parse_from_rfc2822(pub_date)?;
let directory = format!(
"{base}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}",
date_time.year(),
date_time.month(),
date_time.day()
);
let file = format!(
"{:02}-{:02}-{:02}.gmi",
date_time.hour(),
@ -27,14 +21,31 @@ impl Path {
date_time.second()
);
let path = format!(
"{base}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}",
date_time.year(),
date_time.month(),
date_time.day()
);
if mkdir {
create_dir_all(&directory)?;
std::fs::create_dir_all(&path)?;
}
Ok(Path {
absolute: format!("{directory}{MAIN_SEPARATOR}{file}"),
directory,
file,
})
Ok(Path { file, path })
}
// Getters
/// Builds the location of the `index.gmi` file for this entry's directory.
///
/// The result is the stored directory `path`, the platform path separator
/// and the fixed `index.gmi` file name, concatenated into a new `String`.
pub fn index(&self) -> String {
    let directory = &self.path;
    format!("{}{MAIN_SEPARATOR}index.gmi", directory)
}
/// Builds the full location of the entry file.
///
/// Joins the stored directory `path` and the `file` name with the
/// platform path separator and returns the result as a new `String`.
pub fn filepath(&self) -> String {
    let (directory, name) = (&self.path, &self.file);
    format!("{}{MAIN_SEPARATOR}{}", directory, name)
}
/// Returns the entry's file name component (without its directory),
/// borrowed from `self`.
pub fn filename(&self) -> &str {
    self.file.as_str()
}
}