mirror of
https://github.com/YGGverse/pulsarss.git
synced 2026-03-31 17:15:29 +00:00
implement index file builder
This commit is contained in:
parent
120876fef3
commit
4d4c78a4fc
3 changed files with 128 additions and 60 deletions
142
src/main.rs
142
src/main.rs
|
|
@ -2,6 +2,7 @@ mod argument;
|
|||
mod output;
|
||||
mod path;
|
||||
|
||||
use chrono::{DateTime, FixedOffset};
|
||||
use output::Output;
|
||||
use std::error::Error;
|
||||
|
||||
|
|
@ -15,13 +16,20 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||
|
||||
output.debug("crawler started");
|
||||
|
||||
let mut status = None;
|
||||
|
||||
loop {
|
||||
crawl(&argument.source, &argument.target, &output)?;
|
||||
crawl(&argument.source, &argument.target, &output, &mut status)?;
|
||||
sleep(Duration::from_secs(argument.update));
|
||||
}
|
||||
}
|
||||
|
||||
fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Error>> {
|
||||
fn crawl(
|
||||
source: &str,
|
||||
target: &str,
|
||||
output: &Output,
|
||||
status: &mut Option<DateTime<FixedOffset>>,
|
||||
) -> Result<(), Box<dyn Error>> {
|
||||
use path::Path;
|
||||
use reqwest::blocking::get;
|
||||
use rss::Channel;
|
||||
|
|
@ -29,58 +37,106 @@ fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Erro
|
|||
fs::{metadata, File},
|
||||
io::Write,
|
||||
};
|
||||
use url::Url;
|
||||
|
||||
output.debug("update begin");
|
||||
|
||||
let mut total = 0;
|
||||
let mut exist = 0;
|
||||
for item in Channel::read_from(&get(source)?.bytes()?[..])?
|
||||
.items()
|
||||
.iter()
|
||||
{
|
||||
total += 1;
|
||||
|
||||
// handle item data
|
||||
let mut data = Vec::new();
|
||||
let channel = Channel::read_from(&get(source)?.bytes()?[..])?;
|
||||
|
||||
let path = match item.pub_date() {
|
||||
Some(pub_data) => {
|
||||
let path = Path::build(target, pub_data, true)?;
|
||||
if metadata(&path.absolute).is_ok() {
|
||||
exist += 1;
|
||||
continue; // skip existing records
|
||||
match channel.pub_date() {
|
||||
Some(pub_date) => {
|
||||
// detect index file update required
|
||||
let mut index_request = {
|
||||
let remote = chrono::DateTime::parse_from_rfc2822(pub_date)?;
|
||||
if status.is_some_and(|local| local != remote) || status.is_none() {
|
||||
*status = Some(remote);
|
||||
Some((
|
||||
File::create(Path::build(target, pub_date, true)?.index())?,
|
||||
Vec::new(),
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
data.push(format!("# {pub_data}"));
|
||||
path
|
||||
};
|
||||
|
||||
// handle feed items
|
||||
for item in channel.items().iter() {
|
||||
total += 1;
|
||||
|
||||
// handle item data
|
||||
let mut data = Vec::new();
|
||||
|
||||
let path = match item.pub_date() {
|
||||
Some(pub_date) => {
|
||||
let path = Path::build(target, pub_date, true)?;
|
||||
if metadata(path.filepath()).is_ok() {
|
||||
exist += 1;
|
||||
continue; // skip existing records
|
||||
}
|
||||
if let Some((_, ref mut index)) = index_request {
|
||||
index.push(format!("=> {} {pub_date}", path.filename()));
|
||||
}
|
||||
data.push(format!("# {pub_date}"));
|
||||
path
|
||||
}
|
||||
None => {
|
||||
output.warning("item skipped as `pub_date` required by application");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(description) = item.description() {
|
||||
if let Some((_, ref mut index)) = index_request {
|
||||
index.push(description.to_string());
|
||||
}
|
||||
data.push(description.to_string());
|
||||
}
|
||||
|
||||
if let Some(content) = item.content() {
|
||||
if let Some((_, ref mut index)) = index_request {
|
||||
index.push(content.to_string());
|
||||
}
|
||||
data.push(content.to_string());
|
||||
}
|
||||
|
||||
/* @TODO local storage
|
||||
if let Some(enclosure) = item.enclosure() {
|
||||
match enclosure.mime_type.as_str() {
|
||||
"image/jpeg" => todo!(),
|
||||
_ => todo!(),
|
||||
}
|
||||
} */
|
||||
|
||||
if let Some(link) = item.link() {
|
||||
data.push(match Url::parse(link) {
|
||||
Ok(url) => {
|
||||
if let Some(host) = url.host_str() {
|
||||
format!("=> {link} {host}")
|
||||
} else {
|
||||
format!("=> {link}")
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
output.warning(&e.to_string());
|
||||
format!("=> {link}")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// record new item file
|
||||
File::create(path.filepath())?.write_all(data.join("\n\n").as_bytes())?;
|
||||
}
|
||||
None => {
|
||||
output.warning("item skipped as `pub_date` is required by application");
|
||||
continue;
|
||||
|
||||
// update index file
|
||||
if let Some((mut file, index)) = index_request {
|
||||
file.write_all(index.join("\n\n").as_bytes())?;
|
||||
output.debug("index file updated");
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(description) = item.description() {
|
||||
data.push(description.to_string());
|
||||
}
|
||||
|
||||
if let Some(content) = item.content() {
|
||||
data.push(content.to_string());
|
||||
}
|
||||
|
||||
/* @TODO local storage
|
||||
if let Some(enclosure) = item.enclosure() {
|
||||
match enclosure.mime_type.as_str() {
|
||||
"image/jpeg" => todo!(),
|
||||
_ => todo!(),
|
||||
}
|
||||
} */
|
||||
|
||||
if let Some(link) = item.link() {
|
||||
data.push(format!("=> {link}"));
|
||||
}
|
||||
|
||||
// record item to static file
|
||||
File::create(&path.absolute)?.write_all(data.join("\n\n").as_bytes())?;
|
||||
None => output.warning("channel skipped as `pub_date` required by application"),
|
||||
}
|
||||
|
||||
output.debug(&format!(
|
||||
|
|
|
|||
45
src/path.rs
45
src/path.rs
|
|
@ -1,25 +1,19 @@
|
|||
use std::error::Error;
|
||||
use std::path::MAIN_SEPARATOR;
|
||||
|
||||
pub struct Path {
|
||||
pub absolute: String,
|
||||
pub directory: String,
|
||||
pub file: String,
|
||||
file: String,
|
||||
path: String,
|
||||
}
|
||||
|
||||
impl Path {
|
||||
// Constructors
|
||||
|
||||
pub fn build(base: &str, pub_date: &str, mkdir: bool) -> Result<Path, Box<dyn Error>> {
|
||||
use chrono::{DateTime, Datelike, Timelike};
|
||||
use std::{fs::create_dir_all, path::MAIN_SEPARATOR};
|
||||
|
||||
let date_time = DateTime::parse_from_rfc2822(pub_date)?;
|
||||
|
||||
let directory = format!(
|
||||
"{base}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}",
|
||||
date_time.year(),
|
||||
date_time.month(),
|
||||
date_time.day()
|
||||
);
|
||||
|
||||
let file = format!(
|
||||
"{:02}-{:02}-{:02}.gmi",
|
||||
date_time.hour(),
|
||||
|
|
@ -27,14 +21,31 @@ impl Path {
|
|||
date_time.second()
|
||||
);
|
||||
|
||||
let path = format!(
|
||||
"{base}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}",
|
||||
date_time.year(),
|
||||
date_time.month(),
|
||||
date_time.day()
|
||||
);
|
||||
|
||||
if mkdir {
|
||||
create_dir_all(&directory)?;
|
||||
std::fs::create_dir_all(&path)?;
|
||||
}
|
||||
|
||||
Ok(Path {
|
||||
absolute: format!("{directory}{MAIN_SEPARATOR}{file}"),
|
||||
directory,
|
||||
file,
|
||||
})
|
||||
Ok(Path { file, path })
|
||||
}
|
||||
|
||||
// Getters
|
||||
|
||||
pub fn index(&self) -> String {
|
||||
format!("{}{MAIN_SEPARATOR}index.gmi", self.path)
|
||||
}
|
||||
|
||||
pub fn filepath(&self) -> String {
|
||||
format!("{}{MAIN_SEPARATOR}{}", self.path, self.file)
|
||||
}
|
||||
|
||||
pub fn filename(&self) -> &str {
|
||||
&self.file
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue