mirror of
https://github.com/YGGverse/pulsarss.git
synced 2026-03-31 17:15:29 +00:00
implement index file builder
This commit is contained in:
parent
120876fef3
commit
4d4c78a4fc
3 changed files with 128 additions and 60 deletions
|
|
@ -14,3 +14,4 @@ chrono = "0.4.39"
|
||||||
clap = { version = "4.5.28", features = ["derive"] }
|
clap = { version = "4.5.28", features = ["derive"] }
|
||||||
reqwest = { version = "0.12.12", features = ["blocking"] }
|
reqwest = { version = "0.12.12", features = ["blocking"] }
|
||||||
rss = "2.0.11"
|
rss = "2.0.11"
|
||||||
|
url = "2.5.4"
|
||||||
|
|
|
||||||
84
src/main.rs
84
src/main.rs
|
|
@ -2,6 +2,7 @@ mod argument;
|
||||||
mod output;
|
mod output;
|
||||||
mod path;
|
mod path;
|
||||||
|
|
||||||
|
use chrono::{DateTime, FixedOffset};
|
||||||
use output::Output;
|
use output::Output;
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
|
|
||||||
|
|
@ -15,13 +16,20 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||||
|
|
||||||
output.debug("crawler started");
|
output.debug("crawler started");
|
||||||
|
|
||||||
|
let mut status = None;
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
crawl(&argument.source, &argument.target, &output)?;
|
crawl(&argument.source, &argument.target, &output, &mut status)?;
|
||||||
sleep(Duration::from_secs(argument.update));
|
sleep(Duration::from_secs(argument.update));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Error>> {
|
fn crawl(
|
||||||
|
source: &str,
|
||||||
|
target: &str,
|
||||||
|
output: &Output,
|
||||||
|
status: &mut Option<DateTime<FixedOffset>>,
|
||||||
|
) -> Result<(), Box<dyn Error>> {
|
||||||
use path::Path;
|
use path::Path;
|
||||||
use reqwest::blocking::get;
|
use reqwest::blocking::get;
|
||||||
use rss::Channel;
|
use rss::Channel;
|
||||||
|
|
@ -29,41 +37,68 @@ fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Erro
|
||||||
fs::{metadata, File},
|
fs::{metadata, File},
|
||||||
io::Write,
|
io::Write,
|
||||||
};
|
};
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
output.debug("update begin");
|
output.debug("update begin");
|
||||||
|
|
||||||
let mut total = 0;
|
let mut total = 0;
|
||||||
let mut exist = 0;
|
let mut exist = 0;
|
||||||
for item in Channel::read_from(&get(source)?.bytes()?[..])?
|
|
||||||
.items()
|
let channel = Channel::read_from(&get(source)?.bytes()?[..])?;
|
||||||
.iter()
|
|
||||||
{
|
match channel.pub_date() {
|
||||||
|
Some(pub_date) => {
|
||||||
|
// detect index file update required
|
||||||
|
let mut index_request = {
|
||||||
|
let remote = chrono::DateTime::parse_from_rfc2822(pub_date)?;
|
||||||
|
if status.is_some_and(|local| local != remote) || status.is_none() {
|
||||||
|
*status = Some(remote);
|
||||||
|
Some((
|
||||||
|
File::create(Path::build(target, pub_date, true)?.index())?,
|
||||||
|
Vec::new(),
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// handle feed items
|
||||||
|
for item in channel.items().iter() {
|
||||||
total += 1;
|
total += 1;
|
||||||
|
|
||||||
// handle item data
|
// handle item data
|
||||||
let mut data = Vec::new();
|
let mut data = Vec::new();
|
||||||
|
|
||||||
let path = match item.pub_date() {
|
let path = match item.pub_date() {
|
||||||
Some(pub_data) => {
|
Some(pub_date) => {
|
||||||
let path = Path::build(target, pub_data, true)?;
|
let path = Path::build(target, pub_date, true)?;
|
||||||
if metadata(&path.absolute).is_ok() {
|
if metadata(path.filepath()).is_ok() {
|
||||||
exist += 1;
|
exist += 1;
|
||||||
continue; // skip existing records
|
continue; // skip existing records
|
||||||
}
|
}
|
||||||
data.push(format!("# {pub_data}"));
|
if let Some((_, ref mut index)) = index_request {
|
||||||
|
index.push(format!("=> {} {pub_date}", path.filename()));
|
||||||
|
}
|
||||||
|
data.push(format!("# {pub_date}"));
|
||||||
path
|
path
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
output.warning("item skipped as `pub_date` is required by application");
|
output.warning("item skipped as `pub_date` required by application");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(description) = item.description() {
|
if let Some(description) = item.description() {
|
||||||
|
if let Some((_, ref mut index)) = index_request {
|
||||||
|
index.push(description.to_string());
|
||||||
|
}
|
||||||
data.push(description.to_string());
|
data.push(description.to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(content) = item.content() {
|
if let Some(content) = item.content() {
|
||||||
|
if let Some((_, ref mut index)) = index_request {
|
||||||
|
index.push(content.to_string());
|
||||||
|
}
|
||||||
data.push(content.to_string());
|
data.push(content.to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -76,11 +111,32 @@ fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Erro
|
||||||
} */
|
} */
|
||||||
|
|
||||||
if let Some(link) = item.link() {
|
if let Some(link) = item.link() {
|
||||||
data.push(format!("=> {link}"));
|
data.push(match Url::parse(link) {
|
||||||
|
Ok(url) => {
|
||||||
|
if let Some(host) = url.host_str() {
|
||||||
|
format!("=> {link} {host}")
|
||||||
|
} else {
|
||||||
|
format!("=> {link}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
output.warning(&e.to_string());
|
||||||
|
format!("=> {link}")
|
||||||
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// record item to static file
|
// record new item file
|
||||||
File::create(&path.absolute)?.write_all(data.join("\n\n").as_bytes())?;
|
File::create(path.filepath())?.write_all(data.join("\n\n").as_bytes())?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// update index file
|
||||||
|
if let Some((mut file, index)) = index_request {
|
||||||
|
file.write_all(index.join("\n\n").as_bytes())?;
|
||||||
|
output.debug("index file updated");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => output.warning("channel skipped as `pub_date` required by application"),
|
||||||
}
|
}
|
||||||
|
|
||||||
output.debug(&format!(
|
output.debug(&format!(
|
||||||
|
|
|
||||||
45
src/path.rs
45
src/path.rs
|
|
@ -1,25 +1,19 @@
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
|
use std::path::MAIN_SEPARATOR;
|
||||||
|
|
||||||
pub struct Path {
|
pub struct Path {
|
||||||
pub absolute: String,
|
file: String,
|
||||||
pub directory: String,
|
path: String,
|
||||||
pub file: String,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Path {
|
impl Path {
|
||||||
|
// Constructors
|
||||||
|
|
||||||
pub fn build(base: &str, pub_date: &str, mkdir: bool) -> Result<Path, Box<dyn Error>> {
|
pub fn build(base: &str, pub_date: &str, mkdir: bool) -> Result<Path, Box<dyn Error>> {
|
||||||
use chrono::{DateTime, Datelike, Timelike};
|
use chrono::{DateTime, Datelike, Timelike};
|
||||||
use std::{fs::create_dir_all, path::MAIN_SEPARATOR};
|
|
||||||
|
|
||||||
let date_time = DateTime::parse_from_rfc2822(pub_date)?;
|
let date_time = DateTime::parse_from_rfc2822(pub_date)?;
|
||||||
|
|
||||||
let directory = format!(
|
|
||||||
"{base}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}",
|
|
||||||
date_time.year(),
|
|
||||||
date_time.month(),
|
|
||||||
date_time.day()
|
|
||||||
);
|
|
||||||
|
|
||||||
let file = format!(
|
let file = format!(
|
||||||
"{:02}-{:02}-{:02}.gmi",
|
"{:02}-{:02}-{:02}.gmi",
|
||||||
date_time.hour(),
|
date_time.hour(),
|
||||||
|
|
@ -27,14 +21,31 @@ impl Path {
|
||||||
date_time.second()
|
date_time.second()
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let path = format!(
|
||||||
|
"{base}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}",
|
||||||
|
date_time.year(),
|
||||||
|
date_time.month(),
|
||||||
|
date_time.day()
|
||||||
|
);
|
||||||
|
|
||||||
if mkdir {
|
if mkdir {
|
||||||
create_dir_all(&directory)?;
|
std::fs::create_dir_all(&path)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Path {
|
Ok(Path { file, path })
|
||||||
absolute: format!("{directory}{MAIN_SEPARATOR}{file}"),
|
}
|
||||||
directory,
|
|
||||||
file,
|
// Getters
|
||||||
})
|
|
||||||
|
pub fn index(&self) -> String {
|
||||||
|
format!("{}{MAIN_SEPARATOR}index.gmi", self.path)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn filepath(&self) -> String {
|
||||||
|
format!("{}{MAIN_SEPARATOR}{}", self.path, self.file)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn filename(&self) -> &str {
|
||||||
|
&self.file
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue