mirror of
https://github.com/YGGverse/pulsarss.git
synced 2026-03-31 17:15:29 +00:00
update index.gmi renew method (now based on all items collected)
This commit is contained in:
parent
571f5e529b
commit
5a69f81807
2 changed files with 50 additions and 37 deletions
|
|
@ -1,17 +1,15 @@
|
||||||
use chrono::{DateTime, FixedOffset};
|
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::path::MAIN_SEPARATOR;
|
use std::path::MAIN_SEPARATOR;
|
||||||
|
|
||||||
pub struct Path {
|
pub struct Destination {
|
||||||
pub item: String,
|
pub item: String,
|
||||||
pub path: String,
|
pub path: String,
|
||||||
pub time: DateTime<FixedOffset>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Path {
|
impl Destination {
|
||||||
// Constructors
|
// Constructors
|
||||||
|
|
||||||
pub fn build(base: &str, pub_date: &str, mkdir: bool) -> Result<Path, Box<dyn Error>> {
|
pub fn build(base: &str, pub_date: &str, mkdir: bool) -> Result<Self, Box<dyn Error>> {
|
||||||
use chrono::{DateTime, Datelike, Timelike};
|
use chrono::{DateTime, Datelike, Timelike};
|
||||||
|
|
||||||
let time = DateTime::parse_from_rfc2822(pub_date)?;
|
let time = DateTime::parse_from_rfc2822(pub_date)?;
|
||||||
|
|
@ -24,7 +22,7 @@ impl Path {
|
||||||
);
|
);
|
||||||
|
|
||||||
let path = format!(
|
let path = format!(
|
||||||
"{base}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}",
|
"{base}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}{:02}{MAIN_SEPARATOR}",
|
||||||
time.year(),
|
time.year(),
|
||||||
time.month(),
|
time.month(),
|
||||||
time.day()
|
time.day()
|
||||||
|
|
@ -34,16 +32,12 @@ impl Path {
|
||||||
std::fs::create_dir_all(&path)?;
|
std::fs::create_dir_all(&path)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Path { item, path, time })
|
Ok(Self { item, path })
|
||||||
}
|
}
|
||||||
|
|
||||||
// Getters
|
// Getters
|
||||||
|
|
||||||
pub fn index(&self) -> String {
|
|
||||||
format!("{}{MAIN_SEPARATOR}index.gmi", self.path)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn item(&self) -> String {
|
pub fn item(&self) -> String {
|
||||||
format!("{}{MAIN_SEPARATOR}{}", self.path, self.item)
|
format!("{}{}", self.path, self.item)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
69
src/main.rs
69
src/main.rs
|
|
@ -1,6 +1,6 @@
|
||||||
mod argument;
|
mod argument;
|
||||||
|
mod destination;
|
||||||
mod output;
|
mod output;
|
||||||
mod path;
|
|
||||||
|
|
||||||
use output::Output;
|
use output::Output;
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
|
|
@ -22,21 +22,23 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Error>> {
|
fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Error>> {
|
||||||
use path::Path;
|
use destination::Destination;
|
||||||
use reqwest::blocking::get;
|
use reqwest::blocking::get;
|
||||||
use rss::Channel;
|
use rss::Channel;
|
||||||
use std::{
|
use std::{
|
||||||
fs::{metadata, File, OpenOptions},
|
collections::HashSet,
|
||||||
io::Write,
|
fs::{metadata, read_dir, File},
|
||||||
|
io::{Read, Write},
|
||||||
};
|
};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
output.debug("update begin");
|
output.debug("feed update begin");
|
||||||
|
|
||||||
let mut total = 0;
|
let mut total = 0;
|
||||||
let mut exist = 0;
|
let mut exist = 0;
|
||||||
|
let mut index = HashSet::new();
|
||||||
|
|
||||||
// handle feed items
|
// collect feed items
|
||||||
for item in Channel::read_from(&get(source)?.bytes()?[..])?
|
for item in Channel::read_from(&get(source)?.bytes()?[..])?
|
||||||
.items()
|
.items()
|
||||||
.iter()
|
.iter()
|
||||||
|
|
@ -45,16 +47,16 @@ fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Erro
|
||||||
|
|
||||||
let mut data = Vec::new();
|
let mut data = Vec::new();
|
||||||
|
|
||||||
let path = match item.pub_date() {
|
let destination = match item.pub_date() {
|
||||||
Some(pub_date) => {
|
Some(pub_date) => {
|
||||||
let path = Path::build(target, pub_date, true)?;
|
let destination = Destination::build(target, pub_date, true)?;
|
||||||
if metadata(path.item()).is_ok() {
|
if metadata(destination.item()).is_ok() {
|
||||||
exist += 1;
|
exist += 1;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
data.push(format!("# {pub_date}"));
|
data.push(format!("# {pub_date}"));
|
||||||
path
|
destination
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
output.warning("item skipped as `pub_date` required by application");
|
output.warning("item skipped as `pub_date` required by application");
|
||||||
|
|
@ -62,24 +64,11 @@ fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Erro
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut index = match OpenOptions::new().append(true).open(path.index()) {
|
|
||||||
Ok(index) => index,
|
|
||||||
Err(_) => {
|
|
||||||
let mut index = File::create_new(path.index())?;
|
|
||||||
index.write_all(format!("# {}\n", path.time.to_rfc2822()).as_bytes())?;
|
|
||||||
index
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
index.write_all(format!("\n=> {} {}\n", path.item, path.time).as_bytes())?;
|
|
||||||
|
|
||||||
if let Some(description) = item.description() {
|
if let Some(description) = item.description() {
|
||||||
index.write_all(format!("\n{description}\n").as_bytes())?;
|
|
||||||
data.push(description.to_string());
|
data.push(description.to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(content) = item.content() {
|
if let Some(content) = item.content() {
|
||||||
index.write_all(format!("\n{content}\n").as_bytes())?;
|
|
||||||
data.push(content.to_string());
|
data.push(content.to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -99,11 +88,41 @@ fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Erro
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
File::create(path.item())?.write_all(data.join("\n\n").as_bytes())?;
|
File::create(destination.item())?.write_all(data.join("\n\n").as_bytes())?;
|
||||||
|
|
||||||
|
index.insert(destination.path); // request `index.gmi` update
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// renew pending `index.gmi` files on items crawl completed
|
||||||
|
for path in index {
|
||||||
|
let subject = format!("{path}index.gmi");
|
||||||
|
|
||||||
|
let mut index = File::create(&subject)?;
|
||||||
|
let mut data = Vec::with_capacity(10); // @TODO
|
||||||
|
|
||||||
|
let mut total = 0;
|
||||||
|
for file in read_dir(&path)? {
|
||||||
|
let name = file?.file_name().into_string().unwrap();
|
||||||
|
|
||||||
|
if name == "index.gmi" {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut buffer = String::new();
|
||||||
|
File::open(&format!("{path}{name}"))?.read_to_string(&mut buffer)?;
|
||||||
|
data.push(buffer);
|
||||||
|
|
||||||
|
total += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
index.write_all(data.join("\n\n").as_bytes())?;
|
||||||
|
|
||||||
|
output.debug(&format!("renew `{subject}` (total: {total})"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// print totals
|
||||||
output.debug(&format!(
|
output.debug(&format!(
|
||||||
"update completed (added: {} / exist: {exist} / total: {total})",
|
"feed update completed (added: {} / exist: {exist} / total: {total})",
|
||||||
total - exist
|
total - exist
|
||||||
));
|
));
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue