mirror of
https://github.com/YGGverse/rssto.git
synced 2026-03-31 17:15:29 +00:00
134 lines
4.1 KiB
Rust
134 lines
4.1 KiB
Rust
mod argument;
|
|
mod config;
|
|
|
|
use anyhow::Result;
|
|
use argument::Argument;
|
|
use chrono::{DateTime, Local};
|
|
use clap::Parser;
|
|
use config::{Config, Feed};
|
|
use log::{debug, info};
|
|
use std::{
|
|
env::var,
|
|
fs::{File, create_dir_all, read_to_string},
|
|
io::Write,
|
|
path::PathBuf,
|
|
};
|
|
use strip_tags::*;
|
|
|
|
fn main() -> Result<()> {
|
|
if var("RUST_LOG").is_ok() {
|
|
use tracing_subscriber::{EnvFilter, fmt::*};
|
|
struct T;
|
|
impl time::FormatTime for T {
|
|
fn format_time(&self, w: &mut format::Writer<'_>) -> std::fmt::Result {
|
|
write!(w, "{}", Local::now())
|
|
}
|
|
}
|
|
fmt()
|
|
.with_timer(T)
|
|
.with_env_filter(EnvFilter::from_default_env())
|
|
.init()
|
|
}
|
|
|
|
let argument = Argument::parse();
|
|
let config: Config = toml::from_str(&read_to_string(argument.config)?)?;
|
|
|
|
info!("Crawler started");
|
|
|
|
loop {
|
|
debug!("Begin new crawl queue...");
|
|
|
|
for feed in &config.feed {
|
|
debug!("Update `{}`...", feed.url);
|
|
crawl(feed)?
|
|
}
|
|
|
|
debug!("Crawl queue completed");
|
|
|
|
if let Some(update) = config.update {
|
|
debug!("Wait {update} seconds to continue...",);
|
|
std::thread::sleep(std::time::Duration::from_secs(update))
|
|
} else {
|
|
return Ok(());
|
|
}
|
|
}
|
|
}
|
|
|
|
fn crawl(feed: &Feed) -> Result<()> {
|
|
use reqwest::blocking::get;
|
|
use rss::Channel;
|
|
|
|
let channel = Channel::read_from(&get(feed.url.as_str())?.bytes()?[..])?;
|
|
let channel_items = channel.items();
|
|
let channel_items_limit = feed.list_items_limit.unwrap_or(channel_items.len());
|
|
|
|
for template in &feed.templates {
|
|
let root = PathBuf::from(template);
|
|
let extension = root.file_name().unwrap().to_string_lossy();
|
|
|
|
let index = {
|
|
let mut p = PathBuf::from(&root);
|
|
p.push(format!("index.{extension}"));
|
|
read_to_string(p)?
|
|
};
|
|
|
|
let index_item = {
|
|
let mut p = PathBuf::from(&root);
|
|
p.push("index");
|
|
p.push(format!("item.{extension}"));
|
|
read_to_string(p)?
|
|
};
|
|
|
|
create_dir_all(&feed.storage)?;
|
|
File::create({
|
|
let mut p = PathBuf::from(&feed.storage);
|
|
p.push(format!("index.{extension}"));
|
|
p
|
|
})?
|
|
.write_all(
|
|
index
|
|
.replace("{title}", &strip_tags(channel.title()))
|
|
.replace("{description}", &strip_tags(channel.description()))
|
|
.replace("{link}", channel.link())
|
|
.replace("{language}", channel.language().unwrap_or_default())
|
|
.replace(
|
|
"{pub_date}",
|
|
&time(channel.pub_date(), &feed.pub_date_format),
|
|
)
|
|
.replace(
|
|
"{last_build_date}",
|
|
&time(channel.last_build_date(), &feed.last_build_date_format),
|
|
)
|
|
.replace("{time_generated}", &time(None, &feed.time_generated_format))
|
|
.replace(
|
|
"{items}",
|
|
&channel_items
|
|
.iter()
|
|
.take(channel_items_limit)
|
|
.map(|i| {
|
|
index_item
|
|
.replace("{title}", &strip_tags(i.title().unwrap_or_default()))
|
|
.replace(
|
|
"{description}",
|
|
&strip_tags(i.description().unwrap_or_default()),
|
|
)
|
|
.replace("{link}", i.link().unwrap_or_default())
|
|
.replace("{pub_date}", &time(i.pub_date(), &feed.pub_date_format))
|
|
})
|
|
.collect::<String>(),
|
|
)
|
|
.as_bytes(),
|
|
)?
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn time(value: Option<&str>, format: &str) -> String {
|
|
match value {
|
|
Some(v) => DateTime::parse_from_rfc2822(v).unwrap(),
|
|
None => Local::now().into(),
|
|
}
|
|
.format(format)
|
|
.to_string()
|
|
}
|