Add `index` and `limit` options; sort arguments in ascending alphabetical order

This commit is contained in:
yggverse 2025-02-12 18:38:31 +02:00
parent 2badd54a1a
commit 5f06b450a7
3 changed files with 26 additions and 10 deletions

View file

@ -28,6 +28,8 @@ pulsarss --source https://path/to/feed.rss
* `source`, `s` - RSS feed source (required) * `source`, `s` - RSS feed source (required)
* `target`, `t` - Destination directory (`public` by default) * `target`, `t` - Destination directory (`public` by default)
* `update`, `u` - Update timeout in seconds (`60` by default) * `update`, `u` - Update timeout in seconds (`60` by default)
* `index`, `i` - Generate `index.gmi` file (`true` by default)
* `limit`, `l` - Limit the number of channel items processed per crawl (`0` by default, meaning unlimited)
* `output`, `o` - Print output (`dw` by default): * `output`, `o` - Print output (`dw` by default):
* `d` - debug * `d` - debug
* `w` - warning * `w` - warning

View file

@ -3,6 +3,18 @@ use clap::Parser;
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
#[command(version, about, long_about = None)] #[command(version, about, long_about = None)]
pub struct Argument { pub struct Argument {
/// Generate `index.gmi` file (`true` by default)
#[arg(short, long, default_value_t = true)]
pub index: bool,
/// Limit channel items (unlimited by default)
#[arg(short, long, default_value_t = 0)]
pub limit: usize,
/// Show output (`dw` by default)
#[arg(short, long, default_value_t = String::from("dw"))]
pub output: String,
/// RSS feed source (required) /// RSS feed source (required)
#[arg(short, long)] #[arg(short, long)]
pub source: String, pub source: String,
@ -14,8 +26,4 @@ pub struct Argument {
/// Update timeout in seconds (`60` by default) /// Update timeout in seconds (`60` by default)
#[arg(short, long, default_value_t = 60)] #[arg(short, long, default_value_t = 60)]
pub update: u64, pub update: u64,
/// Show output (`dw` by default)
#[arg(short, long, default_value_t = String::from("dw"))]
pub output: String,
} }

View file

@ -2,11 +2,11 @@ mod argument;
mod destination; mod destination;
mod output; mod output;
use argument::Argument;
use output::Output; use output::Output;
use std::error::Error; use std::error::Error;
fn main() -> Result<(), Box<dyn Error>> { fn main() -> Result<(), Box<dyn Error>> {
use argument::Argument;
use clap::Parser; use clap::Parser;
use std::{thread::sleep, time::Duration}; use std::{thread::sleep, time::Duration};
@ -16,12 +16,12 @@ fn main() -> Result<(), Box<dyn Error>> {
output.debug("crawler started"); output.debug("crawler started");
loop { loop {
crawl(&argument.source, &argument.target, &output)?; crawl(&argument, &output)?;
sleep(Duration::from_secs(argument.update)); sleep(Duration::from_secs(argument.update));
} }
} }
fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Error>> { fn crawl(argument: &Argument, output: &Output) -> Result<(), Box<dyn Error>> {
use destination::Destination; use destination::Destination;
use reqwest::blocking::get; use reqwest::blocking::get;
use rss::Channel; use rss::Channel;
@ -39,17 +39,21 @@ fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Erro
let mut index = HashSet::new(); let mut index = HashSet::new();
// collect feed items // collect feed items
for item in Channel::read_from(&get(source)?.bytes()?[..])? for item in Channel::read_from(&get(&argument.source)?.bytes()?[..])?
.items() .items()
.iter() .iter()
{ {
if argument.limit > 0 && total >= argument.limit {
break;
}
total += 1; total += 1;
let mut data = Vec::new(); let mut data = Vec::new();
let destination = match item.pub_date() { let destination = match item.pub_date() {
Some(pub_date) => { Some(pub_date) => {
let destination = Destination::build(target, pub_date, true)?; let destination = Destination::build(&argument.target, pub_date, true)?;
if metadata(destination.item()).is_ok() { if metadata(destination.item()).is_ok() {
exist += 1; exist += 1;
continue; continue;
@ -90,7 +94,9 @@ fn crawl(source: &str, target: &str, output: &Output) -> Result<(), Box<dyn Erro
File::create(destination.item())?.write_all(data.join("\n\n").as_bytes())?; File::create(destination.item())?.write_all(data.join("\n\n").as_bytes())?;
index.insert(destination.path); // request `index.gmi` update if argument.index {
index.insert(destination.path); // request `index.gmi` update
}
} }
// renew pending `index.gmi` files on items crawl completed // renew pending `index.gmi` files on items crawl completed