add gemtext support, update debug handler, reorganize api to use with file-based multi-feed config, update version to 0.2

This commit is contained in:
yggverse 2025-09-03 13:35:23 +03:00
parent c4c7ee70b8
commit 6c5e005cf8
14 changed files with 210 additions and 308 deletions

1
.gitignore vendored
View file

@ -1,2 +1,3 @@
/public
/target /target
Cargo.lock Cargo.lock

View file

@ -1,11 +1,11 @@
[package] [package]
name = "rssto" name = "rssto"
version = "0.1.0" version = "0.2.0"
edition = "2024" edition = "2024"
license = "MIT" license = "MIT"
readme = "README.md" readme = "README.md"
description = "Aggregate RSS feeds into different formats" description = "Convert RSS feeds into multiple formats"
keywords = ["rss", "aggregator", "convertor", "conversion", "static"] keywords = ["rss", "aggregator", "conversion", "html", "gemtext"]
categories = ["command-line-utilities", "parsing", "text-processing", "value-formatting"] categories = ["command-line-utilities", "parsing", "text-processing", "value-formatting"]
repository = "https://github.com/YGGverse/rssto" repository = "https://github.com/YGGverse/rssto"
@ -13,5 +13,10 @@ repository = "https://github.com/YGGverse/rssto"
anyhow = "1.0" anyhow = "1.0"
chrono = "^0.4.20" chrono = "^0.4.20"
clap = { version = "4.5", features = ["derive"] } clap = { version = "4.5", features = ["derive"] }
log = "0.4"
reqwest = { version = "0.12", features = ["blocking"] } reqwest = { version = "0.12", features = ["blocking"] }
rss = "2.0" rss = "2.0"
serde = { version = "1.0", features = ["derive"] }
toml = "0.9"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
url = "2.5"

View file

@ -4,15 +4,14 @@
[![Dependencies](https://deps.rs/repo/github/YGGverse/rssto/status.svg)](https://deps.rs/repo/github/YGGverse/rssto) [![Dependencies](https://deps.rs/repo/github/YGGverse/rssto/status.svg)](https://deps.rs/repo/github/YGGverse/rssto)
[![crates.io](https://img.shields.io/crates/v/rssto.svg)](https://crates.io/crates/rssto) [![crates.io](https://img.shields.io/crates/v/rssto.svg)](https://crates.io/crates/rssto)
## Aggregate RSS feeds into different formats Convert RSS feeds into multiple formats
A simple multi-source feed aggregator that outputs static files in multiple formats.
## Roadmap ## Roadmap
* [x] HTML * [x] HTML
* [x] [Gemtext](https://geminiprotocol.net/docs/gemtext.gmi)
* [ ] JSON
* [ ] Markdown * [ ] Markdown
* [ ] Gemtext
## Install ## Install
@ -23,27 +22,12 @@ cargo install rssto
## Launch ## Launch
``` bash ``` bash
rssto --source https://path/to/source1.rss\ rssto -c config/example.toml
--target /path/to/source1dir\
--source https://path/to/source2.rss\
--target /path/to/source2dir\
--format html
```
### Options
``` bash
-d, --debug <DEBUG> Show output (`d` - debug, `e` - error, `i` - info) [default: ei]
-f, --format <FORMAT> Export formats (`html`,`md`,etc.) [default: html]
-l, --limit <LIMIT> Limit channel items (unlimited by default)
-s, --source <SOURCE> RSS feed URL(s)
--target <TARGET> Destination directory
--template <TEMPLATE> Path to template directory [default: template]
--time-format <TIME_FORMAT> Use custom time format [default: "%Y/%m/%d %H:%M:%S %z"]
-u, --update <UPDATE> Update timeout in seconds [default: 60]
-h, --help Print help
-V, --version Print version
``` ```
> [!TIP]
> * prepend `RUST_LOG=DEBUG` to print worker details (supported [levels](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/struct.LevelFilter.html))
> * set `update = TIME` in the config file to run as a daemon that refreshes every `TIME` seconds (omit it to generate once and exit)
> * see `rssto --help` to print all available options
### Autostart ### Autostart
@ -51,32 +35,35 @@ rssto --source https://path/to/source1.rss\
1. Install `rssto` by copy the binary compiled into the native system apps destination: 1. Install `rssto` by copy the binary compiled into the native system apps destination:
* Linux: `sudo cp /home/user/.cargo/bin/rssto /usr/local/bin` * Linux: `sudo install /home/user/.cargo/bin/rssto /usr/local/bin/rssto`
2. Create `systemd` configuration file: 2. Create `systemd` configuration file at `/etc/systemd/system/rssto.service`:
``` rssto.service ``` rssto.service
# /etc/systemd/system/rssto.service
[Unit] [Unit]
After=network-online.target After=network-online.target
Wants=network-online.target Wants=network-online.target
[Service] [Service]
Type=simple Type=simple
User=rssto User=rssto
Group=rssto Group=rssto
ExecStart=/usr/local/bin/rssto --source https://path/to/source1.rss\
--target /path/to/source1dir\ # Uncomment for debug
--source https://path/to/source2.rss\ # Environment="RUST_LOG=debug"
--target /path/to/source2dir\ # Environment="NO_COLOR=1"
--format html
--time-format %%Y/%%m/%%d %%H:%%M:%%S ExecStart=/usr/local/bin/rssto --config /path/to/config1.toml\
--config /path/to/config2.toml
StandardOutput=file:///home/rssto/debug.log
StandardError=file:///home/rssto/error.log
[Install] [Install]
WantedBy=multi-user.target WantedBy=multi-user.target
``` ```
* on format time, make sure `%` is escaped to `%%` * example above requires new system user (`useradd -m rssto`)
3. Run in priority: 3. Run in priority:

19
config/example.toml Normal file
View file

@ -0,0 +1,19 @@
# Update timeout in seconds; omit this key to generate the output once and exit
update = 60
# One `[[feed]]` table per RSS feed
[[feed]]
# RSS feed source
url = "https://assets.censor.net/rss/censor.net/rss_uk_news.xml"
# Destination directory for the generated index files (created when missing)
storage = "./public/censor.net/rss_uk_news"
# Template directories, one per export format; the directory name is used as the file extension
templates = ["./template/html","./template/gmi"]
# Limit channel items (unlimited when omitted)
list_items_limit = 20
# Output formats for the `{pub_date}`, `{last_build_date}` and `{time_generated}` template fields
pub_date_format = "%Y/%m/%d %H:%M:%S %z"
last_build_date_format = "%Y/%m/%d %H:%M:%S %z"
time_generated_format = "%Y/%m/%d %H:%M:%S %z"
[[feed]]
url = "https://assets.censor.net/rss/censor.net/rss_uk_resonance.xml"
storage = "./public/censor.net/rss_uk_resonance"
templates = ["./template/html","./template/gmi"]
list_items_limit = 20
pub_date_format = "%Y/%m/%d %H:%M:%S %z"
last_build_date_format = "%Y/%m/%d %H:%M:%S %z"
time_generated_format = "%Y/%m/%d %H:%M:%S %z"

View file

@ -1,37 +1,12 @@
use clap::Parser; use clap::Parser;
use std::path::PathBuf;
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
#[command(version, about, long_about = None)] #[command(version, about, long_about = None)]
pub struct Argument { pub struct Argument {
/// Show output (`d` - debug, `e` - error, `i` - info) /// Path to config file
#[arg(short, long, default_value_t = String::from("ei"))] ///
pub debug: String, /// * see `config/example.toml`
/// Export formats (`html`,`md`,etc.)
#[arg(short, long, default_values_t = [String::from("html")])]
pub format: Vec<String>,
/// Limit channel items (unlimited by default)
#[arg(short, long)] #[arg(short, long)]
pub limit: Option<usize>, pub config: PathBuf,
/// RSS feed URL(s)
#[arg(short, long)]
pub source: Vec<String>,
/// Destination directory
#[arg(long)]
pub target: Vec<String>,
/// Path to template directory
#[arg(long, default_value_t = String::from("template"))]
pub template: String,
/// Use custom time format
#[arg(long, default_value_t = String::from("%Y/%m/%d %H:%M:%S %z"))]
pub time_format: String,
/// Update timeout in seconds
#[arg(short, long, default_value_t = 60)]
pub update: u64,
} }

32
src/config.rs Normal file
View file

@ -0,0 +1,32 @@
use serde::Deserialize;
use std::path::PathBuf;
use url::Url;
/// Settings of a single RSS feed, deserialized from one `[[feed]]` table
/// of the config file.
#[derive(Debug, Deserialize)]
pub struct Feed {
/// RSS feed source
pub url: Url,
/// Destination directory
pub storage: PathBuf,
/// Path to templates (export formats)
pub templates: Vec<PathBuf>,
/// Limit channel items (unlimited by default)
pub list_items_limit: Option<usize>,
/// Output format for the channel and item publication dates
/// (the `{pub_date}` template field)
pub pub_date_format: String,
/// Output format for the channel last build date
/// (the `{last_build_date}` template field)
pub last_build_date_format: String,
/// Output format for the generation time
/// (the `{time_generated}` template field)
pub time_generated_format: String,
}
/// Root of the config file.
#[derive(Debug, Deserialize)]
pub struct Config {
/// Feeds to process, one entry per `[[feed]]` table
pub feed: Vec<Feed>,
/// Update timeout in seconds
///
/// * None to generate once
pub update: Option<u64>,
}

View file

@ -1,48 +0,0 @@
use anyhow::{Result, bail};
/// Output level that can be enabled through the `--debug` option,
/// one character per level.
#[derive(PartialEq)]
pub enum Level {
    /// Selected by `d`
    Debug,
    /// Selected by `e`
    Error,
    /// Selected by `i`
    Info,
}

impl Level {
    /// Map a single option character to its level.
    ///
    /// # Errors
    /// Fails for any character other than `d`, `e` or `i`.
    fn parse(value: char) -> Result<Self> {
        match value {
            // `d` and `e` are documented option values and `e` is part of the
            // default `ei`; rejecting them would abort a launch with defaults.
            'd' => Ok(Self::Debug),
            'e' => Ok(Self::Error),
            'i' => Ok(Self::Info),
            _ => bail!("Unsupported debug value `{value}`!"),
        }
    }
}
/// Set of enabled output levels.
pub struct Debug(Vec<Level>);

impl Debug {
    /// Build the level set from an option string, one character per level
    /// (the string is lowercased first).
    ///
    /// # Errors
    /// Returns the first error produced by `Level::parse`.
    pub fn init(values: &str) -> Result<Self> {
        let levels = values
            .to_lowercase()
            .chars()
            .map(Level::parse)
            .collect::<Result<Vec<_>>>()?;
        Ok(Self(levels))
    }

    // @TODO add an `error` method that prints to stderr when the error level is enabled

    /// Print `message` to stdout, prefixed with the value of `t()`,
    /// when the info level is enabled.
    pub fn info(&self, message: &str) {
        let enabled = self.0.iter().any(|level| *level == Level::Info);
        if enabled {
            println!("[{}] [info] {message}", t());
        }
    }
}
/// Value of `crate::time::utc()` rendered as an RFC 3339 string.
fn t() -> String {
    let now = crate::time::utc();
    now.to_rfc3339()
}

View file

@ -1,57 +0,0 @@
use anyhow::{Result, bail};
/// Template sources loaded from disk for one export format.
pub struct Template {
    /// Contents of the index template
    pub index: String,
    /// Contents of the index item template
    pub index_item: String,
}

impl Template {
    /// Load the HTML templates located under `<template_path>/html`:
    /// `index.html` and `index/item.html`.
    ///
    /// # Errors
    /// Fails when either file cannot be read.
    pub fn html(template_path: &str) -> Result<Self> {
        use std::{fs::read_to_string, path::Path};

        let root = Path::new(template_path).join("html");
        // Read in declaration order so the first failing file is reported first.
        let index = read_to_string(root.join("index.html"))?;
        let index_item = read_to_string(root.join("index").join("item.html"))?;
        Ok(Self { index, index_item })
    }
}
/// Supported export format together with its loaded templates.
pub enum Type {
    /// HTML export
    Html(Template),
}

impl Type {
    /// Resolve a format name (compared case-insensitively) and load its
    /// templates from `template_path`.
    ///
    /// # Errors
    /// Fails when the templates cannot be loaded or the format is not `html`.
    fn parse(format: &str, template_path: &str) -> Result<Self> {
        match format.to_lowercase().as_str() {
            "html" => Ok(Self::Html(Template::html(template_path)?)),
            _ => bail!("Format `{format}` support yet not implemented!"),
        }
    }
}
/// Export formats requested for the current run.
pub struct Format(Vec<Type>);

impl Format {
    /// Parse every requested format name, loading its templates from `template`.
    ///
    /// # Errors
    /// Returns the first error produced by `Type::parse`.
    pub fn init(values: &Vec<String>, template: &str) -> Result<Self> {
        let types = values
            .iter()
            .map(|name| Type::parse(name, template))
            .collect::<Result<Vec<_>>>()?;
        Ok(Self(types))
    }

    /// Borrow the parsed formats.
    pub fn get(&self) -> &Vec<Type> {
        let Self(types) = self;
        types
    }
}

View file

@ -1,101 +1,130 @@
mod argument; mod argument;
mod debug; mod config;
mod format;
mod target;
mod time;
use anyhow::{Result, bail}; use anyhow::Result;
use argument::Argument; use argument::Argument;
use debug::Debug; use chrono::{DateTime, Local};
use format::Format; use clap::Parser;
use format::Type; use config::{Config, Feed};
use target::Target; use log::{debug, info};
use time::Time; use std::{
env::var,
fs::{File, create_dir_all, read_to_string},
io::Write,
path::PathBuf,
};
fn main() -> Result<()> { fn main() -> Result<()> {
use clap::Parser; if var("RUST_LOG").is_ok() {
use std::{thread::sleep, time::Duration}; use tracing_subscriber::{EnvFilter, fmt::*};
struct T;
let argument = Argument::parse(); impl time::FormatTime for T {
fn format_time(&self, w: &mut format::Writer<'_>) -> std::fmt::Result {
// parse argument values once write!(w, "{}", Local::now())
let debug = Debug::init(&argument.debug)?; }
let format = Format::init(&argument.format, &argument.template)?; }
let target = Target::init(&argument.target)?; fmt()
let time = Time::init(argument.time_format); .with_timer(T)
.with_env_filter(EnvFilter::from_default_env())
// validate some targets .init()
if argument.source.len() != argument.target.len() {
bail!("Targets quantity does not match sources!")
} }
debug.info("Crawler started"); let argument = Argument::parse();
let config: Config = toml::from_str(&read_to_string(argument.config)?)?;
info!("Crawler started");
loop { loop {
debug.info("Begin new crawl queue..."); debug!("Begin new crawl queue...");
for (i, s) in argument.source.iter().enumerate() {
debug.info(&format!("Update {s}...")); for feed in &config.feed {
crawl((s, i), &format, &target, &time, &argument.limit)?; debug!("Update `{}`...", feed.url);
crawl(feed)?
}
debug!("Crawl queue completed");
if let Some(update) = config.update {
debug!("Wait {update} seconds to continue...",);
std::thread::sleep(std::time::Duration::from_secs(update))
} else {
return Ok(());
} }
debug.info(&format!(
"Crawl queue completed, wait {} seconds to continue...",
argument.update
));
sleep(Duration::from_secs(argument.update));
} }
} }
fn crawl( fn crawl(feed: &Feed) -> Result<()> {
source: (&str, usize),
format: &Format,
target: &Target,
time: &Time,
limit: &Option<usize>,
) -> Result<()> {
use reqwest::blocking::get; use reqwest::blocking::get;
use rss::Channel; use rss::Channel;
use std::{fs::File, io::Write};
let c = Channel::read_from(&get(source.0)?.bytes()?[..])?; let channel = Channel::read_from(&get(feed.url.as_str())?.bytes()?[..])?;
let i = c.items(); let channel_items = channel.items();
let l = limit.unwrap_or(i.len()); let channel_items_limit = feed.list_items_limit.unwrap_or(channel_items.len());
for f in format.get() { for template in &feed.templates {
match f { let root = PathBuf::from(template);
Type::Html(template) => File::create(target.index(source.1, "html"))?.write_all( let extension = root.file_name().unwrap().to_string_lossy();
template
.index let index = {
.replace("{title}", c.title()) let mut p = PathBuf::from(&root);
.replace("{description}", c.description()) p.push(format!("index.{extension}"));
.replace("{link}", c.link()) read_to_string(p)?
.replace("{language}", c.language().unwrap_or_default()) };
.replace("{pub_date}", &time.format(c.pub_date()))
.replace("{last_build_date}", &time.format(c.last_build_date())) let index_item = {
.replace("{time_generated}", &time.now()) let mut p = PathBuf::from(&root);
.replace("{items}", &{ p.push("index");
let mut items = String::with_capacity(l); p.push(format!("item.{extension}"));
for (n, item) in i.iter().enumerate() { read_to_string(p)?
if n > l { };
break;
} create_dir_all(&feed.storage)?;
items.push_str( File::create({
&template let mut p = PathBuf::from(&feed.storage);
.index_item p.push(format!("index.{extension}"));
.replace("{title}", item.title().unwrap_or_default()) p
.replace( })?
"{description}", .write_all(
item.description().unwrap_or_default(), index
) .replace("{title}", channel.title())
.replace("{link}", item.link().unwrap_or_default()) .replace("{description}", channel.description())
.replace("{time}", &time.format(item.pub_date())), .replace("{link}", channel.link())
) .replace("{language}", channel.language().unwrap_or_default())
} .replace(
items "{pub_date}",
}) &time(channel.pub_date(), &feed.pub_date_format),
.as_bytes(), )
)?, .replace(
} "{last_build_date}",
&time(channel.last_build_date(), &feed.last_build_date_format),
)
.replace("{time_generated}", &time(None, &feed.time_generated_format))
.replace(
"{items}",
&channel_items
.iter()
.take(channel_items_limit)
.map(|i| {
index_item
.replace("{title}", i.title().unwrap_or_default())
.replace("{description}", i.description().unwrap_or_default())
.replace("{link}", i.link().unwrap_or_default())
.replace("{pub_date}", &time(i.pub_date(), &feed.pub_date_format))
})
.collect::<String>(),
)
.as_bytes(),
)?
} }
Ok(()) Ok(())
} }
fn time(value: Option<&str>, format: &str) -> String {
match value {
Some(v) => DateTime::parse_from_rfc2822(v).unwrap(),
None => Local::now().into(),
}
.format(format)
.to_string()
}

View file

@ -1,25 +0,0 @@
use anyhow::{Result, bail};
use std::{fs, path::PathBuf, str::FromStr};
/// Destination directories, in the order they were given.
pub struct Target(Vec<PathBuf>);

impl Target {
    /// Validate every destination path and create the missing directories.
    ///
    /// # Errors
    /// Fails when a path points to an existing regular file or when a
    /// directory cannot be created.
    pub fn init(paths: &Vec<String>) -> Result<Self> {
        let directories = paths
            .iter()
            .map(|path| -> Result<PathBuf> {
                let directory = PathBuf::from_str(path)?;
                if directory.is_file() {
                    bail!("Target destination exists and not directory!")
                }
                fs::create_dir_all(&directory)?;
                Ok(directory)
            })
            .collect::<Result<Vec<_>>>()?;
        Ok(Self(directories))
    }

    /// Path of the `index.<extension>` file inside the directory at position `index`.
    ///
    /// # Panics
    /// Panics when `index` is out of bounds.
    pub fn index(&self, index: usize, extension: &str) -> PathBuf {
        self.0[index].join(format!("index.{extension}"))
    }
}

View file

@ -1,29 +0,0 @@
use chrono::{DateTime, Utc};
/// Date formatter holding the output format string.
pub struct Time(String);

impl Time {
    /// Create a formatter from a chrono format string.
    pub fn init(format: String) -> Self {
        Self(format)
    }

    /// Re-format an RFC 2822 date taken from the feed.
    ///
    /// Returns an empty string when `value` is `None`, and `value` unchanged
    /// when it is not valid RFC 2822. Both cases come from external feed
    /// content, so neither may panic.
    pub fn format(&self, value: Option<&str>) -> String {
        let Some(raw) = value else {
            return String::new();
        };
        match chrono::DateTime::parse_from_rfc2822(raw) {
            Ok(parsed) => parsed.format(&self.0).to_string(),
            Err(_) => raw.to_string(),
        }
    }

    /// Current UTC time rendered with the configured format.
    pub fn now(&self) -> String {
        utc().format(&self.0).to_string()
    }
}
pub fn utc() -> DateTime<Utc> {
let s = std::time::SystemTime::now();
let c: chrono::DateTime<chrono::Utc> = s.into();
c
}

7
template/gmi/index.gmi Normal file
View file

@ -0,0 +1,7 @@
# {title}
{description}
## {time_generated}
{items}

View file

@ -0,0 +1,6 @@
### {title}
{description}
=> {link} {pub_date}

View file

@ -1,5 +1,5 @@
<article> <article>
<h2>{title}</h2> <h2>{title}</h2>
<p>{description}</p> <p>{description}</p>
<a href="{link}">{time}</a> <a href="{link}">{pub_date}</a>
</article> </article>