mirror of
https://github.com/YGGverse/rssto.git
synced 2026-03-31 17:15:29 +00:00
Merge 2bb7a2da69 into 8dfc595961
This commit is contained in:
commit
ad46b34813
46 changed files with 1969 additions and 356 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -1,3 +1,2 @@
|
||||||
/public
|
|
||||||
/target
|
/target
|
||||||
Cargo.lock
|
Cargo.lock
|
||||||
32
Cargo.toml
32
Cargo.toml
|
|
@ -1,24 +1,8 @@
|
||||||
[package]
|
[workspace]
|
||||||
name = "rssto"
|
resolver = "2"
|
||||||
version = "0.2.2"
|
members = [
|
||||||
edition = "2024"
|
"crates/crawler",
|
||||||
license = "MIT"
|
"crates/http",
|
||||||
readme = "README.md"
|
"crates/llm",
|
||||||
description = "Convert RSS feeds into multiple formats"
|
"crates/mysql",
|
||||||
keywords = ["rss", "aggregator", "conversion", "html", "gemtext"]
|
]
|
||||||
categories = ["command-line-utilities", "parsing", "text-processing", "value-formatting"]
|
|
||||||
repository = "https://github.com/YGGverse/rssto"
|
|
||||||
|
|
||||||
[dependencies]
|
|
||||||
anyhow = "1.0"
|
|
||||||
chrono = "^0.4.20"
|
|
||||||
clap = { version = "4.5", features = ["derive"] }
|
|
||||||
log = "0.4"
|
|
||||||
regex = "1.12"
|
|
||||||
reqwest = { version = "0.12", features = ["blocking"] }
|
|
||||||
rss = "2.0"
|
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
|
||||||
strip-tags = "0.1"
|
|
||||||
toml = "0.9"
|
|
||||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
|
||||||
url = "2.5"
|
|
||||||
72
README.md
72
README.md
|
|
@ -4,70 +4,14 @@
|
||||||
[](https://deps.rs/repo/github/YGGverse/rssto)
|
[![Dependencies](https://deps.rs/repo/github/YGGverse/rssto/status.svg)](https://deps.rs/repo/github/YGGverse/rssto)
|
||||||
[](https://crates.io/crates/rssto)
|
[![crates.io](https://img.shields.io/crates/v/rssto.svg)](https://crates.io/crates/rssto)
|
||||||
|
|
||||||
Convert RSS feeds into multiple formats
|
Crawl content from RSS feeds into multiple formats
|
||||||
|
|
||||||
## Features
|
> [!NOTE]
|
||||||
|
> Branch in development!
|
||||||
|
|
||||||
* [x] Multiple feed sources with flexible TOML config options
|
## Components
|
||||||
* [x] Limit channel items
|
|
||||||
* [x] Format time
|
|
||||||
* [x] Multiple export format definition
|
|
||||||
* [x] Custom templates
|
|
||||||
* [x] Single export or daemon mode with update time
|
|
||||||
* [x] Export formats:
|
|
||||||
* [x] HTML
|
|
||||||
* [x] [Gemtext](https://geminiprotocol.net/docs/gemtext.gmi)
|
|
||||||
|
|
||||||
## Install
|
* `rssto-crawler` - RSS feed reader and data scraper daemon
|
||||||
|
* `rssto-http` - Web server implementation based on the Rocket engine
|
||||||
``` bash
|
* `rssto-llm` - Feeds auto-translation
|
||||||
cargo install rssto
|
* `rssto-mysql` - Shared database library
|
||||||
```
|
|
||||||
|
|
||||||
## Launch
|
|
||||||
|
|
||||||
``` bash
|
|
||||||
rssto -c config/example.toml
|
|
||||||
```
|
|
||||||
> [!TIP]
|
|
||||||
> * prepend `RUST_LOG=DEBUG` to print worker details (supported [levels](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/struct.LevelFilter.html))
|
|
||||||
> * append `-u TIME` to run as the daemon with `TIME` interval update
|
|
||||||
> * see `rssto --help` to print all available options
|
|
||||||
|
|
||||||
### Systemd
|
|
||||||
|
|
||||||
1. Install `rssto` by copy the binary compiled into the native system apps destination:
|
|
||||||
* Linux: `sudo install /home/user/.cargo/bin/rssto /usr/local/bin/rssto`
|
|
||||||
2. Create `systemd` configuration file at `/etc/systemd/system/rssto.service`:
|
|
||||||
|
|
||||||
``` rssto.service
|
|
||||||
[Unit]
|
|
||||||
After=network-online.target
|
|
||||||
Wants=network-online.target
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
Type=simple
|
|
||||||
|
|
||||||
User=rssto
|
|
||||||
Group=rssto
|
|
||||||
|
|
||||||
# Uncomment for debug
|
|
||||||
# Environment="RUST_LOG=DEBUG"
|
|
||||||
# Environment="NO_COLOR=1"
|
|
||||||
|
|
||||||
ExecStart=/usr/local/bin/rssto -c /path/to/config.toml
|
|
||||||
|
|
||||||
StandardOutput=file:///home/rssto/debug.log
|
|
||||||
StandardError=file:///home/rssto/error.log
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=multi-user.target
|
|
||||||
```
|
|
||||||
* example above requires new system user (`useradd -m rssto`)
|
|
||||||
|
|
||||||
3. Run in priority:
|
|
||||||
|
|
||||||
* `systemctl daemon-reload` - reload systemd configuration
|
|
||||||
* `systemctl enable rssto` - enable new service
|
|
||||||
* `systemctl start rssto` - start the process
|
|
||||||
* `systemctl status rssto` - check process launched
|
|
||||||
|
|
@ -1,19 +0,0 @@
|
||||||
update = 60
|
|
||||||
|
|
||||||
[[feed]]
|
|
||||||
url = "https://assets.censor.net/rss/censor.net/rss_uk_news.xml"
|
|
||||||
storage = "./public/censor.net/rss_uk_news"
|
|
||||||
templates = ["./template/html","./template/gmi"]
|
|
||||||
list_items_limit = 20
|
|
||||||
pub_date_format = "%Y/%m/%d %H:%M:%S %z"
|
|
||||||
last_build_date_format = "%Y/%m/%d %H:%M:%S %z"
|
|
||||||
time_generated_format = "%Y/%m/%d %H:%M:%S %z"
|
|
||||||
|
|
||||||
[[feed]]
|
|
||||||
url = "https://assets.censor.net/rss/censor.net/rss_uk_resonance.xml"
|
|
||||||
storage = "./public/censor.net/rss_uk_resonance"
|
|
||||||
templates = ["./template/html","./template/gmi"]
|
|
||||||
list_items_limit = 20
|
|
||||||
pub_date_format = "%Y/%m/%d %H:%M:%S %z"
|
|
||||||
last_build_date_format = "%Y/%m/%d %H:%M:%S %z"
|
|
||||||
time_generated_format = "%Y/%m/%d %H:%M:%S %z"
|
|
||||||
26
crates/crawler/Cargo.toml
Normal file
26
crates/crawler/Cargo.toml
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
[package]
|
||||||
|
name = "rssto-crawler"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2024"
|
||||||
|
license = "MIT"
|
||||||
|
readme = "README.md"
|
||||||
|
description = "Crawl RSS feeds into MySQL database"
|
||||||
|
keywords = ["rss", "aggregator", "conversion", "mysql", "crawler"]
|
||||||
|
categories = ["command-line-utilities", "parsing", "text-processing", "value-formatting"]
|
||||||
|
repository = "https://github.com/YGGverse/rssto"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
ammonia = "4.1.2"
|
||||||
|
anyhow = "1.0.100"
|
||||||
|
chrono = "0.4.42"
|
||||||
|
clap = { version = "4.5.54", features = ["derive"] }
|
||||||
|
log = "0.4.29"
|
||||||
|
mysql = { package = "rssto-mysql", version = "0.1.0", features = ["transaction"], path = "../mysql" }
|
||||||
|
reqwest = { version = "0.13.1", features = ["blocking"] }
|
||||||
|
rss = "2.0.12"
|
||||||
|
scraper = { version = "0.25.0", features = ["serde"] }
|
||||||
|
serde = { version = "1.0.228", features = ["derive"] }
|
||||||
|
sha2 = "0.10.9"
|
||||||
|
toml = "0.9.10"
|
||||||
|
tracing-subscriber = { version = "0.3.22", features = ["env-filter"] }
|
||||||
|
url = { version = "2.5.8", features = ["serde"] }
|
||||||
21
crates/crawler/LICENSE
Normal file
21
crates/crawler/LICENSE
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2026 YGGverse
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
58
crates/crawler/README.md
Normal file
58
crates/crawler/README.md
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
# rssto-crawler
|
||||||
|
|
||||||
|
## Install
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
git clone https://github.com/YGGverse/rssto.git
|
||||||
|
cd rssto
|
||||||
|
cargo build --release
|
||||||
|
```
|
||||||
|
|
||||||
|
## Launch
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
rssto-crawler -c config/example.toml
|
||||||
|
```
|
||||||
|
> [!TIP]
|
||||||
|
> * prepend `RUST_LOG=rssto_crawler=trace` to print worker details (supported [levels](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/struct.LevelFilter.html))
|
||||||
|
> * or just `RUST_LOG=trace` to debug all components in use
|
||||||
|
> * append `-u TIME` to run as the daemon with `TIME` interval update
|
||||||
|
> * see `rssto-crawler --help` to print all available options
|
||||||
|
|
||||||
|
### Systemd
|
||||||
|
|
||||||
|
1. Install `rssto-crawler` by copying the compiled binary into the native system apps destination:
|
||||||
|
* Linux: `sudo install target/release/rssto-crawler /usr/local/bin/rssto-crawler`
|
||||||
|
2. Create `systemd` configuration file at `/etc/systemd/system/rssto-crawler.service`:
|
||||||
|
|
||||||
|
``` rssto-crawler.service
|
||||||
|
[Unit]
|
||||||
|
After=network-online.target
|
||||||
|
Wants=network-online.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
|
||||||
|
User=rssto
|
||||||
|
Group=rssto
|
||||||
|
|
||||||
|
# Uncomment for debug
|
||||||
|
# Environment="RUST_LOG=rssto_crawler=debug"
|
||||||
|
# Environment="NO_COLOR=1"
|
||||||
|
|
||||||
|
ExecStart=/usr/local/bin/rssto-crawler -c /path/to/config.toml
|
||||||
|
|
||||||
|
StandardOutput=file:///home/rssto/crawler-debug.log
|
||||||
|
StandardError=file:///home/rssto/crawler-error.log
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
|
```
|
||||||
|
* example above requires new system user (`useradd -m rssto`)
|
||||||
|
|
||||||
|
3. Run in order:
|
||||||
|
|
||||||
|
* `systemctl daemon-reload` - reload systemd configuration
|
||||||
|
* `systemctl enable rssto-crawler` - enable new service
|
||||||
|
* `systemctl start rssto-crawler` - start the process
|
||||||
|
* `systemctl status rssto-crawler` - check process launched
|
||||||
80
crates/crawler/config.toml
Normal file
80
crates/crawler/config.toml
Normal file
|
|
@ -0,0 +1,80 @@
|
||||||
|
# Rescan feed channels time, in seconds
|
||||||
|
update = 900
|
||||||
|
|
||||||
|
# Database connection setup
|
||||||
|
# * see crates/mysql/database
|
||||||
|
[mysql]
|
||||||
|
|
||||||
|
host = "localhost"
|
||||||
|
port = 3306
|
||||||
|
username = ""
|
||||||
|
password = ""
|
||||||
|
database = "rssto"
|
||||||
|
|
||||||
|
# Content sources (unlimited)
|
||||||
|
[[channel]]
|
||||||
|
|
||||||
|
# RSS feed source
|
||||||
|
url = "https://1"
|
||||||
|
|
||||||
|
# Limit latest channel items to crawl (unlimited by default)
|
||||||
|
items_limit = 5
|
||||||
|
|
||||||
|
# Save Channel `title` and `description` in the database (currently not in use)
|
||||||
|
persist_description = true
|
||||||
|
|
||||||
|
# Save Channel item `title` and `description` in the database
|
||||||
|
persist_item_description = true
|
||||||
|
|
||||||
|
# Allowed tags
|
||||||
|
# * empty to strip all tags (default)
|
||||||
|
allowed_tags = ["a", "br", "p", "img"]
|
||||||
|
|
||||||
|
# Grab Channel item content (from the item `link`)
|
||||||
|
scrape_item_content = false
|
||||||
|
|
||||||
|
# Scrape title by CSS selector
|
||||||
|
# * None to use Channel item title if exists or fail to continue
|
||||||
|
# scrape_item_content_title_selector = "h1"
|
||||||
|
|
||||||
|
# Scrape description by CSS selector
|
||||||
|
# * None to use Channel item description if exists or fail to continue
|
||||||
|
# scrape_item_content_description_selector = "article"
|
||||||
|
|
||||||
|
# Preload content images locally if `Some`
|
||||||
|
# * currently stored in the database
|
||||||
|
# persist_images_selector = "img"
|
||||||
|
|
||||||
|
|
||||||
|
[[channel]]
|
||||||
|
|
||||||
|
# RSS feed source
|
||||||
|
url = "https://2"
|
||||||
|
|
||||||
|
# Limit latest channel items to crawl (unlimited by default)
|
||||||
|
items_limit = 5
|
||||||
|
|
||||||
|
# Save Channel `title` and `description` in the database (currently not in use)
|
||||||
|
persist_description = true
|
||||||
|
|
||||||
|
# Save Channel item `title` and `description` in the database
|
||||||
|
persist_item_description = true
|
||||||
|
|
||||||
|
# Allowed tags
|
||||||
|
# * empty to strip all tags (default)
|
||||||
|
allowed_tags = ["a", "br", "p", "img"]
|
||||||
|
|
||||||
|
# Grab Channel item content (from the item `link`)
|
||||||
|
scrape_item_content = false
|
||||||
|
|
||||||
|
# Scrape title by CSS selector
|
||||||
|
# * None to use Channel item title if exists or fail to continue
|
||||||
|
# scrape_item_content_title_selector = "h1"
|
||||||
|
|
||||||
|
# Scrape description by CSS selector
|
||||||
|
# * None to use Channel item description if exists or fail to continue
|
||||||
|
# scrape_item_content_description_selector = "article"
|
||||||
|
|
||||||
|
# Preload content images locally if `Some`
|
||||||
|
# * currently stored in the database
|
||||||
|
# persist_images_selector = "img"
|
||||||
|
|
@ -6,7 +6,7 @@ use std::path::PathBuf;
|
||||||
pub struct Argument {
|
pub struct Argument {
|
||||||
/// Path to config file
|
/// Path to config file
|
||||||
///
|
///
|
||||||
/// * see `config/example.toml`
|
/// * see `config.toml`
|
||||||
#[arg(short, long)]
|
#[arg(short, long)]
|
||||||
pub config: PathBuf,
|
pub config: PathBuf,
|
||||||
}
|
}
|
||||||
47
crates/crawler/src/config.rs
Normal file
47
crates/crawler/src/config.rs
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
use scraper::Selector;
|
||||||
|
use serde::Deserialize;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct Mysql {
|
||||||
|
pub database: String,
|
||||||
|
pub host: String,
|
||||||
|
pub password: String,
|
||||||
|
pub port: u16,
|
||||||
|
pub username: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct Channel {
|
||||||
|
/// RSS feed source
|
||||||
|
pub url: Url,
|
||||||
|
/// Limit latest channel items to crawl (unlimited by default)
|
||||||
|
pub items_limit: Option<usize>,
|
||||||
|
/// Save Channel title and description in the database
|
||||||
|
pub persist_description: bool,
|
||||||
|
/// Save Channel item title and description in the database
|
||||||
|
pub persist_item_description: bool,
|
||||||
|
/// Grab Channel item content (from the item `link`)
|
||||||
|
pub scrape_item_content: bool,
|
||||||
|
/// Scrape title by CSS selector
|
||||||
|
/// * None to use Channel item title if exists or fail to continue
|
||||||
|
pub scrape_item_content_title_selector: Option<Selector>,
|
||||||
|
/// Scrape description by CSS selector
|
||||||
|
/// * None to use Channel item description if exists or fail to continue
|
||||||
|
pub scrape_item_content_description_selector: Option<Selector>,
|
||||||
|
/// Allowed tags
|
||||||
|
/// * empty to strip all tags (default)
|
||||||
|
pub allowed_tags: std::collections::HashSet<String>,
|
||||||
|
/// Preload content images locally if `Some`
|
||||||
|
/// * currently stored in the database
|
||||||
|
pub persist_images_selector: Option<Selector>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct Config {
|
||||||
|
pub mysql: Mysql,
|
||||||
|
pub channel: Vec<Channel>,
|
||||||
|
/// Channels update timeout in seconds
|
||||||
|
/// * None to generate once
|
||||||
|
pub update: Option<u64>,
|
||||||
|
}
|
||||||
219
crates/crawler/src/main.rs
Normal file
219
crates/crawler/src/main.rs
Normal file
|
|
@ -0,0 +1,219 @@
|
||||||
|
mod argument;
|
||||||
|
mod config;
|
||||||
|
|
||||||
|
use anyhow::{Result, bail};
|
||||||
|
use log::{debug, info, warn};
|
||||||
|
use reqwest::blocking::get;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
fn main() -> Result<()> {
|
||||||
|
use chrono::Local;
|
||||||
|
use clap::Parser;
|
||||||
|
use std::{env::var, fs::read_to_string};
|
||||||
|
|
||||||
|
if var("RUST_LOG").is_ok() {
|
||||||
|
use tracing_subscriber::{EnvFilter, fmt::*};
|
||||||
|
struct T;
|
||||||
|
impl time::FormatTime for T {
|
||||||
|
fn format_time(&self, w: &mut format::Writer<'_>) -> std::fmt::Result {
|
||||||
|
write!(w, "{}", Local::now())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt()
|
||||||
|
.with_timer(T)
|
||||||
|
.with_env_filter(EnvFilter::from_default_env())
|
||||||
|
.init()
|
||||||
|
}
|
||||||
|
|
||||||
|
let argument = argument::Argument::parse();
|
||||||
|
let config: config::Config = toml::from_str(&read_to_string(argument.config)?)?;
|
||||||
|
let db = mysql::Database::pool(
|
||||||
|
&config.mysql.host,
|
||||||
|
config.mysql.port,
|
||||||
|
&config.mysql.username,
|
||||||
|
&config.mysql.password,
|
||||||
|
&config.mysql.database,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
info!("Crawler started");
|
||||||
|
loop {
|
||||||
|
debug!("Begin new crawl queue...");
|
||||||
|
for c in &config.channel {
|
||||||
|
debug!("Update `{}`...", c.url);
|
||||||
|
let mut tx = db.transaction()?;
|
||||||
|
match crawl(&mut tx, c) {
|
||||||
|
Ok(()) => tx.commit()?,
|
||||||
|
Err(e) => {
|
||||||
|
warn!("Channel `{}` update failed: `{e}`", c.url);
|
||||||
|
tx.rollback()?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
debug!("Crawl queue completed");
|
||||||
|
if let Some(update) = config.update {
|
||||||
|
debug!("Wait {update} seconds to continue...",);
|
||||||
|
std::thread::sleep(std::time::Duration::from_secs(update))
|
||||||
|
} else {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn crawl(tx: &mut mysql::Transaction, channel_config: &config::Channel) -> Result<()> {
|
||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
|
/// Removes all tags from `html` excluding `allowed_tags` or all if None
|
||||||
|
fn strip_tags(html: &str, allowed_tags: Option<&HashSet<String>>) -> String {
|
||||||
|
ammonia::Builder::new()
|
||||||
|
.tags(allowed_tags.map_or(HashSet::new(), |a| a.iter().map(|t| t.as_str()).collect()))
|
||||||
|
.clean(html)
|
||||||
|
.to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
let channel_url = channel_config.url.to_string(); // allocate once
|
||||||
|
|
||||||
|
let channel_id = match tx.channel_id_by_url(&channel_url)? {
|
||||||
|
Some(channel_id) => channel_id,
|
||||||
|
None => {
|
||||||
|
let channel_id = tx.insert_channel(&channel_url)?;
|
||||||
|
info!("Register new channel #{channel_id} ({channel_url})");
|
||||||
|
channel_id
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let channel_items =
|
||||||
|
match rss::Channel::read_from(&get(channel_config.url.as_str())?.bytes()?[..]) {
|
||||||
|
Ok(channel) => {
|
||||||
|
if channel_config.persist_description {
|
||||||
|
let channel_description_id = tx.insert_channel_description(
|
||||||
|
channel_id,
|
||||||
|
None,
|
||||||
|
Some(strip_tags(channel.title(), None)),
|
||||||
|
Some(strip_tags(
|
||||||
|
channel.description(),
|
||||||
|
Some(&channel_config.allowed_tags),
|
||||||
|
)),
|
||||||
|
)?;
|
||||||
|
debug!("Save channel description #{channel_description_id}")
|
||||||
|
}
|
||||||
|
channel.into_items()
|
||||||
|
}
|
||||||
|
Err(e) => bail!("Could not parse response: `{e}`"),
|
||||||
|
};
|
||||||
|
|
||||||
|
let channel_items_limit = channel_config.items_limit.unwrap_or(channel_items.len());
|
||||||
|
|
||||||
|
for channel_item in channel_items.iter().take(channel_items_limit) {
|
||||||
|
let guid = match channel_item.guid {
|
||||||
|
Some(ref guid) => guid.value.as_ref(),
|
||||||
|
None => bail!("Undefined `guid` field"),
|
||||||
|
};
|
||||||
|
let (link, base) = match channel_item.link {
|
||||||
|
Some(ref link) => (link, Url::parse(link)?),
|
||||||
|
None => bail!("Undefined `link` field"),
|
||||||
|
};
|
||||||
|
let pub_date = match channel_item.pub_date {
|
||||||
|
Some(ref pub_date) => match chrono::DateTime::parse_from_rfc2822(pub_date) {
|
||||||
|
Ok(t) => t.timestamp(),
|
||||||
|
Err(e) => bail!("Invalid `pub_date` field: `{e}`"),
|
||||||
|
},
|
||||||
|
None => bail!("Undefined `pub_date`"),
|
||||||
|
};
|
||||||
|
if tx.channel_items_total_by_channel_id_guid(channel_id, guid)? > 0 {
|
||||||
|
debug!("Channel item `{guid}` already exists, skipped.");
|
||||||
|
continue; // skip next steps as processed
|
||||||
|
}
|
||||||
|
let channel_item_id = tx.insert_channel_item(channel_id, pub_date, guid, link)?;
|
||||||
|
info!("Register new channel item #{channel_item_id} ({link})");
|
||||||
|
if channel_config.persist_item_description {
|
||||||
|
let channel_item_description_id = tx.insert_channel_item_description(
|
||||||
|
channel_item_id,
|
||||||
|
None,
|
||||||
|
channel_item.title().map(|s| strip_tags(s, None)),
|
||||||
|
channel_item
|
||||||
|
.description()
|
||||||
|
.map(|s| strip_tags(s, Some(&channel_config.allowed_tags))),
|
||||||
|
)?;
|
||||||
|
debug!("Save channel item description #{channel_item_description_id}")
|
||||||
|
}
|
||||||
|
// preload remote content..
|
||||||
|
if !channel_config.scrape_item_content {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let channel_item_content_id = tx.insert_channel_item_content(channel_item_id)?;
|
||||||
|
info!("Add new content record #{channel_item_content_id}");
|
||||||
|
|
||||||
|
let html = scraper::Html::parse_document(&get(link)?.text()?);
|
||||||
|
let description = match channel_config.scrape_item_content_description_selector {
|
||||||
|
Some(ref selector) => match html.select(selector).next() {
|
||||||
|
Some(description) => Some(strip_tags(
|
||||||
|
&description.inner_html(),
|
||||||
|
Some(&channel_config.allowed_tags),
|
||||||
|
)),
|
||||||
|
None => bail!("Could not scrape `description` selector from `{link}`"),
|
||||||
|
},
|
||||||
|
None => None,
|
||||||
|
};
|
||||||
|
let channel_item_content_description_id = tx.insert_channel_item_content_description(
|
||||||
|
channel_item_content_id,
|
||||||
|
None,
|
||||||
|
match channel_config.scrape_item_content_title_selector {
|
||||||
|
Some(ref selector) => match html.select(selector).next() {
|
||||||
|
Some(title) => Some(strip_tags(&title.inner_html(), None)),
|
||||||
|
None => bail!("Could not scrape `title` selector from `{link}`"),
|
||||||
|
},
|
||||||
|
None => None,
|
||||||
|
}
|
||||||
|
.as_ref()
|
||||||
|
.map(|s| s.trim()),
|
||||||
|
description.as_ref().map(|s| s.trim()),
|
||||||
|
)?;
|
||||||
|
debug!("Save channel item content description #{channel_item_content_description_id}");
|
||||||
|
// persist images if enabled
|
||||||
|
if let Some(ref selector) = channel_config.persist_images_selector {
|
||||||
|
use sha2::{Digest, Sha256};
|
||||||
|
if description.is_none() {
|
||||||
|
bail!("Field `description` is required to scrape images from `{link}`")
|
||||||
|
}
|
||||||
|
for element in scraper::Html::parse_document(&description.unwrap()).select(selector) {
|
||||||
|
if let Some(src) = element.value().attr("src") {
|
||||||
|
let absolute = match Url::parse(src) {
|
||||||
|
Ok(url) => url,
|
||||||
|
Err(e) => {
|
||||||
|
if e == url::ParseError::RelativeUrlWithoutBase {
|
||||||
|
let absolute = base.join(link)?;
|
||||||
|
debug!("Convert relative image link `{link}` to `{absolute}`");
|
||||||
|
absolute
|
||||||
|
} else {
|
||||||
|
bail!("Could not parse URL from img source: `{e}`")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let url = absolute.as_str();
|
||||||
|
let data = get(url)?.bytes()?;
|
||||||
|
let hash = format!("{:x}", Sha256::digest(&data));
|
||||||
|
|
||||||
|
let image_id = match tx.image_id_by_sha256(&hash)? {
|
||||||
|
Some(image_id) => image_id,
|
||||||
|
None => {
|
||||||
|
let image_id = tx.insert_image(&hash, Some(src), Some(url), &data)?;
|
||||||
|
info!("Persist new image #{image_id} (`{absolute}`)");
|
||||||
|
image_id
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let channel_item_content_image_id =
|
||||||
|
tx.insert_channel_item_content_image(channel_item_content_id, image_id)?;
|
||||||
|
debug!("Add content image relationship #{channel_item_content_image_id}");
|
||||||
|
let uri = format!("/image/{image_id}");
|
||||||
|
tx.replace_channel_item_content_description(
|
||||||
|
channel_item_content_description_id,
|
||||||
|
src,
|
||||||
|
&uri,
|
||||||
|
)?;
|
||||||
|
debug!("Replace content image in description from `{src}` to `{uri}`")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
19
crates/http/Cargo.toml
Normal file
19
crates/http/Cargo.toml
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
[package]
|
||||||
|
name = "rssto-http"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2024"
|
||||||
|
license = "MIT"
|
||||||
|
readme = "README.md"
|
||||||
|
description = "Web server for the rssto DB, based on Rocket engine"
|
||||||
|
keywords = ["rss", "aggregator", "http", "server"]
|
||||||
|
categories = ["command-line-utilities", "parsing", "text-processing", "value-formatting"]
|
||||||
|
repository = "https://github.com/YGGverse/rssto"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
chrono = { version = "0.4.41", features = ["serde"] }
|
||||||
|
clap = { version = "4.5.54", features = ["derive"] }
|
||||||
|
mysql = { package = "rssto-mysql", version = "0.1.0", path = "../mysql" }
|
||||||
|
rocket = "0.5.1"
|
||||||
|
rocket_dyn_templates = { version = "0.2.0", features = ["tera"] }
|
||||||
|
serde = { version = "1.0.228", features = ["derive"] }
|
||||||
|
toml = "0.9.10"
|
||||||
21
crates/http/LICENSE
Normal file
21
crates/http/LICENSE
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2026 YGGverse
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
11
crates/http/README.md
Normal file
11
crates/http/README.md
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
# rssto-http
|
||||||
|
|
||||||
|
Web server implementation based on the Rocket engine
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> In development!
|
||||||
|
|
||||||
|
```
|
||||||
|
cd rssto/crates/http
|
||||||
|
cargo run -- -c /path/to/config.toml
|
||||||
|
```
|
||||||
29
crates/http/config.toml
Normal file
29
crates/http/config.toml
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
title = "rssto"
|
||||||
|
#description = ""
|
||||||
|
|
||||||
|
format_time = "%d/%m/%Y %H:%M"
|
||||||
|
|
||||||
|
# Provider ID (`provider` table)
|
||||||
|
# * None for the original content
|
||||||
|
# provider_id = 1
|
||||||
|
|
||||||
|
# Default listing limit
|
||||||
|
list_limit = 20
|
||||||
|
|
||||||
|
# Bind server on given host
|
||||||
|
host = "127.0.0.1"
|
||||||
|
|
||||||
|
# Bind server on given port
|
||||||
|
port = 8000
|
||||||
|
|
||||||
|
# Configure instance in debug mode
|
||||||
|
debug = true
|
||||||
|
|
||||||
|
# Database connection setup
|
||||||
|
# * see crates/mysql/database
|
||||||
|
[mysql]
|
||||||
|
host = "localhost"
|
||||||
|
port = 3306
|
||||||
|
username = ""
|
||||||
|
password = ""
|
||||||
|
database = "rssto"
|
||||||
12
crates/http/src/argument.rs
Normal file
12
crates/http/src/argument.rs
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
use clap::Parser;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
#[command(version, about, long_about = None)]
|
||||||
|
pub struct Argument {
|
||||||
|
/// Path to config file
|
||||||
|
///
|
||||||
|
/// * see `config.toml`
|
||||||
|
#[arg(short, long)]
|
||||||
|
pub config: PathBuf,
|
||||||
|
}
|
||||||
24
crates/http/src/config.rs
Normal file
24
crates/http/src/config.rs
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
use serde::Deserialize;
|
||||||
|
use std::net::IpAddr;
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct Mysql {
|
||||||
|
pub database: String,
|
||||||
|
pub host: String,
|
||||||
|
pub password: String,
|
||||||
|
pub port: u16,
|
||||||
|
pub username: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct Config {
|
||||||
|
pub mysql: Mysql,
|
||||||
|
pub title: String,
|
||||||
|
pub description: Option<String>,
|
||||||
|
pub format_time: String,
|
||||||
|
pub provider_id: Option<u64>,
|
||||||
|
pub list_limit: usize,
|
||||||
|
pub host: IpAddr,
|
||||||
|
pub port: u16,
|
||||||
|
pub debug: bool,
|
||||||
|
}
|
||||||
58
crates/http/src/feed.rs
Normal file
58
crates/http/src/feed.rs
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
/// Export crawl index to the RSS file
|
||||||
|
pub struct Feed {
|
||||||
|
buffer: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Feed {
|
||||||
|
pub fn new(title: &str, description: Option<&str>, capacity: usize) -> Self {
|
||||||
|
let t = chrono::Utc::now().to_rfc2822();
|
||||||
|
let mut buffer = String::with_capacity(capacity);
|
||||||
|
|
||||||
|
buffer.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?><rss version=\"2.0\"><channel>");
|
||||||
|
|
||||||
|
buffer.push_str(&format!("<pubDate>{t}</pubDate>"));
|
||||||
|
buffer.push_str(&format!("<lastBuildDate>{t}</lastBuildDate>"));
|
||||||
|
buffer.push_str(&format!("<title>{}</title>", escape(title)));
|
||||||
|
|
||||||
|
if let Some(d) = description {
|
||||||
|
buffer.push_str(&format!("<description>{}</description>", escape(d)));
|
||||||
|
}
|
||||||
|
|
||||||
|
Self { buffer }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Append `item` to the feed `channel`
|
||||||
|
pub fn push(
|
||||||
|
&mut self,
|
||||||
|
guid: u64,
|
||||||
|
time: chrono::DateTime<chrono::Utc>,
|
||||||
|
url: String,
|
||||||
|
title: String,
|
||||||
|
description: String,
|
||||||
|
) {
|
||||||
|
self.buffer.push_str(&format!(
|
||||||
|
"<item><guid>{guid}</guid><title>{}</title><link>{url}</link><description>{}</description><pubDate>{}</pubDate></item>",
|
||||||
|
escape(&title),
|
||||||
|
escape(&description),
|
||||||
|
time.to_rfc2822()
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write final bytes
|
||||||
|
pub fn commit(mut self) -> String {
|
||||||
|
self.buffer.push_str("</channel></rss>");
|
||||||
|
self.buffer
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// @TODO use tera filters?
|
||||||
|
// https://keats.github.io/tera/docs/#built-in-filters
|
||||||
|
|
||||||
|
/// Escape the XML special characters (`&`, `<`, `>`, `"`, `'`) in `value`
/// with the predefined XML entities, so the result is safe to embed as text
///
/// Single pass over the input: an `&` that belongs to already written
/// entity can't be escaped twice, whatever the replacement order is.
fn escape(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for c in value.chars() {
        match c {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            _ => escaped.push(c),
        }
    }
    escaped
}
|
||||||
9
crates/http/src/global.rs
Normal file
9
crates/http/src/global.rs
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
use rocket::serde::Serialize;
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
#[serde(crate = "rocket::serde")]
|
||||||
|
pub struct Global {
|
||||||
|
pub format_time: String,
|
||||||
|
pub list_limit: usize,
|
||||||
|
pub provider_id: Option<u64>,
|
||||||
|
}
|
||||||
309
crates/http/src/main.rs
Normal file
309
crates/http/src/main.rs
Normal file
|
|
@ -0,0 +1,309 @@
|
||||||
|
#[macro_use]
|
||||||
|
extern crate rocket;
|
||||||
|
|
||||||
|
mod argument;
|
||||||
|
mod config;
|
||||||
|
mod feed;
|
||||||
|
mod global;
|
||||||
|
mod meta;
|
||||||
|
|
||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
use feed::Feed;
|
||||||
|
use global::Global;
|
||||||
|
use meta::Meta;
|
||||||
|
use mysql::{Database, table::Sort};
|
||||||
|
use rocket::{
|
||||||
|
State,
|
||||||
|
http::{ContentType, Status},
|
||||||
|
response::content::RawXml,
|
||||||
|
serde::Serialize,
|
||||||
|
};
|
||||||
|
use rocket_dyn_templates::{Template, context};
|
||||||
|
|
||||||
|
#[get("/?<search>&<page>")]
|
||||||
|
fn index(
|
||||||
|
search: Option<&str>,
|
||||||
|
page: Option<usize>,
|
||||||
|
db: &State<Database>,
|
||||||
|
meta: &State<Meta>,
|
||||||
|
global: &State<Global>,
|
||||||
|
) -> Result<Template, Status> {
|
||||||
|
#[derive(Serialize)]
|
||||||
|
#[serde(crate = "rocket::serde")]
|
||||||
|
struct Row {
|
||||||
|
channel_item_content_description_id: u64,
|
||||||
|
link: String,
|
||||||
|
time: String,
|
||||||
|
title: String,
|
||||||
|
}
|
||||||
|
let mut conn = db.connection().map_err(|e| {
|
||||||
|
error!("Could not connect database: `{e}`");
|
||||||
|
Status::InternalServerError
|
||||||
|
})?;
|
||||||
|
let total = conn
|
||||||
|
.channel_item_content_descriptions_total_by_provider_id(global.provider_id, search)
|
||||||
|
.map_err(|e| {
|
||||||
|
error!("Could not get contents total: `{e}`");
|
||||||
|
Status::InternalServerError
|
||||||
|
})?;
|
||||||
|
Ok(Template::render(
|
||||||
|
"index",
|
||||||
|
context! {
|
||||||
|
title: {
|
||||||
|
let mut t = String::with_capacity(9);
|
||||||
|
if let Some(q) = search && !q.is_empty() {
|
||||||
|
t.push_str(q);
|
||||||
|
t.push_str(S);
|
||||||
|
t.push_str("Search");
|
||||||
|
t.push_str(S)
|
||||||
|
}
|
||||||
|
if let Some(p) = page && p > 1 {
|
||||||
|
t.push_str(&format!("Page {p}"));
|
||||||
|
t.push_str(S)
|
||||||
|
}
|
||||||
|
t.push_str(&meta.title);
|
||||||
|
if let Some(ref description) = meta.description
|
||||||
|
&& page.is_none_or(|p| p == 1) && search.is_none_or(|q| q.is_empty()) {
|
||||||
|
t.push_str(S);
|
||||||
|
t.push_str(description)
|
||||||
|
}
|
||||||
|
t
|
||||||
|
},
|
||||||
|
meta: meta.inner(),
|
||||||
|
back: page.map(|p| uri!(index(search, if p > 2 { Some(p - 1) } else { None }))),
|
||||||
|
next: if page.unwrap_or(1) * global.list_limit >= total { None }
|
||||||
|
else { Some(uri!(index(search, Some(page.map_or(2, |p| p + 1))))) },
|
||||||
|
rows: conn.channel_item_content_descriptions_by_provider_id(
|
||||||
|
global.provider_id,
|
||||||
|
search,
|
||||||
|
Sort::Desc,
|
||||||
|
page.map(|p| if p > 1 { p - 1 } else { 1 } * global.list_limit),
|
||||||
|
Some(global.list_limit)
|
||||||
|
).map_err(|e| {
|
||||||
|
error!("Could not get contents: `{e}`");
|
||||||
|
Status::InternalServerError
|
||||||
|
})?
|
||||||
|
.into_iter()
|
||||||
|
.map(|channel_item_content_description| {
|
||||||
|
let channel_item = conn.channel_item(
|
||||||
|
channel_item_content_description.channel_item_content_id
|
||||||
|
).unwrap().unwrap();
|
||||||
|
Row {
|
||||||
|
channel_item_content_description_id:
|
||||||
|
channel_item_content_description.channel_item_content_description_id,
|
||||||
|
link: channel_item.link,
|
||||||
|
time: time(channel_item.pub_date).format(&global.format_time).to_string(),
|
||||||
|
title: channel_item_content_description.title.unwrap_or_default(), // @TODO handle
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect::<Vec<Row>>(),
|
||||||
|
page: page.unwrap_or(1),
|
||||||
|
pages: (total as f64 / global.list_limit as f64).ceil(),
|
||||||
|
total,
|
||||||
|
search
|
||||||
|
},
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[get("/<channel_item_content_description_id>")]
|
||||||
|
fn info(
|
||||||
|
channel_item_content_description_id: u64,
|
||||||
|
db: &State<Database>,
|
||||||
|
meta: &State<Meta>,
|
||||||
|
global: &State<Global>,
|
||||||
|
) -> Result<Template, Status> {
|
||||||
|
let mut conn = db.connection().map_err(|e| {
|
||||||
|
error!("Could not connect database: `{e}`");
|
||||||
|
Status::InternalServerError
|
||||||
|
})?;
|
||||||
|
match conn.channel_item_content_description(channel_item_content_description_id).map_err(|e| {
|
||||||
|
error!("Could not get `channel_item_content_description_id` {channel_item_content_description_id}: `{e}`");
|
||||||
|
Status::InternalServerError
|
||||||
|
})? {
|
||||||
|
Some(channel_item_content_description) => {
|
||||||
|
let channel_item_content = conn
|
||||||
|
.channel_item_content(channel_item_content_description.channel_item_content_id)
|
||||||
|
.map_err(|e| {
|
||||||
|
error!(
|
||||||
|
"Could not get requested `channel_item_content` #{}: `{e}`",
|
||||||
|
channel_item_content_description.channel_item_content_id
|
||||||
|
);
|
||||||
|
Status::InternalServerError
|
||||||
|
})?
|
||||||
|
.ok_or_else(|| {
|
||||||
|
error!(
|
||||||
|
"Could not find requested `channel_item_content` #{}",
|
||||||
|
channel_item_content_description.channel_item_content_id
|
||||||
|
);
|
||||||
|
Status::NotFound
|
||||||
|
})?;
|
||||||
|
let channel_item = conn
|
||||||
|
.channel_item(channel_item_content.channel_item_id)
|
||||||
|
.map_err(|e| {
|
||||||
|
error!(
|
||||||
|
"Could not get requested `channel_item` #{}: `{e}`",
|
||||||
|
channel_item_content.channel_item_id
|
||||||
|
);
|
||||||
|
Status::InternalServerError
|
||||||
|
})?
|
||||||
|
.ok_or_else(|| {
|
||||||
|
error!(
|
||||||
|
"Could not find requested `channel_item` #{}",
|
||||||
|
channel_item_content.channel_item_id
|
||||||
|
);
|
||||||
|
Status::NotFound
|
||||||
|
})?;
|
||||||
|
let title = channel_item_content_description.title.unwrap_or_default(); // @TODO handle
|
||||||
|
Ok(Template::render(
|
||||||
|
"info",
|
||||||
|
context! {
|
||||||
|
description: channel_item_content_description.description,
|
||||||
|
link: channel_item.link,
|
||||||
|
meta: meta.inner(),
|
||||||
|
title: format!("{title}{S}{}", meta.title),
|
||||||
|
name: title,
|
||||||
|
time: time(channel_item.pub_date).format(&global.format_time).to_string(),
|
||||||
|
},
|
||||||
|
))
|
||||||
|
}
|
||||||
|
None => Err(Status::NotFound),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[get("/image/<image_id>")]
|
||||||
|
fn image(image_id: u64, db: &State<Database>) -> Result<(ContentType, Vec<u8>), Status> {
|
||||||
|
let mut conn = db.connection().map_err(|e| {
|
||||||
|
error!("Could not connect database: `{e}`");
|
||||||
|
Status::InternalServerError
|
||||||
|
})?;
|
||||||
|
match conn.image(image_id).map_err(|e| {
|
||||||
|
error!("Could not get content image `{image_id}`: `{e}`");
|
||||||
|
Status::InternalServerError
|
||||||
|
})? {
|
||||||
|
Some(image) => Ok((ContentType::Bytes, image.data)),
|
||||||
|
None => Err(Status::NotFound),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[get("/rss?<search>")]
|
||||||
|
fn rss(
|
||||||
|
search: Option<&str>,
|
||||||
|
global: &State<Global>,
|
||||||
|
meta: &State<Meta>,
|
||||||
|
db: &State<Database>,
|
||||||
|
) -> Result<RawXml<String>, Status> {
|
||||||
|
let mut feed = Feed::new(
|
||||||
|
&meta.title,
|
||||||
|
meta.description.as_deref(),
|
||||||
|
1024, // @TODO
|
||||||
|
);
|
||||||
|
let mut conn = db.connection().map_err(|e| {
|
||||||
|
error!("Could not connect database: `{e}`");
|
||||||
|
Status::InternalServerError
|
||||||
|
})?;
|
||||||
|
for channel_item_content_description in conn
|
||||||
|
.channel_item_content_descriptions_by_provider_id(
|
||||||
|
global.provider_id,
|
||||||
|
search,
|
||||||
|
Sort::Desc,
|
||||||
|
None,
|
||||||
|
Some(global.list_limit),
|
||||||
|
)
|
||||||
|
.map_err(|e| {
|
||||||
|
error!(
|
||||||
|
"Could not load `channel_item_content_description` for `provider` #{:?}: `{e}`",
|
||||||
|
global.provider_id
|
||||||
|
);
|
||||||
|
Status::InternalServerError
|
||||||
|
})?
|
||||||
|
{
|
||||||
|
let channel_item_content = conn
|
||||||
|
.channel_item_content(channel_item_content_description.channel_item_content_id)
|
||||||
|
.map_err(|e| {
|
||||||
|
error!(
|
||||||
|
"Could not get requested `channel_item_content` #{}: `{e}`",
|
||||||
|
channel_item_content_description.channel_item_content_id
|
||||||
|
);
|
||||||
|
Status::InternalServerError
|
||||||
|
})?
|
||||||
|
.ok_or_else(|| {
|
||||||
|
error!(
|
||||||
|
"Could not find requested `channel_item_content` #{}",
|
||||||
|
channel_item_content_description.channel_item_content_id
|
||||||
|
);
|
||||||
|
Status::NotFound
|
||||||
|
})?;
|
||||||
|
let channel_item = conn
|
||||||
|
.channel_item(channel_item_content.channel_item_id)
|
||||||
|
.map_err(|e| {
|
||||||
|
error!(
|
||||||
|
"Could not get requested `channel_item` #{}: `{e}`",
|
||||||
|
channel_item_content.channel_item_id
|
||||||
|
);
|
||||||
|
Status::InternalServerError
|
||||||
|
})?
|
||||||
|
.ok_or_else(|| {
|
||||||
|
error!(
|
||||||
|
"Could not find requested `channel_item` #{}",
|
||||||
|
channel_item_content.channel_item_id
|
||||||
|
);
|
||||||
|
Status::NotFound
|
||||||
|
})?;
|
||||||
|
feed.push(
|
||||||
|
channel_item_content_description.channel_item_content_description_id,
|
||||||
|
time(channel_item.pub_date),
|
||||||
|
channel_item.link,
|
||||||
|
channel_item_content_description.title.unwrap_or_default(), // @TODO handle
|
||||||
|
channel_item_content_description
|
||||||
|
.description
|
||||||
|
.unwrap_or_default(), // @TODO handle
|
||||||
|
)
|
||||||
|
}
|
||||||
|
Ok(RawXml(feed.commit()))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[launch]
|
||||||
|
fn rocket() -> _ {
|
||||||
|
use clap::Parser;
|
||||||
|
let argument = argument::Argument::parse();
|
||||||
|
let config: config::Config =
|
||||||
|
toml::from_str(&std::fs::read_to_string(argument.config).unwrap()).unwrap();
|
||||||
|
rocket::build()
|
||||||
|
.attach(Template::fairing())
|
||||||
|
.configure(rocket::Config {
|
||||||
|
port: config.port,
|
||||||
|
address: config.host,
|
||||||
|
..if config.debug {
|
||||||
|
rocket::Config::debug_default()
|
||||||
|
} else {
|
||||||
|
rocket::Config::release_default()
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.manage(
|
||||||
|
Database::pool(
|
||||||
|
&config.mysql.host,
|
||||||
|
config.mysql.port,
|
||||||
|
&config.mysql.username,
|
||||||
|
&config.mysql.password,
|
||||||
|
&config.mysql.database,
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
)
|
||||||
|
.manage(Global {
|
||||||
|
format_time: config.format_time,
|
||||||
|
list_limit: config.list_limit,
|
||||||
|
provider_id: config.provider_id,
|
||||||
|
})
|
||||||
|
.manage(Meta {
|
||||||
|
description: config.description,
|
||||||
|
title: config.title,
|
||||||
|
version: env!("CARGO_PKG_VERSION").into(),
|
||||||
|
})
|
||||||
|
.mount("/", routes![index, rss, info, image])
|
||||||
|
}
|
||||||
|
|
||||||
|
const S: &str = " • ";
|
||||||
|
|
||||||
|
fn time(timestamp: i64) -> DateTime<Utc> {
|
||||||
|
DateTime::<Utc>::from_timestamp(timestamp, 0).unwrap()
|
||||||
|
}
|
||||||
9
crates/http/src/meta.rs
Normal file
9
crates/http/src/meta.rs
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
use rocket::serde::Serialize;
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
#[serde(crate = "rocket::serde")]
|
||||||
|
pub struct Meta {
|
||||||
|
pub description: Option<String>,
|
||||||
|
pub title: String,
|
||||||
|
pub version: String,
|
||||||
|
}
|
||||||
21
crates/http/templates/index.html.tera
Normal file
21
crates/http/templates/index.html.tera
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
{% extends "layout" %}
|
||||||
|
{% block content %}
|
||||||
|
{% if rows %}
|
||||||
|
{% for row in rows %}
|
||||||
|
<div>
|
||||||
|
<a name="{{ row.channel_item_content_description_id }}"></a>
|
||||||
|
<h2><a href="{{ row.channel_item_content_description_id }}">{{ row.title }}</a></h2>
|
||||||
|
<p>{{ row.time }}</p>
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
{% else %}
|
||||||
|
<div>
|
||||||
|
<p>Nothing.</p>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
{% if next %}<a href="{{ next }}">Next</a>{% endif %}
|
||||||
|
{% if back %}<a href="{{ back }}">Back</a>{% endif %}
|
||||||
|
{% if total %}
|
||||||
|
<p>Page {{ page }} / {{ pages }} ({{ total }} total)</p>
|
||||||
|
{% endif %}
|
||||||
|
{% endblock content %}
|
||||||
10
crates/http/templates/info.html.tera
Normal file
10
crates/http/templates/info.html.tera
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
{% extends "layout" %}
|
||||||
|
{% block content %}
|
||||||
|
<div>
|
||||||
|
<h1>{{ name }}</h1>
|
||||||
|
<p><a href="{{ link }}">{{ time }}</a></p>
|
||||||
|
<div>
|
||||||
|
{{ description | safe }}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock content %}
|
||||||
25
crates/http/templates/layout.html.tera
Normal file
25
crates/http/templates/layout.html.tera
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8" />
|
||||||
|
<title>{{ title }}</title>
|
||||||
|
{% if meta.description %}
|
||||||
|
<meta name="description" content="{{ meta.description }}" />
|
||||||
|
{% endif %}
|
||||||
|
<style>
|
||||||
|
* {color-scheme: light dark}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1><a href="/">{{ meta.title }}</a></h1>
|
||||||
|
<form action="/" method="GET">
|
||||||
|
<input type="text" name="search" value="{% if search %}{{ search }}{% endif %}" placeholder="Keyword..." />
|
||||||
|
<input type="submit" value="Search" />
|
||||||
|
</form>
|
||||||
|
</header>
|
||||||
|
<main>
|
||||||
|
{% block content %}{% endblock content %}
|
||||||
|
</main>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
22
crates/llm/Cargo.toml
Normal file
22
crates/llm/Cargo.toml
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
[package]
|
||||||
|
name = "rssto-llm"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2024"
|
||||||
|
license = "MIT"
|
||||||
|
readme = "README.md"
|
||||||
|
description = "LLM daemon for the rssto DB translations"
|
||||||
|
keywords = ["rss", "llm", "translation", "localization", "server"]
|
||||||
|
categories = ["command-line-utilities", "parsing", "text-processing", "value-formatting"]
|
||||||
|
repository = "https://github.com/YGGverse/rssto"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0.100"
|
||||||
|
chrono = "0.4.42"
|
||||||
|
clap = { version = "4.5.54", features = ["derive"] }
|
||||||
|
lancor = "0.1.1"
|
||||||
|
log = "0.4.29"
|
||||||
|
mysql = { package = "rssto-mysql", version = "0.1.0", features = ["transaction"], path = "../mysql" }
|
||||||
|
serde = { version = "1.0.228", features = ["derive"] }
|
||||||
|
tokio = { version = "1.0", features = ["full"] }
|
||||||
|
toml = "0.9.10"
|
||||||
|
tracing-subscriber = { version = "0.3.22", features = ["env-filter"] }
|
||||||
21
crates/llm/LICENSE
Normal file
21
crates/llm/LICENSE
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2026 YGGverse
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
22
crates/llm/README.md
Normal file
22
crates/llm/README.md
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
# rssto-llm
|
||||||
|
|
||||||
|
LLM daemon for the rssto DB translations
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> In development!
|
||||||
|
|
||||||
|
1. Setup `rssto-crawler` first and collect initial data
|
||||||
|
|
||||||
|
2. Run LLM server:
|
||||||
|
|
||||||
|
```
|
||||||
|
llama-server -hf ggml-org/gemma-3-1b-it-GGUF
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Launch `rssto-llm` to handle `content` DB:
|
||||||
|
|
||||||
|
```
|
||||||
|
cd rssto/crates/llm
|
||||||
|
cargo run -- -c /path/to/config.toml
|
||||||
|
```
|
||||||
|
* see `--help` to display all supported options
|
||||||
22
crates/llm/config.toml
Normal file
22
crates/llm/config.toml
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
# Rescan database for new subjects, in seconds
|
||||||
|
# * process once if not defined
|
||||||
|
# update = 900
|
||||||
|
|
||||||
|
# Database connection setup
|
||||||
|
# * see crates/mysql/database
|
||||||
|
[mysql]
# Must be an IP address literal: the value is parsed as `std::net::IpAddr`,
# so a hostname such as "localhost" is rejected when the config is loaded
host = "127.0.0.1"
port = 3306
username = ""
password = ""
database = "rssto"
|
||||||
|
|
||||||
|
# LLM connection setup
|
||||||
|
[llm]
|
||||||
|
scheme = "http"
|
||||||
|
host = "127.0.0.1"
|
||||||
|
port = 8080
|
||||||
|
# Model name
|
||||||
|
model = "ggml-org/gemma-3-1b-it-GGUF"
|
||||||
|
# Initial message for the `content` subject (e.g. `translate to...`)
|
||||||
|
message = "translate to english:"
|
||||||
12
crates/llm/src/argument.rs
Normal file
12
crates/llm/src/argument.rs
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
use clap::Parser;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
#[command(version, about, long_about = None)]
|
||||||
|
pub struct Argument {
|
||||||
|
/// Path to config file
|
||||||
|
///
|
||||||
|
/// * see `config.toml`
|
||||||
|
#[arg(short, long)]
|
||||||
|
pub config: PathBuf,
|
||||||
|
}
|
||||||
27
crates/llm/src/config.rs
Normal file
27
crates/llm/src/config.rs
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
use serde::Deserialize;
|
||||||
|
use std::net::IpAddr;
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct Mysql {
|
||||||
|
pub database: String,
|
||||||
|
pub host: IpAddr,
|
||||||
|
pub password: String,
|
||||||
|
pub port: u16,
|
||||||
|
pub username: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct Llm {
|
||||||
|
pub scheme: String,
|
||||||
|
pub host: IpAddr,
|
||||||
|
pub port: u16,
|
||||||
|
pub model: String,
|
||||||
|
pub message: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct Config {
|
||||||
|
pub mysql: Mysql,
|
||||||
|
pub llm: Llm,
|
||||||
|
pub update: Option<u64>,
|
||||||
|
}
|
||||||
124
crates/llm/src/main.rs
Normal file
124
crates/llm/src/main.rs
Normal file
|
|
@ -0,0 +1,124 @@
|
||||||
|
mod argument;
|
||||||
|
mod config;
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use mysql::Database;
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> Result<()> {
|
||||||
|
use chrono::Local;
|
||||||
|
use clap::Parser;
|
||||||
|
use lancor::{ChatCompletionRequest, LlamaCppClient, Message};
|
||||||
|
use log::{debug, info};
|
||||||
|
|
||||||
|
use std::env::var;
|
||||||
|
|
||||||
|
if var("RUST_LOG").is_ok() {
|
||||||
|
use tracing_subscriber::{EnvFilter, fmt::*};
|
||||||
|
struct T;
|
||||||
|
impl time::FormatTime for T {
|
||||||
|
fn format_time(&self, w: &mut format::Writer<'_>) -> std::fmt::Result {
|
||||||
|
write!(w, "{}", Local::now())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt()
|
||||||
|
.with_timer(T)
|
||||||
|
.with_env_filter(EnvFilter::from_default_env())
|
||||||
|
.init()
|
||||||
|
}
|
||||||
|
|
||||||
|
let argument = argument::Argument::parse();
|
||||||
|
let config: config::Config = toml::from_str(&std::fs::read_to_string(argument.config)?)?;
|
||||||
|
|
||||||
|
let llm = LlamaCppClient::new(format!(
|
||||||
|
"{}://{}:{}",
|
||||||
|
config.llm.scheme, config.llm.host, config.llm.port
|
||||||
|
))?;
|
||||||
|
let db = Database::pool(
|
||||||
|
&config.mysql.host.to_string(),
|
||||||
|
config.mysql.port,
|
||||||
|
&config.mysql.username,
|
||||||
|
&config.mysql.password,
|
||||||
|
&config.mysql.database,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let provider_id = {
|
||||||
|
let mut conn = db.connection()?;
|
||||||
|
match conn.provider_id_by_name(&config.llm.model)? {
|
||||||
|
Some(provider_id) => {
|
||||||
|
debug!(
|
||||||
|
"Use existing DB provider #{} matches model name `{}`",
|
||||||
|
provider_id, &config.llm.model
|
||||||
|
);
|
||||||
|
provider_id
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
let provider_id = conn.insert_provider(&config.llm.model)?;
|
||||||
|
info!(
|
||||||
|
"Provider `{}` not found in database, created new one with ID `{provider_id}`",
|
||||||
|
&config.llm.model
|
||||||
|
);
|
||||||
|
provider_id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
info!("Daemon started");
|
||||||
|
loop {
|
||||||
|
debug!("New queue begin...");
|
||||||
|
let mut tx = db.transaction()?;
|
||||||
|
for channel_item_content_description in
|
||||||
|
tx.channel_item_content_descriptions_queue_for_provider_id(provider_id)?
|
||||||
|
{
|
||||||
|
debug!(
|
||||||
|
"Begin generating `channel_item_content_description` #{} using `provider_id` #{provider_id}.",
|
||||||
|
channel_item_content_description.channel_item_content_description_id
|
||||||
|
);
|
||||||
|
let title = match channel_item_content_description.title {
|
||||||
|
Some(subject) => Some(
|
||||||
|
llm.chat_completion(ChatCompletionRequest::new(&config.llm.model).message(
|
||||||
|
Message::user(format!("{}\n{}", config.llm.message, subject)),
|
||||||
|
))
|
||||||
|
.await?
|
||||||
|
.choices[0]
|
||||||
|
.message
|
||||||
|
.content
|
||||||
|
.trim()
|
||||||
|
.to_string(),
|
||||||
|
),
|
||||||
|
None => None,
|
||||||
|
};
|
||||||
|
let description = match channel_item_content_description.description {
|
||||||
|
Some(subject) => Some(
|
||||||
|
llm.chat_completion(ChatCompletionRequest::new(&config.llm.model).message(
|
||||||
|
Message::user(format!("{}\n{}", config.llm.message, subject)),
|
||||||
|
))
|
||||||
|
.await?
|
||||||
|
.choices[0]
|
||||||
|
.message
|
||||||
|
.content
|
||||||
|
.trim()
|
||||||
|
.to_string(),
|
||||||
|
),
|
||||||
|
None => None,
|
||||||
|
};
|
||||||
|
let channel_item_content_description_id = tx.insert_channel_item_content_description(
|
||||||
|
channel_item_content_description.channel_item_content_id,
|
||||||
|
Some(provider_id),
|
||||||
|
title.as_deref(),
|
||||||
|
description.as_deref(),
|
||||||
|
)?;
|
||||||
|
info!(
|
||||||
|
"Create `channel_item_content_description` #{channel_item_content_description_id} by `provider_id` #{provider_id}."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
tx.commit()?;
|
||||||
|
debug!("Queue completed");
|
||||||
|
if let Some(update) = config.update {
|
||||||
|
debug!("Wait {update} seconds to continue...");
|
||||||
|
std::thread::sleep(std::time::Duration::from_secs(update))
|
||||||
|
} else {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
17
crates/mysql/Cargo.toml
Normal file
17
crates/mysql/Cargo.toml
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
[package]
|
||||||
|
name = "rssto-mysql"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2024"
|
||||||
|
license = "MIT"
|
||||||
|
readme = "README.md"
|
||||||
|
description = "Shared MySQL database library"
|
||||||
|
keywords = ["rssto", "database", "mysql", "library", "api"]
|
||||||
|
# categories = []
|
||||||
|
repository = "https://github.com/YGGverse/rssto"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = []
|
||||||
|
transaction = []
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
mysql = "26.0.1"
|
||||||
21
crates/mysql/LICENSE
Normal file
21
crates/mysql/LICENSE
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2026 YGGverse
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
6
crates/mysql/README.md
Normal file
6
crates/mysql/README.md
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
# rssto-mysql
|
||||||
|
|
||||||
|
Shared MySQL database library
|
||||||
|
|
||||||
|
> [!TIP]
|
||||||
|
> See `database.mwb` model or `version` directory to deploy
|
||||||
BIN
crates/mysql/database.mwb
Normal file
BIN
crates/mysql/database.mwb
Normal file
Binary file not shown.
143
crates/mysql/src/connection.rs
Normal file
143
crates/mysql/src/connection.rs
Normal file
|
|
@ -0,0 +1,143 @@
|
||||||
|
use crate::table::*;
|
||||||
|
use mysql::{Error, Pool, PooledConn, prelude::Queryable};
|
||||||
|
|
||||||
|
/// Non-transactional operations used in client apps like `rssto-http`: read queries plus `insert_provider`
|
||||||
|
pub struct Connection {
|
||||||
|
conn: PooledConn,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Connection {
|
||||||
|
pub fn create(pool: &Pool) -> Result<Self, Error> {
|
||||||
|
Ok(Self {
|
||||||
|
conn: pool.get_conn()?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn channel_item(&mut self, channel_item_id: u64) -> Result<Option<ChannelItem>, Error> {
|
||||||
|
self.conn.exec_first(
|
||||||
|
"SELECT `channel_item_id`,
|
||||||
|
`channel_id`,
|
||||||
|
`pub_date`,
|
||||||
|
`guid`,
|
||||||
|
`link` FROM `channel_item` WHERE `channel_item_id` = ?",
|
||||||
|
(channel_item_id,),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn channel_item_content(
|
||||||
|
&mut self,
|
||||||
|
channel_item_content_id: u64,
|
||||||
|
) -> Result<Option<ChannelItemContent>, Error> {
|
||||||
|
self.conn.exec_first(
|
||||||
|
"SELECT `channel_item_content_id`,
|
||||||
|
`channel_item_id`
|
||||||
|
FROM `channel_item_content` WHERE `channel_item_content_id` = ?",
|
||||||
|
(channel_item_content_id,),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn channel_item_content_description(
|
||||||
|
&mut self,
|
||||||
|
channel_item_content_description_id: u64,
|
||||||
|
) -> Result<Option<ChannelItemContentDescription>, Error> {
|
||||||
|
self.conn.exec_first(
|
||||||
|
"SELECT `channel_item_content_description_id`,
|
||||||
|
`channel_item_content_id`,
|
||||||
|
`provider_id`,
|
||||||
|
`title`,
|
||||||
|
`description` FROM `channel_item_content_description`
|
||||||
|
WHERE `channel_item_content_description_id` = ?",
|
||||||
|
(channel_item_content_description_id,),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn channel_item_content_descriptions_total_by_provider_id(
|
||||||
|
&mut self,
|
||||||
|
provider_id: Option<u64>,
|
||||||
|
keyword: Option<&str>,
|
||||||
|
) -> Result<usize, Error> {
|
||||||
|
let total: Option<usize> = match keyword {
|
||||||
|
Some(k) => self.conn.exec_first(
|
||||||
|
"SELECT COUNT(*) FROM `channel_item_content_description`
|
||||||
|
WHERE `provider_id` <=> ? AND `title` LIKE '%?%'",
|
||||||
|
(provider_id, k),
|
||||||
|
)?,
|
||||||
|
None => self.conn.exec_first(
|
||||||
|
"SELECT COUNT(*) FROM `channel_item_content_description`
|
||||||
|
WHERE `provider_id` <=> ?",
|
||||||
|
(provider_id,),
|
||||||
|
)?,
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(total.unwrap_or(0))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn channel_item_content_descriptions_by_provider_id(
|
||||||
|
&mut self,
|
||||||
|
provider_id: Option<u64>,
|
||||||
|
keyword: Option<&str>,
|
||||||
|
sort: Sort,
|
||||||
|
start: Option<usize>,
|
||||||
|
limit: Option<usize>,
|
||||||
|
) -> Result<Vec<ChannelItemContentDescription>, Error> {
|
||||||
|
match keyword {
|
||||||
|
Some(k) => self.conn.exec(
|
||||||
|
format!(
|
||||||
|
"SELECT `channel_item_content_description_id`,
|
||||||
|
`channel_item_content_id`,
|
||||||
|
`provider_id`,
|
||||||
|
`title`,
|
||||||
|
`description`
|
||||||
|
FROM `channel_item_content_description`
|
||||||
|
WHERE `provider_id` <=> ? AND `title` LIKE '%?%'
|
||||||
|
ORDER BY `channel_item_content_description_id` {sort} LIMIT {},{}",
|
||||||
|
start.unwrap_or(0),
|
||||||
|
limit.unwrap_or(DEFAULT_LIMIT)
|
||||||
|
),
|
||||||
|
(provider_id, k),
|
||||||
|
),
|
||||||
|
None => self.conn.exec(
|
||||||
|
format!(
|
||||||
|
"SELECT `channel_item_content_description_id`,
|
||||||
|
`channel_item_content_id`,
|
||||||
|
`provider_id`,
|
||||||
|
`title`,
|
||||||
|
`description`
|
||||||
|
FROM `channel_item_content_description`
|
||||||
|
WHERE `provider_id` <=> ?
|
||||||
|
ORDER BY `channel_item_content_description_id` {sort} LIMIT {},{}",
|
||||||
|
start.unwrap_or(0),
|
||||||
|
limit.unwrap_or(DEFAULT_LIMIT)
|
||||||
|
),
|
||||||
|
(provider_id,),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn image(&mut self, image_id: u64) -> Result<Option<Image>, Error> {
|
||||||
|
self.conn.exec_first(
|
||||||
|
"SELECT `image_id`,
|
||||||
|
`provider_id`,
|
||||||
|
`sha256`,
|
||||||
|
`src`,
|
||||||
|
`url`,
|
||||||
|
`data` FROM `image` WHERE `image_id` = ?",
|
||||||
|
(image_id,),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn provider_id_by_name(&mut self, name: &str) -> Result<Option<u64>, Error> {
|
||||||
|
self.conn.exec_first(
|
||||||
|
"SELECT `provider_id` FROM `provider` WHERE `name` = ?",
|
||||||
|
(name,),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn insert_provider(&mut self, name: &str) -> Result<u64, Error> {
|
||||||
|
self.conn
|
||||||
|
.exec_drop("INSERT INTO `provider` SET `name` = ?", (name,))?;
|
||||||
|
Ok(self.conn.last_insert_id())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const DEFAULT_LIMIT: usize = 100;
|
||||||
36
crates/mysql/src/lib.rs
Normal file
36
crates/mysql/src/lib.rs
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
mod connection;
|
||||||
|
pub mod table;
|
||||||
|
#[cfg(feature = "transaction")]
|
||||||
|
mod transaction;
|
||||||
|
|
||||||
|
pub use connection::Connection;
|
||||||
|
#[cfg(feature = "transaction")]
|
||||||
|
pub use transaction::Transaction;
|
||||||
|
pub struct Database {
|
||||||
|
pool: mysql::Pool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Database {
|
||||||
|
pub fn pool(
|
||||||
|
host: &str,
|
||||||
|
port: u16,
|
||||||
|
user: &str,
|
||||||
|
password: &str,
|
||||||
|
database: &str,
|
||||||
|
) -> Result<Self, mysql::Error> {
|
||||||
|
Ok(Self {
|
||||||
|
pool: mysql::Pool::new(
|
||||||
|
format!("mysql://{user}:{password}@{host}:{port}/{database}").as_str(),
|
||||||
|
)?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn connection(&self) -> Result<Connection, mysql::Error> {
|
||||||
|
Connection::create(&self.pool)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "transaction")]
|
||||||
|
pub fn transaction(&self) -> Result<Transaction, mysql::Error> {
|
||||||
|
Transaction::create(&self.pool)
|
||||||
|
}
|
||||||
|
}
|
||||||
74
crates/mysql/src/table.rs
Normal file
74
crates/mysql/src/table.rs
Normal file
|
|
@ -0,0 +1,74 @@
|
||||||
|
use mysql::prelude::FromRow;
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq, FromRow)]
|
||||||
|
pub struct Channel {
|
||||||
|
pub channel_id: u64,
|
||||||
|
pub url: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq, FromRow)]
|
||||||
|
pub struct ChannelItem {
|
||||||
|
pub channel_item_id: u64,
|
||||||
|
pub channel_id: u64,
|
||||||
|
pub pub_date: i64,
|
||||||
|
pub guid: String,
|
||||||
|
pub link: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq, FromRow)]
|
||||||
|
pub struct ChannelItemDescription {
|
||||||
|
pub channel_item_description_id: u64,
|
||||||
|
pub channel_item_id: u64,
|
||||||
|
pub provider_id: Option<u64>,
|
||||||
|
pub title: Option<String>,
|
||||||
|
pub description: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq, FromRow)]
|
||||||
|
pub struct ChannelItemContent {
|
||||||
|
pub channel_item_content_id: u64,
|
||||||
|
pub channel_item_id: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq, FromRow)]
|
||||||
|
pub struct ChannelItemContentDescription {
|
||||||
|
pub channel_item_content_description_id: u64,
|
||||||
|
pub channel_item_content_id: u64,
|
||||||
|
pub provider_id: Option<u64>,
|
||||||
|
pub title: Option<String>,
|
||||||
|
pub description: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq, FromRow)]
|
||||||
|
pub struct Provider {
|
||||||
|
pub provider_id: u64,
|
||||||
|
pub name: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq, FromRow)]
|
||||||
|
pub struct Image {
|
||||||
|
pub image_id: u64,
|
||||||
|
pub provider_id: Option<u64>,
|
||||||
|
/// Keep image unique by comparing its data hash
|
||||||
|
pub sha256: String,
|
||||||
|
/// Original `src` tag value to post-replacing
|
||||||
|
pub src: Option<String>,
|
||||||
|
/// Resolved absolute URL
|
||||||
|
pub url: Option<String>,
|
||||||
|
/// Image data, MEDIUMBLOB (16M)
|
||||||
|
pub data: Vec<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Ordering direction, displayed as the matching SQL keyword.
pub enum Sort {
    /// Displayed as `ASC`
    Asc,
    /// Displayed as `DESC`
    Desc,
}

impl std::fmt::Display for Sort {
    /// Write the SQL keyword for this direction.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let keyword = match self {
            Self::Asc => "ASC",
            Self::Desc => "DESC",
        };
        f.write_str(keyword)
    }
}
|
||||||
195
crates/mysql/src/transaction.rs
Normal file
195
crates/mysql/src/transaction.rs
Normal file
|
|
@ -0,0 +1,195 @@
|
||||||
|
use crate::table::*;
|
||||||
|
use mysql::{Error, Pool, TxOpts, prelude::Queryable};
|
||||||
|
|
||||||
|
/// Safe, optimized read/write operations
|
||||||
|
/// mostly required by the `rssto-crawler` and `rssto-llm`
|
||||||
|
/// * all members implementation requires `commit` action
|
||||||
|
pub struct Transaction {
|
||||||
|
tx: mysql::Transaction<'static>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Transaction {
|
||||||
|
pub fn create(pool: &Pool) -> Result<Self, Error> {
|
||||||
|
Ok(Self {
|
||||||
|
tx: pool.start_transaction(TxOpts::default())?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn commit(self) -> Result<(), Error> {
|
||||||
|
self.tx.commit()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn rollback(self) -> Result<(), Error> {
|
||||||
|
self.tx.rollback()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn channel_id_by_url(&mut self, url: &str) -> Result<Option<u64>, Error> {
|
||||||
|
self.tx.exec_first(
|
||||||
|
"SELECT `channel_id` FROM `channel` WHERE `url` = ? LIMIT 1",
|
||||||
|
(url,),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn insert_channel(&mut self, url: &str) -> Result<u64, Error> {
|
||||||
|
self.tx
|
||||||
|
.exec_drop("INSERT INTO `channel` SET `url` = ?", (url,))?;
|
||||||
|
Ok(self.tx.last_insert_id().unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn insert_channel_description(
|
||||||
|
&mut self,
|
||||||
|
channel_id: u64,
|
||||||
|
provider_id: Option<u64>,
|
||||||
|
title: Option<String>,
|
||||||
|
description: Option<String>,
|
||||||
|
) -> Result<u64, Error> {
|
||||||
|
self.tx.exec_drop(
|
||||||
|
"INSERT INTO `channel_description` SET `channel_id` = ?,
|
||||||
|
`provider_id` = ?,
|
||||||
|
`title` = ?,
|
||||||
|
`description` = ?",
|
||||||
|
(channel_id, provider_id, title, description),
|
||||||
|
)?;
|
||||||
|
Ok(self.tx.last_insert_id().unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn channel_items_total_by_channel_id_guid(
|
||||||
|
&mut self,
|
||||||
|
channel_id: u64,
|
||||||
|
guid: &str,
|
||||||
|
) -> Result<usize, Error> {
|
||||||
|
Ok(self
|
||||||
|
.tx
|
||||||
|
.exec_first(
|
||||||
|
"SELECT COUNT(*) FROM `channel_item` WHERE `channel_id` = ? AND `guid` = ?",
|
||||||
|
(channel_id, guid),
|
||||||
|
)?
|
||||||
|
.unwrap_or(0))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn insert_channel_item(
|
||||||
|
&mut self,
|
||||||
|
channel_id: u64,
|
||||||
|
pub_date: i64,
|
||||||
|
guid: &str,
|
||||||
|
link: &str,
|
||||||
|
) -> Result<u64, Error> {
|
||||||
|
self.tx.exec_drop(
|
||||||
|
"INSERT INTO `channel_item` SET `channel_id` = ?,
|
||||||
|
`pub_date` = ?,
|
||||||
|
`guid` = ?,
|
||||||
|
`link` = ?",
|
||||||
|
(channel_id, pub_date, guid, link),
|
||||||
|
)?;
|
||||||
|
Ok(self.tx.last_insert_id().unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn insert_channel_item_description(
|
||||||
|
&mut self,
|
||||||
|
channel_item_id: u64,
|
||||||
|
provider_id: Option<u64>,
|
||||||
|
title: Option<String>,
|
||||||
|
description: Option<String>,
|
||||||
|
) -> Result<u64, Error> {
|
||||||
|
self.tx.exec_drop(
|
||||||
|
"INSERT INTO `channel_item_description` SET `channel_item_id` = ?,
|
||||||
|
`provider_id` = ?,
|
||||||
|
`title` = ?,
|
||||||
|
`description` = ?",
|
||||||
|
(channel_item_id, provider_id, title, description),
|
||||||
|
)?;
|
||||||
|
Ok(self.tx.last_insert_id().unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn channel_item_content_descriptions_queue_for_provider_id(
|
||||||
|
&mut self,
|
||||||
|
provider_id: u64,
|
||||||
|
) -> Result<Vec<ChannelItemContentDescription>, Error> {
|
||||||
|
self.tx.exec(
|
||||||
|
"SELECT `t1`.`channel_item_content_description_id`,
|
||||||
|
`t1`.`channel_item_content_id`,
|
||||||
|
`t1`.`provider_id`,
|
||||||
|
`t1`.`title`,
|
||||||
|
`t1`.`description`
|
||||||
|
FROM `channel_item_content_description` AS `t1`
|
||||||
|
WHERE `t1`.`provider_id` IS NULL AND NOT EXISTS (
|
||||||
|
SELECT NULL FROM `channel_item_content_description` AS `t2`
|
||||||
|
WHERE `t2`.`channel_item_content_description_id` = `t1`.`channel_item_content_description_id`
|
||||||
|
AND `t2`.`provider_id` = ? LIMIT 1
|
||||||
|
)",
|
||||||
|
(provider_id,),
|
||||||
|
)
|
||||||
|
} // @TODO upgrade to the latest version
|
||||||
|
|
||||||
|
pub fn insert_channel_item_content(&mut self, channel_item_id: u64) -> Result<u64, Error> {
|
||||||
|
self.tx.exec_drop(
|
||||||
|
"INSERT INTO `channel_item_content` SET `channel_item_id` = ?",
|
||||||
|
(channel_item_id,),
|
||||||
|
)?;
|
||||||
|
Ok(self.tx.last_insert_id().unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn insert_channel_item_content_description(
|
||||||
|
&mut self,
|
||||||
|
channel_item_content_id: u64,
|
||||||
|
provider_id: Option<u64>,
|
||||||
|
title: Option<&str>,
|
||||||
|
description: Option<&str>,
|
||||||
|
) -> Result<u64, Error> {
|
||||||
|
self.tx.exec_drop(
|
||||||
|
"INSERT INTO `channel_item_content_description` SET `channel_item_content_id` = ?,
|
||||||
|
`provider_id` = ?,
|
||||||
|
`title` = ?,
|
||||||
|
`description` = ?",
|
||||||
|
(channel_item_content_id, provider_id, title, description),
|
||||||
|
)?;
|
||||||
|
Ok(self.tx.last_insert_id().unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn replace_channel_item_content_description(
|
||||||
|
&mut self,
|
||||||
|
channel_item_content_description_id: u64,
|
||||||
|
from: &str,
|
||||||
|
to: &str,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
self.tx.exec_drop(
|
||||||
|
"UPDATE `channel_item_content_description`
|
||||||
|
SET `description` = REPLACE(`description`, ?, ?)
|
||||||
|
WHERE `channel_item_content_description_id` = ?",
|
||||||
|
(from, to, channel_item_content_description_id),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn insert_channel_item_content_image(
|
||||||
|
&mut self,
|
||||||
|
channel_item_content_id: u64,
|
||||||
|
image_id: u64,
|
||||||
|
) -> Result<u64, Error> {
|
||||||
|
self.tx.exec_drop(
|
||||||
|
"INSERT INTO `channel_item_content_image` SET `channel_item_content_id` = ?, `image_id` = ?",
|
||||||
|
(channel_item_content_id, image_id),
|
||||||
|
)?;
|
||||||
|
Ok(self.tx.last_insert_id().unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn image_id_by_sha256(&mut self, sha256: &str) -> Result<Option<u64>, Error> {
|
||||||
|
self.tx.exec_first(
|
||||||
|
"SELECT `image_id` FROM `image` WHERE `sha256` = ? LIMIT 1",
|
||||||
|
(sha256,),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn insert_image(
|
||||||
|
&mut self,
|
||||||
|
sha256: &str,
|
||||||
|
src: Option<&str>,
|
||||||
|
url: Option<&str>,
|
||||||
|
data: &[u8],
|
||||||
|
) -> Result<u64, Error> {
|
||||||
|
self.tx.exec_drop(
|
||||||
|
"INSERT INTO `image` SET `sha256` = ?, `src` = ?, `url` = ?, `data` = ?",
|
||||||
|
(sha256, src, url, data),
|
||||||
|
)?;
|
||||||
|
Ok(self.tx.last_insert_id().unwrap())
|
||||||
|
}
|
||||||
|
}
|
||||||
202
crates/mysql/version/0.1.0.sql
Normal file
202
crates/mysql/version/0.1.0.sql
Normal file
|
|
@ -0,0 +1,202 @@
|
||||||
|
-- MySQL Script generated by MySQL Workbench
|
||||||
|
-- Sun, 11 Jan 2026 21:01:10 +0200
|
||||||
|
-- Model: New Model Version: 1.0
|
||||||
|
-- MySQL Workbench Forward Engineering
|
||||||
|
|
||||||
|
SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0;
|
||||||
|
SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0;
|
||||||
|
SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION';
|
||||||
|
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Schema rssto
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Schema rssto
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
CREATE SCHEMA IF NOT EXISTS `rssto` ;
|
||||||
|
USE `rssto` ;
|
||||||
|
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Table `rssto`.`channel`
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Channels, unique by `url`
CREATE TABLE IF NOT EXISTS `rssto`.`channel` (
  `channel_id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
  `url` VARCHAR(255) NOT NULL,
  PRIMARY KEY (`channel_id`),
  UNIQUE INDEX `url_UNIQUE` (`url` ASC) VISIBLE
) ENGINE = InnoDB;
|
||||||
|
|
||||||
|
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Table `rssto`.`channel_item`
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Channel items, unique by (`guid`, `channel_id`)
CREATE TABLE IF NOT EXISTS `rssto`.`channel_item` (
  `channel_item_id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
  `channel_id` INT UNSIGNED NOT NULL,
  `pub_date` BIGINT NOT NULL,
  `guid` VARCHAR(255) NOT NULL,
  `link` VARCHAR(255) NOT NULL,
  PRIMARY KEY (`channel_item_id`, `channel_id`),
  INDEX `fk_channel_item_channel_idx` (`channel_id` ASC) VISIBLE,
  UNIQUE INDEX `UNIQUE` (`guid` ASC, `channel_id` ASC) VISIBLE,
  CONSTRAINT `fk_channel_item_channel`
    FOREIGN KEY (`channel_id`)
    REFERENCES `rssto`.`channel` (`channel_id`)
    ON DELETE NO ACTION
    ON UPDATE NO ACTION
) ENGINE = InnoDB;
|
||||||
|
|
||||||
|
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Table `rssto`.`provider`
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Providers, unique by `name`
CREATE TABLE IF NOT EXISTS `rssto`.`provider` (
  `provider_id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
  `name` VARCHAR(255) NOT NULL,
  PRIMARY KEY (`provider_id`),
  UNIQUE INDEX `name_UNIQUE` (`name` ASC) VISIBLE
) ENGINE = InnoDB;
|
||||||
|
|
||||||
|
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Table `rssto`.`channel_item_content`
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Content records attached to a channel item
CREATE TABLE IF NOT EXISTS `rssto`.`channel_item_content` (
  `channel_item_content_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
  `channel_item_id` INT UNSIGNED NOT NULL,
  PRIMARY KEY (`channel_item_content_id`, `channel_item_id`),
  INDEX `fk_channel_item_content_channel_item_idx` (`channel_item_id` ASC) VISIBLE,
  CONSTRAINT `fk_channel_item_content_channel_item`
    FOREIGN KEY (`channel_item_id`)
    REFERENCES `rssto`.`channel_item` (`channel_item_id`)
    ON DELETE NO ACTION
    ON UPDATE NO ACTION
) ENGINE = InnoDB;
|
||||||
|
|
||||||
|
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Table `rssto`.`image`
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Images, unique by `sha256`, optionally linked to a provider
CREATE TABLE IF NOT EXISTS `rssto`.`image` (
  `image_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
  `provider_id` INT UNSIGNED NULL,
  `sha256` CHAR(64) NOT NULL,
  `src` VARCHAR(2048) NULL,
  `url` VARCHAR(2048) NULL,
  `data` MEDIUMBLOB NOT NULL,
  PRIMARY KEY (`image_id`),
  UNIQUE INDEX `hash_UNIQUE` (`sha256` ASC) VISIBLE,
  INDEX `fk_image_provider_idx` (`provider_id` ASC) VISIBLE,
  CONSTRAINT `fk_image_provider`
    FOREIGN KEY (`provider_id`)
    REFERENCES `rssto`.`provider` (`provider_id`)
    ON DELETE NO ACTION
    ON UPDATE NO ACTION
) ENGINE = InnoDB;
|
||||||
|
|
||||||
|
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Table `rssto`.`channel_item_content_image`
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Links between content records and images
CREATE TABLE IF NOT EXISTS `rssto`.`channel_item_content_image` (
  `channel_item_content_image_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
  `channel_item_content_id` BIGINT UNSIGNED NOT NULL,
  `image_id` BIGINT UNSIGNED NOT NULL,
  PRIMARY KEY (`channel_item_content_image_id`),
  INDEX `fk_channel_item_content_image_channel_item_content_idx` (`channel_item_content_id` ASC) VISIBLE,
  INDEX `fk_channel_item_content_image_image_idx` (`image_id` ASC) VISIBLE,
  CONSTRAINT `fk_channel_item_content_image_channel_item_content`
    FOREIGN KEY (`channel_item_content_id`)
    REFERENCES `rssto`.`channel_item_content` (`channel_item_content_id`)
    ON DELETE NO ACTION
    ON UPDATE NO ACTION,
  CONSTRAINT `fk_channel_item_content_image_image`
    FOREIGN KEY (`image_id`)
    REFERENCES `rssto`.`image` (`image_id`)
    ON DELETE NO ACTION
    ON UPDATE NO ACTION
) ENGINE = InnoDB;
|
||||||
|
|
||||||
|
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Table `rssto`.`channel_description`
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Channel titles/descriptions, unique by (`channel_id`, `provider_id`)
CREATE TABLE IF NOT EXISTS `rssto`.`channel_description` (
  `channel_description_id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
  `channel_id` INT UNSIGNED NOT NULL,
  `provider_id` INT UNSIGNED NULL,
  `title` TEXT NULL,
  `description` LONGTEXT NULL,
  PRIMARY KEY (`channel_description_id`),
  INDEX `fk_channel_description_provider_idx` (`provider_id` ASC) VISIBLE,
  INDEX `fk_channel_description_channel_idx` (`channel_id` ASC) VISIBLE,
  UNIQUE INDEX `UNIQUE` (`channel_id` ASC, `provider_id` ASC) VISIBLE,
  CONSTRAINT `fk_channel_description_provider`
    FOREIGN KEY (`provider_id`)
    REFERENCES `rssto`.`provider` (`provider_id`)
    ON DELETE NO ACTION
    ON UPDATE NO ACTION,
  CONSTRAINT `fk_channel_description_channel`
    FOREIGN KEY (`channel_id`)
    REFERENCES `rssto`.`channel` (`channel_id`)
    ON DELETE NO ACTION
    ON UPDATE NO ACTION
) ENGINE = InnoDB;
|
||||||
|
|
||||||
|
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Table `rssto`.`channel_item_description`
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Channel item titles/descriptions, unique by (`channel_item_id`, `provider_id`)
CREATE TABLE IF NOT EXISTS `rssto`.`channel_item_description` (
  `channel_item_description_id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
  `channel_item_id` INT UNSIGNED NOT NULL,
  `provider_id` INT UNSIGNED NULL,
  `title` TEXT NULL,
  `description` LONGTEXT NULL,
  INDEX `fk_channel_item_description_channel_item_idx` (`channel_item_id` ASC) VISIBLE,
  INDEX `fk_channel_item_description_provider_idx` (`provider_id` ASC) VISIBLE,
  PRIMARY KEY (`channel_item_description_id`),
  UNIQUE INDEX `UNIQUE` (`channel_item_id` ASC, `provider_id` ASC) VISIBLE,
  CONSTRAINT `fk_channel_item_description_channel_item`
    FOREIGN KEY (`channel_item_id`)
    REFERENCES `rssto`.`channel_item` (`channel_item_id`)
    ON DELETE NO ACTION
    ON UPDATE NO ACTION,
  CONSTRAINT `fk_channel_item_description_provider`
    FOREIGN KEY (`provider_id`)
    REFERENCES `rssto`.`provider` (`provider_id`)
    ON DELETE NO ACTION
    ON UPDATE NO ACTION
) ENGINE = InnoDB;
|
||||||
|
|
||||||
|
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Table `rssto`.`channel_item_content_description`
|
||||||
|
-- -----------------------------------------------------
|
||||||
|
-- Content titles/descriptions, unique by (`channel_item_content_id`, `provider_id`)
CREATE TABLE IF NOT EXISTS `rssto`.`channel_item_content_description` (
  `channel_item_content_description_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
  `channel_item_content_id` BIGINT UNSIGNED NOT NULL,
  `provider_id` INT UNSIGNED NULL,
  `title` TEXT NULL,
  `description` LONGTEXT NULL,
  PRIMARY KEY (`channel_item_content_description_id`),
  INDEX `fk_channel_item_content_description_channel_item_content_idx` (`channel_item_content_id` ASC) VISIBLE,
  INDEX `fk_channel_item_content_description_provider_idx` (`provider_id` ASC) VISIBLE,
  UNIQUE INDEX `UNIQUE` (`channel_item_content_id` ASC, `provider_id` ASC) VISIBLE,
  CONSTRAINT `fk_channel_item_content_description_channel_item_content`
    FOREIGN KEY (`channel_item_content_id`)
    REFERENCES `rssto`.`channel_item_content` (`channel_item_content_id`)
    ON DELETE NO ACTION
    ON UPDATE NO ACTION,
  CONSTRAINT `fk_channel_item_content_description_provider`
    FOREIGN KEY (`provider_id`)
    REFERENCES `rssto`.`provider` (`provider_id`)
    ON DELETE NO ACTION
    ON UPDATE NO ACTION
) ENGINE = InnoDB;
|
||||||
|
|
||||||
|
|
||||||
|
SET SQL_MODE=@OLD_SQL_MODE;
|
||||||
|
SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
|
||||||
|
SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
|
||||||
|
|
@ -1,32 +0,0 @@
|
||||||
use serde::Deserialize;
|
|
||||||
use std::path::PathBuf;
|
|
||||||
use url::Url;
|
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
|
||||||
pub struct Feed {
|
|
||||||
/// RSS feed source
|
|
||||||
pub url: Url,
|
|
||||||
|
|
||||||
/// Destination directory
|
|
||||||
pub storage: PathBuf,
|
|
||||||
|
|
||||||
/// Path to templates (export formats)
|
|
||||||
pub templates: Vec<PathBuf>,
|
|
||||||
|
|
||||||
/// Limit channel items (unlimited by default)
|
|
||||||
pub list_items_limit: Option<usize>,
|
|
||||||
|
|
||||||
pub pub_date_format: String,
|
|
||||||
pub last_build_date_format: String,
|
|
||||||
pub time_generated_format: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
|
||||||
pub struct Config {
|
|
||||||
pub feed: Vec<Feed>,
|
|
||||||
|
|
||||||
/// Update timeout in seconds
|
|
||||||
///
|
|
||||||
/// * None to generate once
|
|
||||||
pub update: Option<u64>,
|
|
||||||
}
|
|
||||||
148
src/main.rs
148
src/main.rs
|
|
@ -1,148 +0,0 @@
|
||||||
mod argument;
|
|
||||||
mod config;
|
|
||||||
|
|
||||||
use anyhow::Result;
|
|
||||||
use argument::Argument;
|
|
||||||
use chrono::{DateTime, Local};
|
|
||||||
use clap::Parser;
|
|
||||||
use config::{Config, Feed};
|
|
||||||
use log::{debug, info, warn};
|
|
||||||
use std::{
|
|
||||||
env::var,
|
|
||||||
fs::{File, create_dir_all, read_to_string},
|
|
||||||
io::Write,
|
|
||||||
path::PathBuf,
|
|
||||||
};
|
|
||||||
use strip_tags::*;
|
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
|
||||||
if var("RUST_LOG").is_ok() {
|
|
||||||
use tracing_subscriber::{EnvFilter, fmt::*};
|
|
||||||
struct T;
|
|
||||||
impl time::FormatTime for T {
|
|
||||||
fn format_time(&self, w: &mut format::Writer<'_>) -> std::fmt::Result {
|
|
||||||
write!(w, "{}", Local::now())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fmt()
|
|
||||||
.with_timer(T)
|
|
||||||
.with_env_filter(EnvFilter::from_default_env())
|
|
||||||
.init()
|
|
||||||
}
|
|
||||||
|
|
||||||
let argument = Argument::parse();
|
|
||||||
let config: Config = toml::from_str(&read_to_string(argument.config)?)?;
|
|
||||||
|
|
||||||
info!("Crawler started");
|
|
||||||
|
|
||||||
loop {
|
|
||||||
debug!("Begin new crawl queue...");
|
|
||||||
|
|
||||||
for feed in &config.feed {
|
|
||||||
debug!("Update `{}`...", feed.url);
|
|
||||||
if let Err(e) = crawl(feed) {
|
|
||||||
warn!("Feed `{}` update failed: `{e}`", feed.url)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
debug!("Crawl queue completed");
|
|
||||||
|
|
||||||
if let Some(update) = config.update {
|
|
||||||
debug!("Wait {update} seconds to continue...",);
|
|
||||||
std::thread::sleep(std::time::Duration::from_secs(update))
|
|
||||||
} else {
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn crawl(feed: &Feed) -> Result<()> {
|
|
||||||
use reqwest::blocking::get;
|
|
||||||
use rss::Channel;
|
|
||||||
|
|
||||||
let channel = Channel::read_from(&get(feed.url.as_str())?.bytes()?[..])?;
|
|
||||||
let channel_items = channel.items();
|
|
||||||
let channel_items_limit = feed.list_items_limit.unwrap_or(channel_items.len());
|
|
||||||
let regex = regex::Regex::new(r"\n{3,}").unwrap();
|
|
||||||
|
|
||||||
for template in &feed.templates {
|
|
||||||
let root = PathBuf::from(template);
|
|
||||||
let extension = root.file_name().unwrap().to_string_lossy();
|
|
||||||
|
|
||||||
let index = {
|
|
||||||
let mut p = PathBuf::from(&root);
|
|
||||||
p.push(format!("index.{extension}"));
|
|
||||||
read_to_string(p)?
|
|
||||||
};
|
|
||||||
|
|
||||||
let index_item = {
|
|
||||||
let mut p = PathBuf::from(&root);
|
|
||||||
p.push("index");
|
|
||||||
p.push(format!("item.{extension}"));
|
|
||||||
read_to_string(p)?
|
|
||||||
};
|
|
||||||
|
|
||||||
create_dir_all(&feed.storage)?;
|
|
||||||
File::create({
|
|
||||||
let mut p = PathBuf::from(&feed.storage);
|
|
||||||
p.push(format!("index.{extension}"));
|
|
||||||
p
|
|
||||||
})?
|
|
||||||
.write_all(
|
|
||||||
index
|
|
||||||
.replace("{title}", &strip_tags(channel.title()))
|
|
||||||
.replace("{description}", &strip_tags(channel.description()))
|
|
||||||
.replace("{link}", channel.link())
|
|
||||||
.replace("{language}", channel.language().unwrap_or_default())
|
|
||||||
.replace(
|
|
||||||
"{pub_date}",
|
|
||||||
&time(channel.pub_date(), &feed.pub_date_format),
|
|
||||||
)
|
|
||||||
.replace(
|
|
||||||
"{last_build_date}",
|
|
||||||
&time(channel.last_build_date(), &feed.last_build_date_format),
|
|
||||||
)
|
|
||||||
.replace("{time_generated}", &time(None, &feed.time_generated_format))
|
|
||||||
.replace(
|
|
||||||
"{items}",
|
|
||||||
&channel_items
|
|
||||||
.iter()
|
|
||||||
.take(channel_items_limit)
|
|
||||||
.map(|i| {
|
|
||||||
regex
|
|
||||||
.replace_all(
|
|
||||||
&index_item
|
|
||||||
.replace(
|
|
||||||
"{title}",
|
|
||||||
&strip_tags(i.title().unwrap_or_default()),
|
|
||||||
)
|
|
||||||
.replace(
|
|
||||||
"{description}",
|
|
||||||
&strip_tags(i.description().unwrap_or_default()),
|
|
||||||
)
|
|
||||||
.replace("{link}", i.link().unwrap_or_default())
|
|
||||||
.replace(
|
|
||||||
"{pub_date}",
|
|
||||||
&time(i.pub_date(), &feed.pub_date_format),
|
|
||||||
),
|
|
||||||
"\n\n",
|
|
||||||
)
|
|
||||||
.to_string()
|
|
||||||
})
|
|
||||||
.collect::<String>(),
|
|
||||||
)
|
|
||||||
.as_bytes(),
|
|
||||||
)?
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn time(value: Option<&str>, format: &str) -> String {
|
|
||||||
match value {
|
|
||||||
Some(v) => DateTime::parse_from_rfc2822(v).unwrap(),
|
|
||||||
None => Local::now().into(),
|
|
||||||
}
|
|
||||||
.format(format)
|
|
||||||
.to_string()
|
|
||||||
}
|
|
||||||
|
|
@ -1,7 +0,0 @@
|
||||||
# {title}
|
|
||||||
|
|
||||||
{description}
|
|
||||||
|
|
||||||
## {time_generated}
|
|
||||||
|
|
||||||
{items}
|
|
||||||
|
|
@ -1,6 +0,0 @@
|
||||||
|
|
||||||
### {title}
|
|
||||||
|
|
||||||
{description}
|
|
||||||
|
|
||||||
=> {link} {pub_date}
|
|
||||||
|
|
@ -1,49 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html lang="{language}">
|
|
||||||
<head>
|
|
||||||
<meta charset="UTF-8">
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
||||||
<title>{title}</title>
|
|
||||||
<style>
|
|
||||||
* {
|
|
||||||
color-scheme: light dark
|
|
||||||
}
|
|
||||||
body {
|
|
||||||
margin: 0 auto;
|
|
||||||
max-width: 1024px
|
|
||||||
}
|
|
||||||
header {
|
|
||||||
border-bottom: 1px #ccc dotted;
|
|
||||||
padding-bottom: 32px;
|
|
||||||
}
|
|
||||||
section > article {
|
|
||||||
border-bottom: 1px #ccc dotted;
|
|
||||||
padding-bottom: 32px;
|
|
||||||
}
|
|
||||||
footer {
|
|
||||||
font-size: small;
|
|
||||||
padding: 16px 0;
|
|
||||||
}
|
|
||||||
</style>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
<header>
|
|
||||||
<h1>{title}</h1>
|
|
||||||
{description}
|
|
||||||
</header>
|
|
||||||
<section>
|
|
||||||
{items}
|
|
||||||
</section>
|
|
||||||
<footer>
|
|
||||||
<p>
|
|
||||||
Source: <a href="{link}">{title}</a> |
|
|
||||||
Updated: {pub_date} |
|
|
||||||
Build: {last_build_date} |
|
|
||||||
Generated: {time_generated}
|
|
||||||
</p>
|
|
||||||
<p>
|
|
||||||
Powered by <a href="https://github.com/YGGverse/rssto">rssto</a>.
|
|
||||||
</p>
|
|
||||||
</footer>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,5 +0,0 @@
|
||||||
<article>
|
|
||||||
<h2>{title}</h2>
|
|
||||||
<p>{description}</p>
|
|
||||||
<a href="{link}">{pub_date}</a>
|
|
||||||
</article>
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue