optimize db api

This commit is contained in:
yggverse 2026-01-10 01:41:20 +02:00
parent f48e256fad
commit ee083dfc45
11 changed files with 215 additions and 192 deletions

View file

@@ -14,7 +14,7 @@ anyhow = "1.0.100"
chrono = "0.4.42"
clap = { version = "4.5.54", features = ["derive"] }
log = "0.4.29"
mysql = { package = "rssto-mysql", version = "0.1.0", features = ["transactional"], path = "../mysql" }
mysql = { package = "rssto-mysql", version = "0.1.0", features = ["transaction"], path = "../mysql" }
reqwest = { version = "0.13.1", features = ["blocking"] }
rss = "2.0.12"
scraper = { version = "0.25.0", features = ["serde"] }

View file

@@ -3,11 +3,9 @@ mod config;
use anyhow::Result;
use log::{debug, info, warn};
use mysql::Transactional;
use reqwest::blocking::get;
fn main() -> Result<()> {
use argument::Argument;
use chrono::Local;
use clap::Parser;
use std::{env::var, fs::read_to_string};
@@ -26,29 +24,29 @@ fn main() -> Result<()> {
.init()
}
let argument = Argument::parse();
let argument = argument::Argument::parse();
let config: config::Config = toml::from_str(&read_to_string(argument.config)?)?;
let db = mysql::Database::pool(
&config.mysql.host,
config.mysql.port,
&config.mysql.user,
&config.mysql.password,
&config.mysql.database,
)?;
info!("Crawler started");
loop {
debug!("Begin new crawl queue...");
{
// disconnect from the database immediately when exiting this scope,
// in case the `update` interval is enabled and the loop idles for a while.
let mut db = Transactional::connect(
&config.mysql.host,
config.mysql.port,
&config.mysql.user,
&config.mysql.password,
&config.mysql.database,
)?;
for c in &config.channel {
debug!("Update `{}`...", c.url);
if let Err(e) = crawl(&mut db, c) {
warn!("Channel `{}` update failed: `{e}`", c.url)
for c in &config.channel {
debug!("Update `{}`...", c.url);
let mut tx = db.transaction()?;
match crawl(&mut tx, c) {
Ok(()) => tx.commit()?,
Err(e) => {
warn!("Channel `{}` update failed: `{e}`", c.url);
tx.rollback()?
}
}
db.commit()?
}
debug!("Crawl queue completed");
if let Some(update) = config.update {
@@ -60,7 +58,7 @@ fn main() -> Result<()> {
}
}
fn crawl(db: &mut Transactional, channel_config: &config::Channel) -> Result<()> {
fn crawl(tx: &mut mysql::Transaction, channel_config: &config::Channel) -> Result<()> {
use rss::Channel;
use scraper::Selector;
@@ -87,9 +85,9 @@ fn crawl(db: &mut Transactional, channel_config: &config::Channel) -> Result<()>
let channel_items_limit = channel_config.items_limit.unwrap_or(channel_items.len());
let channel_id = match db.channel_id_by_url(&channel_url)? {
let channel_id = match tx.channel_id_by_url(&channel_url)? {
Some(channel_id) => channel_id,
None => db.insert_channel(&channel_url)?,
None => tx.insert_channel(&channel_url)?,
};
for channel_item in channel_items.iter().take(channel_items_limit) {
@@ -120,10 +118,10 @@ fn crawl(db: &mut Transactional, channel_config: &config::Channel) -> Result<()>
continue;
}
};
if db.channel_items_total_by_channel_id_guid(channel_id, guid)? > 0 {
if tx.channel_items_total_by_channel_id_guid(channel_id, guid)? > 0 {
continue; // skip next steps as processed
}
let channel_item_id = db.insert_channel_item(
let channel_item_id = tx.insert_channel_item(
channel_id,
pub_date,
guid,
@@ -188,7 +186,7 @@ fn crawl(db: &mut Transactional, channel_config: &config::Channel) -> Result<()>
}
},
};
let _content_id = db.insert_content(channel_item_id, None, &title, &description)?;
let _content_id = tx.insert_content(channel_item_id, None, &title, &description)?;
// @TODO preload media
}
Ok(())