mirror of
https://github.com/YGGverse/rssto.git
synced 2026-03-31 17:15:29 +00:00
optimize db api
This commit is contained in:
parent
f48e256fad
commit
ee083dfc45
11 changed files with 215 additions and 192 deletions
|
|
@ -14,7 +14,7 @@ anyhow = "1.0.100"
|
|||
chrono = "0.4.42"
|
||||
clap = { version = "4.5.54", features = ["derive"] }
|
||||
log = "0.4.29"
|
||||
mysql = { package = "rssto-mysql", version = "0.1.0", features = ["transactional"], path = "../mysql" }
|
||||
mysql = { package = "rssto-mysql", version = "0.1.0", features = ["transaction"], path = "../mysql" }
|
||||
reqwest = { version = "0.13.1", features = ["blocking"] }
|
||||
rss = "2.0.12"
|
||||
scraper = { version = "0.25.0", features = ["serde"] }
|
||||
|
|
|
|||
|
|
@ -3,11 +3,9 @@ mod config;
|
|||
|
||||
use anyhow::Result;
|
||||
use log::{debug, info, warn};
|
||||
use mysql::Transactional;
|
||||
use reqwest::blocking::get;
|
||||
|
||||
fn main() -> Result<()> {
|
||||
use argument::Argument;
|
||||
use chrono::Local;
|
||||
use clap::Parser;
|
||||
use std::{env::var, fs::read_to_string};
|
||||
|
|
@ -26,29 +24,29 @@ fn main() -> Result<()> {
|
|||
.init()
|
||||
}
|
||||
|
||||
let argument = Argument::parse();
|
||||
let argument = argument::Argument::parse();
|
||||
let config: config::Config = toml::from_str(&read_to_string(argument.config)?)?;
|
||||
let db = mysql::Database::pool(
|
||||
&config.mysql.host,
|
||||
config.mysql.port,
|
||||
&config.mysql.user,
|
||||
&config.mysql.password,
|
||||
&config.mysql.database,
|
||||
)?;
|
||||
|
||||
info!("Crawler started");
|
||||
loop {
|
||||
debug!("Begin new crawl queue...");
|
||||
{
|
||||
// disconnect from the database immediately when exiting this scope,
|
||||
// in case the `update` queue is enabled and pending for a while.
|
||||
let mut db = Transactional::connect(
|
||||
&config.mysql.host,
|
||||
config.mysql.port,
|
||||
&config.mysql.user,
|
||||
&config.mysql.password,
|
||||
&config.mysql.database,
|
||||
)?;
|
||||
for c in &config.channel {
|
||||
debug!("Update `{}`...", c.url);
|
||||
if let Err(e) = crawl(&mut db, c) {
|
||||
warn!("Channel `{}` update failed: `{e}`", c.url)
|
||||
for c in &config.channel {
|
||||
debug!("Update `{}`...", c.url);
|
||||
let mut tx = db.transaction()?;
|
||||
match crawl(&mut tx, c) {
|
||||
Ok(()) => tx.commit()?,
|
||||
Err(e) => {
|
||||
warn!("Channel `{}` update failed: `{e}`", c.url);
|
||||
tx.rollback()?
|
||||
}
|
||||
}
|
||||
db.commit()?
|
||||
}
|
||||
debug!("Crawl queue completed");
|
||||
if let Some(update) = config.update {
|
||||
|
|
@ -60,7 +58,7 @@ fn main() -> Result<()> {
|
|||
}
|
||||
}
|
||||
|
||||
fn crawl(db: &mut Transactional, channel_config: &config::Channel) -> Result<()> {
|
||||
fn crawl(tx: &mut mysql::Transaction, channel_config: &config::Channel) -> Result<()> {
|
||||
use rss::Channel;
|
||||
use scraper::Selector;
|
||||
|
||||
|
|
@ -87,9 +85,9 @@ fn crawl(db: &mut Transactional, channel_config: &config::Channel) -> Result<()>
|
|||
|
||||
let channel_items_limit = channel_config.items_limit.unwrap_or(channel_items.len());
|
||||
|
||||
let channel_id = match db.channel_id_by_url(&channel_url)? {
|
||||
let channel_id = match tx.channel_id_by_url(&channel_url)? {
|
||||
Some(channel_id) => channel_id,
|
||||
None => db.insert_channel(&channel_url)?,
|
||||
None => tx.insert_channel(&channel_url)?,
|
||||
};
|
||||
|
||||
for channel_item in channel_items.iter().take(channel_items_limit) {
|
||||
|
|
@ -120,10 +118,10 @@ fn crawl(db: &mut Transactional, channel_config: &config::Channel) -> Result<()>
|
|||
continue;
|
||||
}
|
||||
};
|
||||
if db.channel_items_total_by_channel_id_guid(channel_id, guid)? > 0 {
|
||||
if tx.channel_items_total_by_channel_id_guid(channel_id, guid)? > 0 {
|
||||
continue; // skip next steps as processed
|
||||
}
|
||||
let channel_item_id = db.insert_channel_item(
|
||||
let channel_item_id = tx.insert_channel_item(
|
||||
channel_id,
|
||||
pub_date,
|
||||
guid,
|
||||
|
|
@ -188,7 +186,7 @@ fn crawl(db: &mut Transactional, channel_config: &config::Channel) -> Result<()>
|
|||
}
|
||||
},
|
||||
};
|
||||
let _content_id = db.insert_content(channel_item_id, None, &title, &description)?;
|
||||
let _content_id = tx.insert_content(channel_item_id, None, &title, &description)?;
|
||||
// @TODO preload media
|
||||
}
|
||||
Ok(())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue