skip processed channel items

This commit is contained in:
yggverse 2026-01-07 19:27:36 +02:00
parent 259ac118dc
commit 776de04c1d

View file

@ -59,7 +59,7 @@ fn crawl(db: &mut Mysql, channel_config: &config::Channel) -> Result<()> {
use rss::Channel; use rss::Channel;
use scraper::Selector; use scraper::Selector;
// shared local helpers /// local helper
fn scrape(url: &str, selector: &Selector) -> Result<Option<String>> { fn scrape(url: &str, selector: &Selector) -> Result<Option<String>> {
let document = scraper::Html::parse_document(&get(url)?.text()?); let document = scraper::Html::parse_document(&get(url)?.text()?);
Ok(if let Some(first) = document.select(selector).next() { Ok(if let Some(first) = document.select(selector).next() {
@ -70,8 +70,7 @@ fn crawl(db: &mut Mysql, channel_config: &config::Channel) -> Result<()> {
}) })
} }
// allocate once let channel_url = channel_config.url.to_string(); // allocate once
let channel_url = channel_config.url.to_string();
let channel_items = match Channel::read_from(&get(channel_config.url.as_str())?.bytes()?[..]) { let channel_items = match Channel::read_from(&get(channel_config.url.as_str())?.bytes()?[..]) {
Ok(response) => response.into_items(), Ok(response) => response.into_items(),
@ -116,31 +115,29 @@ fn crawl(db: &mut Mysql, channel_config: &config::Channel) -> Result<()> {
continue; continue;
} }
}; };
let channel_item_id = match db if !db
.channel_items_by_channel_id_guid(channel_id, guid, Some(1))? .channel_items_by_channel_id_guid(channel_id, guid, Some(1))?
.first() .is_empty()
{ {
Some(result) => result.channel_item_id, continue; // skip next steps as processed
None => db.insert_channel_item( }
channel_id, let channel_item_id = db.insert_channel_item(
pub_date, channel_id,
guid, pub_date,
link, guid,
if channel_config.persist_item_title { link,
channel_item.title() if channel_config.persist_item_title {
} else { channel_item.title()
None } else {
}, None
if channel_config.persist_item_description { },
channel_item.description() if channel_config.persist_item_description {
} else { channel_item.description()
None } else {
}, None
)?, },
}; )?;
// preload remote content..
// preload remote content
let title = match channel_config.content_title_selector { let title = match channel_config.content_title_selector {
Some(ref selector) => match scrape(link, selector) { Some(ref selector) => match scrape(link, selector) {
Ok(value) => match value { Ok(value) => match value {