mirror of
https://github.com/YGGverse/rssto.git
synced 2026-04-01 17:45:30 +00:00
skip processed channel items
This commit is contained in:
parent
259ac118dc
commit
776de04c1d
1 changed file with 23 additions and 26 deletions
|
|
@ -59,7 +59,7 @@ fn crawl(db: &mut Mysql, channel_config: &config::Channel) -> Result<()> {
|
||||||
use rss::Channel;
|
use rss::Channel;
|
||||||
use scraper::Selector;
|
use scraper::Selector;
|
||||||
|
|
||||||
// shared local helpers
|
/// local helper
|
||||||
fn scrape(url: &str, selector: &Selector) -> Result<Option<String>> {
|
fn scrape(url: &str, selector: &Selector) -> Result<Option<String>> {
|
||||||
let document = scraper::Html::parse_document(&get(url)?.text()?);
|
let document = scraper::Html::parse_document(&get(url)?.text()?);
|
||||||
Ok(if let Some(first) = document.select(selector).next() {
|
Ok(if let Some(first) = document.select(selector).next() {
|
||||||
|
|
@ -70,8 +70,7 @@ fn crawl(db: &mut Mysql, channel_config: &config::Channel) -> Result<()> {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// allocate once
|
let channel_url = channel_config.url.to_string(); // allocate once
|
||||||
let channel_url = channel_config.url.to_string();
|
|
||||||
|
|
||||||
let channel_items = match Channel::read_from(&get(channel_config.url.as_str())?.bytes()?[..]) {
|
let channel_items = match Channel::read_from(&get(channel_config.url.as_str())?.bytes()?[..]) {
|
||||||
Ok(response) => response.into_items(),
|
Ok(response) => response.into_items(),
|
||||||
|
|
@ -116,31 +115,29 @@ fn crawl(db: &mut Mysql, channel_config: &config::Channel) -> Result<()> {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let channel_item_id = match db
|
if !db
|
||||||
.channel_items_by_channel_id_guid(channel_id, guid, Some(1))?
|
.channel_items_by_channel_id_guid(channel_id, guid, Some(1))?
|
||||||
.first()
|
.is_empty()
|
||||||
{
|
{
|
||||||
Some(result) => result.channel_item_id,
|
continue; // skip next steps as processed
|
||||||
None => db.insert_channel_item(
|
}
|
||||||
channel_id,
|
let channel_item_id = db.insert_channel_item(
|
||||||
pub_date,
|
channel_id,
|
||||||
guid,
|
pub_date,
|
||||||
link,
|
guid,
|
||||||
if channel_config.persist_item_title {
|
link,
|
||||||
channel_item.title()
|
if channel_config.persist_item_title {
|
||||||
} else {
|
channel_item.title()
|
||||||
None
|
} else {
|
||||||
},
|
None
|
||||||
if channel_config.persist_item_description {
|
},
|
||||||
channel_item.description()
|
if channel_config.persist_item_description {
|
||||||
} else {
|
channel_item.description()
|
||||||
None
|
} else {
|
||||||
},
|
None
|
||||||
)?,
|
},
|
||||||
};
|
)?;
|
||||||
|
// preload remote content..
|
||||||
// preload remote content
|
|
||||||
|
|
||||||
let title = match channel_config.content_title_selector {
|
let title = match channel_config.content_title_selector {
|
||||||
Some(ref selector) => match scrape(link, selector) {
|
Some(ref selector) => match scrape(link, selector) {
|
||||||
Ok(value) => match value {
|
Ok(value) => match value {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue