mirror of
https://github.com/YGGverse/rssto.git
synced 2026-03-31 17:15:29 +00:00
disallow nullable title/description values for the content table, implement contents_by_channel_item_id_source_id, return last insert id for insert_content, fix content_id data type, implement initial content version save on crawl
This commit is contained in:
parent
480cd21e73
commit
c0734731cb
2 changed files with 60 additions and 18 deletions
|
|
@ -139,37 +139,61 @@ fn crawl(db: &mut Mysql, channel_config: &config::Channel) -> Result<()> {
|
||||||
)?,
|
)?,
|
||||||
};
|
};
|
||||||
|
|
||||||
// @TODO preload remote content
|
// preload remote content
|
||||||
|
|
||||||
let title = match channel_config.content_title_selector {
|
let title = match channel_config.content_title_selector {
|
||||||
Some(ref selector) => match scrape(&link, selector) {
|
Some(ref selector) => match scrape(&link, selector) {
|
||||||
Ok(value) => value,
|
Ok(value) => match value {
|
||||||
|
Some(title) => title,
|
||||||
|
None => {
|
||||||
|
warn!("Could not scrape `title` selector in `{channel_url}`");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
},
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!("Could not update `title` selector in `{channel_url}`: `{e}`");
|
warn!("Could not update `title` selector in `{channel_url}`: `{e}`");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
None => None,
|
None => match channel_item.title {
|
||||||
|
Some(ref title) => title.clone(),
|
||||||
|
None => {
|
||||||
|
warn!(
|
||||||
|
"Could not assign `title` from channel item for content in `{channel_url}`"
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
let description = match channel_config.content_description_selector {
|
let description = match channel_config.content_description_selector {
|
||||||
Some(ref selector) => match scrape(&link, selector) {
|
Some(ref selector) => match scrape(&link, selector) {
|
||||||
Ok(value) => value,
|
Ok(value) => match value {
|
||||||
|
Some(description) => description,
|
||||||
|
None => {
|
||||||
|
warn!("Could not scrape `description` selector in `{channel_url}`");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
},
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!("Could not update `description` selector in `{channel_url}`: `{e}`");
|
warn!("Could not update `description` selector in `{channel_url}`: `{e}`");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
None => None,
|
None => match channel_item.description {
|
||||||
};
|
Some(ref description) => description.clone(),
|
||||||
|
None => {
|
||||||
if title.is_none() && description.is_none() {
|
warn!(
|
||||||
|
"Could not assign `description` from channel item for content in `{channel_url}`"
|
||||||
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
},
|
||||||
// @TODO insert content record
|
};
|
||||||
|
assert!(
|
||||||
println!("{:?}", description)
|
db.contents_by_channel_item_id_source_id(channel_item_id, None, Some(1))?
|
||||||
|
.is_empty()
|
||||||
|
);
|
||||||
|
let _content_id = db.insert_content(channel_item_id, None, title, description)?;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -93,17 +93,34 @@ impl Mysql {
|
||||||
Ok(self.connection.last_insert_id())
|
Ok(self.connection.last_insert_id())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn contents_by_channel_item_id_source_id(
|
||||||
|
&mut self,
|
||||||
|
channel_item_id: u64,
|
||||||
|
source_id: Option<u64>,
|
||||||
|
limit: Option<usize>,
|
||||||
|
) -> Result<Vec<Content>, Error> {
|
||||||
|
self.connection.exec_map(
|
||||||
|
format!(
|
||||||
|
"SELECT `content_id`, `channel_item_id`, `source_id`, `title`, `description` FROM `content` WHERE `channel_item_id` = ? AND `source_id` = ? LIMIT {}",
|
||||||
|
limit.unwrap_or(DEFAULT_LIMIT)
|
||||||
|
),
|
||||||
|
(channel_item_id, source_id),
|
||||||
|
|(content_id, channel_item_id,source_id, title, description)| Content { content_id, channel_item_id, source_id, title, description },
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn insert_content(
|
pub fn insert_content(
|
||||||
&mut self,
|
&mut self,
|
||||||
channel_item_id: u64,
|
channel_item_id: u64,
|
||||||
source_id: Option<u64>,
|
source_id: Option<u64>,
|
||||||
title: &str,
|
title: String,
|
||||||
description: &str,
|
description: String,
|
||||||
) -> Result<(), Error> {
|
) -> Result<u64, Error> {
|
||||||
self.connection.exec_drop(
|
self.connection.exec_drop(
|
||||||
"INSERT INTO `content` SET `channel_item_id` = ?, `source_id` = ?, `title` = ?, `description` = ?",
|
"INSERT INTO `content` SET `channel_item_id` = ?, `source_id` = ?, `title` = ?, `description` = ?",
|
||||||
(channel_item_id, source_id, title, description ),
|
(channel_item_id, source_id, title, description ),
|
||||||
)
|
)?;
|
||||||
|
Ok(self.connection.last_insert_id())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -126,6 +143,7 @@ pub struct ChannelItem {
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
pub struct Content {
|
pub struct Content {
|
||||||
|
pub content_id: u64,
|
||||||
pub channel_item_id: u64,
|
pub channel_item_id: u64,
|
||||||
/// None if the original `title` and `description` values
|
/// None if the original `title` and `description` values
|
||||||
/// parsed from the channel item on crawl
|
/// parsed from the channel item on crawl
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue