disallow nullable title/description values for the content table, implement contents_by_channel_item_id_source_id, return last insert id for insert_content, fix content_id data type, implement initial content version save on crawl

This commit is contained in:
yggverse 2026-01-07 19:02:00 +02:00
parent 480cd21e73
commit c0734731cb
2 changed files with 60 additions and 18 deletions

View file

@ -139,37 +139,61 @@ fn crawl(db: &mut Mysql, channel_config: &config::Channel) -> Result<()> {
)?,
};
// @TODO preload remote content
// preload remote content
let title = match channel_config.content_title_selector {
Some(ref selector) => match scrape(&link, selector) {
Ok(value) => value,
Ok(value) => match value {
Some(title) => title,
None => {
warn!("Could not scrape `title` selector in `{channel_url}`");
continue;
}
},
Err(e) => {
warn!("Could not update `title` selector in `{channel_url}`: `{e}`");
continue;
}
},
None => None,
None => match channel_item.title {
Some(ref title) => title.clone(),
None => {
warn!(
"Could not assign `title` from channel item for content in `{channel_url}`"
);
continue;
}
},
};
let description = match channel_config.content_description_selector {
Some(ref selector) => match scrape(&link, selector) {
Ok(value) => value,
Ok(value) => match value {
Some(description) => description,
None => {
warn!("Could not scrape `description` selector in `{channel_url}`");
continue;
}
},
Err(e) => {
warn!("Could not update `description` selector in `{channel_url}`: `{e}`");
continue;
}
},
None => None,
None => match channel_item.description {
Some(ref description) => description.clone(),
None => {
warn!(
"Could not assign `description` from channel item for content in `{channel_url}`"
);
continue;
}
},
};
if title.is_none() && description.is_none() {
continue;
}
// @TODO insert content record
println!("{:?}", description)
assert!(
db.contents_by_channel_item_id_source_id(channel_item_id, None, Some(1))?
.is_empty()
);
let _content_id = db.insert_content(channel_item_id, None, title, description)?;
}
Ok(())
}

View file

@ -93,17 +93,34 @@ impl Mysql {
Ok(self.connection.last_insert_id())
}
/// Selects rows of the `content` table that belong to `channel_item_id` and
/// carry the given `source_id`.
///
/// * `channel_item_id` - value the `channel_item_id` column must equal.
/// * `source_id` - value the `source_id` column must match; `None` selects the
///   rows whose `source_id` is `NULL`.
/// * `limit` - maximum number of rows to return; `DEFAULT_LIMIT` is used when `None`.
///
/// # Errors
///
/// Returns the error produced by the underlying connection when the statement
/// cannot be executed or a row cannot be mapped.
pub fn contents_by_channel_item_id_source_id(
    &mut self,
    channel_item_id: u64,
    source_id: Option<u64>,
    limit: Option<usize>,
) -> Result<Vec<Content>, Error> {
    self.connection.exec_map(
        format!(
            // `<=>` is the NULL-safe equality operator: a `None` source id is bound as
            // NULL, and `source_id = NULL` is never true, so a plain `=` would silently
            // return no rows for every record stored without a source.
            "SELECT `content_id`, `channel_item_id`, `source_id`, `title`, `description` FROM `content` WHERE `channel_item_id` = ? AND `source_id` <=> ? LIMIT {}",
            limit.unwrap_or(DEFAULT_LIMIT)
        ),
        (channel_item_id, source_id),
        |(content_id, channel_item_id, source_id, title, description)| Content {
            content_id,
            channel_item_id,
            source_id,
            title,
            description,
        },
    )
}
/// Inserts one row into the `content` table from the given channel item id,
/// optional source id, title and description; all four values are passed as
/// statement parameters rather than formatted into the query text.
///
/// On success returns `self.connection.last_insert_id()`, read right after the
/// statement has been executed; a failing statement is propagated through `?`.
///
/// NOTE(review): this span comes from a rendered diff, so the superseded
/// `&str` / `Result<(), Error>` lines still appear next to their replacements.
pub fn insert_content(
&mut self,
channel_item_id: u64,
source_id: Option<u64>,
title: &str,
description: &str,
) -> Result<(), Error> {
title: String,
description: String,
) -> Result<u64, Error> {
self.connection.exec_drop(
"INSERT INTO `content` SET `channel_item_id` = ?, `source_id` = ?, `title` = ?, `description` = ?",
(channel_item_id, source_id, title, description ),
)
)?;
Ok(self.connection.last_insert_id())
}
}
@ -126,6 +143,7 @@ pub struct ChannelItem {
#[derive(Debug, PartialEq, Eq)]
pub struct Content {
pub content_id: u64,
pub channel_item_id: u64,
/// None if the original `title` and `description` values
/// parsed from the channel item on crawl