implement --index-timeout option

This commit is contained in:
yggverse 2025-07-07 18:21:43 +03:00
parent e4279b7cab
commit ae4e2e4ba9
4 changed files with 19 additions and 4 deletions

View file

@ -146,6 +146,9 @@ aquatic-crawler --infohash /path/to/info-hash-ipv4.json\
[default: 1000]
--index-timeout <INDEX_TIMEOUT>
Remove records from index older than `seconds`
--add-torrent-timeout <ADD_TORRENT_TIMEOUT>
Max time to handle each torrent

View file

@ -117,6 +117,10 @@ pub struct Config {
#[arg(long, default_value_t = 1000)]
pub index_capacity: usize,
/// Remove records from index older than `seconds`
#[arg(long)]
pub index_timeout: Option<i64>,
/// Max time to handle each torrent
#[arg(long, default_value_t = 10)]
pub add_torrent_timeout: u64,

View file

@ -1,5 +1,6 @@
mod value;
use chrono::{Duration, Utc};
use std::collections::HashMap;
use value::Value;
@ -7,6 +8,8 @@ use value::Value;
/// * also contains optional meta info to export index as RSS or any other format
pub struct Index {
index: HashMap<String, Value>,
/// Removes outdated values from `index` on `Self::refresh` action
timeout: Option<Duration>,
/// Track index changes to prevent extra disk write operations (to save SSD life)
/// * useful in the static RSS feed generation case, if enabled
is_changed: bool,
@ -16,12 +19,13 @@ pub struct Index {
}
impl Index {
pub fn init(capacity: usize, has_name: bool, has_length: bool) -> Self {
pub fn init(capacity: usize, timeout: Option<i64>, has_name: bool, has_length: bool) -> Self {
Self {
index: HashMap::with_capacity(capacity),
is_changed: false,
has_name,
timeout: timeout.map(Duration::seconds),
has_length,
has_name,
is_changed: false,
}
}
@ -69,7 +73,10 @@ impl Index {
}
pub fn refresh(&mut self) {
if let Some(timeout) = self.timeout {
let t = Utc::now();
self.index.retain(|_, v| t - v.time <= timeout)
}
self.is_changed = false
// @TODO implement also index cleanup by Value timeout
}
}

View file

@ -77,6 +77,7 @@ async fn main() -> Result<()> {
debug.info("Crawler started");
let mut index = Index::init(
config.index_capacity,
config.index_timeout,
config.export_rss.is_some(),
config.export_rss.is_some(),
);