From ae4e2e4ba9388840725a56ebb35d40023f1622d3 Mon Sep 17 00:00:00 2001 From: yggverse Date: Mon, 7 Jul 2025 18:21:43 +0300 Subject: [PATCH] implement `--index-timeout` option --- README.md | 3 +++ src/config.rs | 4 ++++ src/index.rs | 15 +++++++++++---- src/main.rs | 1 + 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 57f4cd8..aa28a5e 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,9 @@ aquatic-crawler --infohash /path/to/info-hash-ipv4.json\ [default: 1000] + --index-timeout + Remove records from index older than `seconds` + --add-torrent-timeout Max time to handle each torrent diff --git a/src/config.rs b/src/config.rs index c9c2b8e..425664c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -117,6 +117,10 @@ pub struct Config { #[arg(long, default_value_t = 1000)] pub index_capacity: usize, + /// Remove records from index older than `seconds` + #[arg(long)] + pub index_timeout: Option<i64>, + /// Max time to handle each torrent #[arg(long, default_value_t = 10)] pub add_torrent_timeout: u64, diff --git a/src/index.rs b/src/index.rs index 43d8b23..8a8fd56 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,5 +1,6 @@ mod value; +use chrono::{Duration, Utc}; use std::collections::HashMap; use value::Value; @@ -7,6 +8,8 @@ use value::Value; /// * also contains optional meta info to export index as RSS or any other format pub struct Index { index: HashMap, + /// Removes outdated values from `index` on `Self::refresh` action + timeout: Option<Duration>, /// Track index changes to prevent extra disk write operations (safe SSD life) /// * useful in the static RSS feed generation case, if enabled is_changed: bool, @@ -16,12 +19,13 @@ pub struct Index { } impl Index { - pub fn init(capacity: usize, has_name: bool, has_length: bool) -> Self { + pub fn init(capacity: usize, timeout: Option<i64>, has_name: bool, has_length: bool) -> Self { Self { index: HashMap::with_capacity(capacity), - is_changed: false, - has_name, + timeout: 
timeout.map(Duration::seconds), has_length, + has_name, + is_changed: false, } } @@ -69,7 +73,10 @@ impl Index { } pub fn refresh(&mut self) { + if let Some(timeout) = self.timeout { + let t = Utc::now(); + self.index.retain(|_, v| t - v.time <= timeout) + } self.is_changed = false - // @TODO implement also index cleanup by Value timeout } } diff --git a/src/main.rs b/src/main.rs index 116badb..d9f5fd4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -77,6 +77,7 @@ async fn main() -> Result<()> { debug.info("Crawler started"); let mut index = Index::init( config.index_capacity, + config.index_timeout, config.export_rss.is_some(), config.export_rss.is_some(), );