mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 09:05:33 +00:00
implement --index-timeout option
This commit is contained in:
parent
e4279b7cab
commit
ae4e2e4ba9
4 changed files with 19 additions and 4 deletions
|
|
@ -146,6 +146,9 @@ aquatic-crawler --infohash /path/to/info-hash-ipv4.json\
|
||||||
|
|
||||||
[default: 1000]
|
[default: 1000]
|
||||||
|
|
||||||
|
--index-timeout <INDEX_TIMEOUT>
|
||||||
|
Remove records from index older than `seconds`
|
||||||
|
|
||||||
--add-torrent-timeout <ADD_TORRENT_TIMEOUT>
|
--add-torrent-timeout <ADD_TORRENT_TIMEOUT>
|
||||||
Max time to handle each torrent
|
Max time to handle each torrent
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -117,6 +117,10 @@ pub struct Config {
|
||||||
#[arg(long, default_value_t = 1000)]
|
#[arg(long, default_value_t = 1000)]
|
||||||
pub index_capacity: usize,
|
pub index_capacity: usize,
|
||||||
|
|
||||||
|
/// Remove records from index older than `seconds`
|
||||||
|
#[arg(long)]
|
||||||
|
pub index_timeout: Option<i64>,
|
||||||
|
|
||||||
/// Max time to handle each torrent
|
/// Max time to handle each torrent
|
||||||
#[arg(long, default_value_t = 10)]
|
#[arg(long, default_value_t = 10)]
|
||||||
pub add_torrent_timeout: u64,
|
pub add_torrent_timeout: u64,
|
||||||
|
|
|
||||||
15
src/index.rs
15
src/index.rs
|
|
@ -1,5 +1,6 @@
|
||||||
mod value;
|
mod value;
|
||||||
|
|
||||||
|
use chrono::{Duration, Utc};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use value::Value;
|
use value::Value;
|
||||||
|
|
||||||
|
|
@ -7,6 +8,8 @@ use value::Value;
|
||||||
/// * also contains optional meta info to export index as RSS or any other format
|
/// * also contains optional meta info to export index as RSS or any other format
|
||||||
pub struct Index {
|
pub struct Index {
|
||||||
index: HashMap<String, Value>,
|
index: HashMap<String, Value>,
|
||||||
|
/// Maximum age of a value: on `Self::refresh`, values older than this are removed from `index` (`None` disables the cleanup)
|
||||||
|
timeout: Option<Duration>,
|
||||||
/// Track index changes to prevent extra disk write operations (save SSD life)
|
/// Track index changes to prevent extra disk write operations (save SSD life)
|
||||||
/// * useful in the static RSS feed generation case, if enabled
|
/// * useful in the static RSS feed generation case, if enabled
|
||||||
is_changed: bool,
|
is_changed: bool,
|
||||||
|
|
@ -16,12 +19,13 @@ pub struct Index {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Index {
|
impl Index {
|
||||||
pub fn init(capacity: usize, has_name: bool, has_length: bool) -> Self {
|
pub fn init(capacity: usize, timeout: Option<i64>, has_name: bool, has_length: bool) -> Self {
|
||||||
Self {
|
Self {
|
||||||
index: HashMap::with_capacity(capacity),
|
index: HashMap::with_capacity(capacity),
|
||||||
is_changed: false,
|
timeout: timeout.map(Duration::seconds),
|
||||||
has_name,
|
|
||||||
has_length,
|
has_length,
|
||||||
|
has_name,
|
||||||
|
is_changed: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -69,7 +73,10 @@ impl Index {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn refresh(&mut self) {
|
pub fn refresh(&mut self) {
|
||||||
|
if let Some(timeout) = self.timeout {
|
||||||
|
let t = Utc::now();
|
||||||
|
self.index.retain(|_, v| t - v.time <= timeout)
|
||||||
|
}
|
||||||
self.is_changed = false
|
self.is_changed = false
|
||||||
// @TODO implement also index cleanup by Value timeout
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -77,6 +77,7 @@ async fn main() -> Result<()> {
|
||||||
debug.info("Crawler started");
|
debug.info("Crawler started");
|
||||||
let mut index = Index::init(
|
let mut index = Index::init(
|
||||||
config.index_capacity,
|
config.index_capacity,
|
||||||
|
config.index_timeout,
|
||||||
config.export_rss.is_some(),
|
config.export_rss.is_some(),
|
||||||
config.export_rss.is_some(),
|
config.export_rss.is_some(),
|
||||||
);
|
);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue