From b2b69ca9e784e80ebb3434300df2c8e2e9b6135b Mon Sep 17 00:00:00 2001
From: yggverse
Date: Sun, 6 Jul 2025 23:33:31 +0300
Subject: [PATCH] wrap index implementation, skip rss file update if the index
 is not changed (saves SSD life by preventing extra write operations)

---
 src/index.rs | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/main.rs  | 43 +++++++++--------------------------
 2 files changed, 73 insertions(+), 33 deletions(-)

diff --git a/src/index.rs b/src/index.rs
index cb2c94e..b78c601 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -1,7 +1,68 @@
 use chrono::{DateTime, Utc};
+use std::collections::HashMap;
 
-pub struct Index {
+pub struct Value {
     pub time: DateTime<Utc>,
     pub node: u64,
     pub name: Option<String>,
 }
+
+/// Collect processed info hashes to skip on the next iterations (for this session)
+/// * also contains optional meta info to export index as RSS or any other format
+pub struct Index {
+    index: HashMap<String, Value>,
+    /// Track index changes to prevent extra disk write operations (saves SSD life)
+    /// * useful in the static RSS feed generation case, if enabled.
+ is_changed: bool, +} + +impl Index { + pub fn init(capacity: usize) -> Self { + Self { + index: HashMap::with_capacity(capacity), + is_changed: false, + } + } + + pub fn has(&self, infohash: &str) -> bool { + self.index.contains_key(infohash) + } + + pub fn is_changed(&self) -> bool { + self.is_changed + } + + pub fn list(&self) -> &HashMap { + &self.index + } + + pub fn len(&self) -> usize { + self.index.len() + } + + pub fn nodes(&self) -> u64 { + self.index.values().map(|i| i.node).sum::() + } + + pub fn insert(&mut self, infohash: String, node: u64, name: Option) { + if self + .index + .insert( + infohash, + Value { + time: Utc::now(), + node, + name, + }, + ) + .is_none() + { + self.is_changed = true + } + } + + pub fn refresh(&mut self) { + self.is_changed = false + // @TODO implement also index cleanup by Value timeout + } +} diff --git a/src/main.rs b/src/main.rs index ee9e0c3..0616c9f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,15 +8,10 @@ mod storage; mod trackers; use anyhow::Result; -use chrono::Utc; use debug::Debug; use index::Index; use rss::Rss; -use std::{ - collections::{HashMap, HashSet}, - num::NonZero, - time::Duration, -}; +use std::{collections::HashSet, num::NonZero, time::Duration}; use storage::Storage; use url::Url; @@ -64,11 +59,10 @@ async fn main() -> Result<()> { // begin debug.info("Crawler started"); - // collect processed info hashes to skip on the next iterations (for this session) - // * also contains optional meta info to export index as RSS or any other format - let mut index = HashMap::with_capacity(arg.index_capacity); + let mut index = Index::init(arg.index_capacity); loop { debug.info("Index queue begin..."); + index.refresh(); for source in &arg.infohash_file { debug.info(&format!("Index source `{source}`...")); // grab latest info-hashes from this source @@ -77,7 +71,7 @@ async fn main() -> Result<()> { Ok(infohashes) => { for i in infohashes { // is already indexed? 
-                        if index.contains_key(&i) {
+                        if index.has(&i) {
                             continue;
                         }
                         debug.info(&format!("Index `{i}`..."));
@@ -170,14 +164,7 @@
                             // cleanup irrelevant files (see rqbit#408)
                             storage.cleanup(&i, Some(only_files_keep))?;
 
-                            index.insert(
-                                i,
-                                Index {
-                                    time: Utc::now(),
-                                    node: only_files_size,
-                                    name,
-                                },
-                            );
+                            index.insert(i, only_files_size, name)
                         }
                         Ok(AddTorrentResponse::ListOnly(r)) => {
                             if arg.save_torrents {
@@ -188,14 +175,7 @@
 
                             // use `r.info` for Memory, SQLite,
                             // Manticore and other alternative storage type
-                            index.insert(
-                                i,
-                                Index {
-                                    time: Utc::now(),
-                                    node: 0,
-                                    name: r.info.name.map(|n| n.to_string()),
-                                },
-                            );
+                            index.insert(i, 0, r.info.name.map(|n| n.to_string()))
                         }
                         // unexpected as should be deleted
                         Ok(AddTorrentResponse::AlreadyManaged(..)) => panic!(),
@@ -208,7 +188,9 @@
                 Err(e) => debug.error(&format!("API issue for `{source}`: `{e}`")),
             }
         }
-        if let Some(ref export_rss) = arg.export_rss {
+        if let Some(ref export_rss) = arg.export_rss
+            && index.is_changed()
+        {
             let mut rss = Rss::new(
                 export_rss,
                 &arg.export_rss_title,
@@ -216,7 +198,7 @@
                 &arg.export_rss_description,
                 Some(trackers.clone()),
             )?;
-            for (k, v) in &index {
+            for (k, v) in index.list() {
                 rss.push(
                     k,
                     v.name.as_ref().unwrap_or(k),
@@ -226,10 +208,7 @@
             }
             rss.commit()?
         }
-        if arg
-            .preload_total_size
-            .is_some_and(|s| index.values().map(|i| i.node).sum::<u64>() > s)
-        {
+        if arg.preload_total_size.is_some_and(|s| index.nodes() > s) {
             panic!("Preload content size {} bytes reached!", 0)
         }
         debug.info(&format!(