Mirror of https://github.com/YGGverse/aquatic-crawler.git (synced 2026-03-31 09:05:33 +00:00)
wrap index implementation, skip RSS file update if the index is not changed (saves SSD life by preventing extra write operations)
This commit is contained in:
parent ff7bb4c94f
commit b2b69ca9e7

2 changed files with 73 additions and 33 deletions
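For orientation, here is a minimal usage sketch (not part of the commit) of the dirty-flag API that src/index.rs introduces below; the capacity and infohash strings are placeholder values:

    use index::Index; // the module rewritten by this commit

    fn crawl_pass(index: &mut Index) {
        index.refresh(); // reset `is_changed` at the top of every pass
        for infohash in ["placeholder-infohash-1", "placeholder-infohash-2"] {
            if index.has(infohash) {
                continue; // already processed in this session
            }
            // inserting a previously unseen key sets `is_changed` to true
            index.insert(infohash.to_string(), 0, None);
        }
        if index.is_changed() {
            // only now rewrite the static RSS file, saving redundant SSD writes
        }
    }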
src/index.rs (63 changed lines)

@@ -1,7 +1,68 @@
 use chrono::{DateTime, Utc};
+use std::collections::HashMap;

-pub struct Index {
+pub struct Value {
     pub time: DateTime<Utc>,
     pub node: u64,
     pub name: Option<String>,
 }
+
+/// Collect processed info hashes to skip on the next iterations (for this session)
+/// * also contains optional meta info to export index as RSS or any other format
+pub struct Index {
+    index: HashMap<String, Value>,
+    /// Track index changes to prevent extra disk write operations (safe SSD life)
+    /// * useful in the static RSS feed generation case, if enabled.
+    is_changed: bool,
+}
+
+impl Index {
+    pub fn init(capacity: usize) -> Self {
+        Self {
+            index: HashMap::with_capacity(capacity),
+            is_changed: false,
+        }
+    }
+
+    pub fn has(&self, infohash: &str) -> bool {
+        self.index.contains_key(infohash)
+    }
+
+    pub fn is_changed(&self) -> bool {
+        self.is_changed
+    }
+
+    pub fn list(&self) -> &HashMap<String, Value> {
+        &self.index
+    }
+
+    pub fn len(&self) -> usize {
+        self.index.len()
+    }
+
+    pub fn nodes(&self) -> u64 {
+        self.index.values().map(|i| i.node).sum::<u64>()
+    }
+
+    pub fn insert(&mut self, infohash: String, node: u64, name: Option<String>) {
+        if self
+            .index
+            .insert(
+                infohash,
+                Value {
+                    time: Utc::now(),
+                    node,
+                    name,
+                },
+            )
+            .is_none()
+        {
+            self.is_changed = true
+        }
+    }
+
+    pub fn refresh(&mut self) {
+        self.is_changed = false
+        // @TODO implement also index cleanup by Value timeout
+    }
+}
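Note that the new `insert` above only marks the index as changed when the underlying `HashMap::insert` returns `None`, i.e. when the infohash was not present yet; re-inserting an existing key leaves `is_changed` untouched while still replacing the stored value. A small standalone illustration of that standard-library behaviour (the key and values here are placeholders):

    use std::collections::HashMap;

    fn main() {
        let mut map: HashMap<String, u64> = HashMap::new();
        assert!(map.insert("abc".into(), 1).is_none()); // new key: returns None
        assert!(map.insert("abc".into(), 2).is_some()); // existing key: returns Some(1)
        assert_eq!(map["abc"], 2); // the value is replaced either way
    }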
src/main.rs (43 changed lines)

@@ -8,15 +8,10 @@ mod storage;
 mod trackers;

 use anyhow::Result;
-use chrono::Utc;
 use debug::Debug;
 use index::Index;
 use rss::Rss;
-use std::{
-    collections::{HashMap, HashSet},
-    num::NonZero,
-    time::Duration,
-};
+use std::{collections::HashSet, num::NonZero, time::Duration};
 use storage::Storage;
 use url::Url;

@@ -64,11 +59,10 @@ async fn main() -> Result<()> {

     // begin
     debug.info("Crawler started");
-    // collect processed info hashes to skip on the next iterations (for this session)
-    // * also contains optional meta info to export index as RSS or any other format
-    let mut index = HashMap::with_capacity(arg.index_capacity);
+    let mut index = Index::init(arg.index_capacity);
     loop {
         debug.info("Index queue begin...");
+        index.refresh();
         for source in &arg.infohash_file {
             debug.info(&format!("Index source `{source}`..."));
             // grab latest info-hashes from this source
@@ -77,7 +71,7 @@
                 Ok(infohashes) => {
                     for i in infohashes {
                         // is already indexed?
-                        if index.contains_key(&i) {
+                        if index.has(&i) {
                            continue;
                        }
                        debug.info(&format!("Index `{i}`..."));
@@ -170,14 +164,7 @@
                                // cleanup irrelevant files (see rqbit#408)
                                storage.cleanup(&i, Some(only_files_keep))?;

-                               index.insert(
-                                   i,
-                                   Index {
-                                       time: Utc::now(),
-                                       node: only_files_size,
-                                       name,
-                                   },
-                               );
+                               index.insert(i, only_files_size, name)
                            }
                            Ok(AddTorrentResponse::ListOnly(r)) => {
                                if arg.save_torrents {
@@ -188,14 +175,7 @@
                                // use `r.info` for Memory, SQLite,
                                // Manticore and other alternative storage type

-                               index.insert(
-                                   i,
-                                   Index {
-                                       time: Utc::now(),
-                                       node: 0,
-                                       name: r.info.name.map(|n| n.to_string()),
-                                   },
-                               );
+                               index.insert(i, 0, r.info.name.map(|n| n.to_string()))
                            }
                            // unexpected as should be deleted
                            Ok(AddTorrentResponse::AlreadyManaged(..)) => panic!(),
@@ -208,7 +188,9 @@
                Err(e) => debug.error(&format!("API issue for `{source}`: `{e}`")),
            }
        }
-       if let Some(ref export_rss) = arg.export_rss {
+       if let Some(ref export_rss) = arg.export_rss
+           && index.is_changed()
+       {
            let mut rss = Rss::new(
                export_rss,
                &arg.export_rss_title,
@@ -216,7 +198,7 @@
                &arg.export_rss_description,
                Some(trackers.clone()),
            )?;
-           for (k, v) in &index {
+           for (k, v) in index.list() {
                rss.push(
                    k,
                    v.name.as_ref().unwrap_or(k),
@@ -226,10 +208,7 @@
            }
            rss.commit()?
        }
-       if arg
-           .preload_total_size
-           .is_some_and(|s| index.values().map(|i| i.node).sum::<u64>() > s)
-       {
+       if arg.preload_total_size.is_some_and(|s| index.nodes() > s) {
            panic!("Preload content size {} bytes reached!", 0)
        }
        debug.info(&format!(