From 29ff0b52cc7c267a98b7e499c061381ff20b8d56 Mon Sep 17 00:00:00 2001 From: yggverse Date: Wed, 9 Jul 2025 02:33:02 +0300 Subject: [PATCH] implement torrent files index option --- README.md | 5 ++++- src/config.rs | 4 ++++ src/index.rs | 27 ++++++++++++++++++++++----- src/index/value.rs | 40 ++++++++++++++++++++++++++++++---------- src/main.rs | 44 +++++++++++++++++++++++++++++++++++--------- 5 files changed, 95 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index a4a43f4..5fdebca 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ aquatic-crawler --infohash /path/to/info-hash-ipv4.bin\ --infohash Absolute path(s) or URL(s) to import infohashes from the Aquatic tracker binary API - * PR#233 feature + * PR#233 feature ([Wiki](https://github.com/YGGverse/aquatic-crawler/wiki/Aquatic)) --tracker Define custom tracker(s) to preload the `.torrent` files info @@ -149,6 +149,9 @@ aquatic-crawler --infohash /path/to/info-hash-ipv4.bin\ [default: 1000] + --index-list + Index torrent files + --index-timeout Remove records from index older than `seconds` diff --git a/src/config.rs b/src/config.rs index 3b744ad..61b3fd1 100644 --- a/src/config.rs +++ b/src/config.rs @@ -117,6 +117,10 @@ pub struct Config { #[arg(long, default_value_t = 1000)] pub index_capacity: usize, + /// Index torrent files + #[arg(long, default_value_t = false)] + pub index_list: bool, + /// Remove records from index older than `seconds` #[arg(long)] pub index_timeout: Option, diff --git a/src/index.rs b/src/index.rs index e0ec108..ef37dc3 100644 --- a/src/index.rs +++ b/src/index.rs @@ -16,15 +16,23 @@ pub struct Index { /// Store the index value in memory only when it is in use by the init options has_name: bool, has_size: bool, + has_list: bool, } impl Index { - pub fn init(capacity: usize, timeout: Option, has_name: bool, has_size: bool) -> Self { + pub fn init( + capacity: usize, + timeout: Option, + has_name: bool, + has_size: bool, + has_list: bool, + ) -> Self { Self { index: HashMap::with_capacity(capacity), timeout: timeout.map(Duration::seconds), has_size, has_name, + has_list, is_changed: false, } } @@ -49,7 +57,15 @@ impl Index { self.index.values().map(|i| i.node).sum::() } - pub fn insert(&mut self, infohash: String, node: u64, size: u64, name: Option) { + pub fn insert( + &mut self, + infohash: String, + node: u64, + size: u64, + list: Option>, + name: Option, + ) { + println!("{:?}", &list); if self .index .insert( @@ -58,6 +74,7 @@ impl Index { node, if self.has_size { Some(size) } else { None }, if self.has_name { name } else { None }, + if self.has_list { list } else { None }, ), ) .is_none() @@ -80,11 +97,11 @@ fn test() { use std::{thread::sleep, time::Duration}; // test values auto-clean by timeout - let mut i = Index::init(2, Some(3), false, false); + let mut i = Index::init(2, Some(3), false, false, false); - i.insert("h1".to_string(), 0, 0, None); + i.insert("h1".to_string(), 0, 0, None, None); sleep(Duration::from_secs(1)); - i.insert("h2".to_string(), 0, 0, None); + i.insert("h2".to_string(), 0, 0, None, None); i.refresh(); assert_eq!(i.len(), 2); diff --git a/src/index/value.rs b/src/index/value.rs index 00b2177..c09bb6e 100644 --- a/src/index/value.rs +++ b/src/index/value.rs @@ -1,7 +1,5 @@ use chrono::{DateTime, Utc}; -const NAME_MAX_LEN: usize = 125; // + 3 bytes for `...` offset @TODO optional - /// The `Index` value pub struct Value { pub time: DateTime, @@ -9,15 +7,22 @@ pub struct Value { // Isolate by applying internal filter on value set size: Option, name: Option, + list: Option>, } impl Value { /// Create new `Self` with current timestamp - pub fn new(node: u64, size: Option, name: Option) -> Self { + pub fn new( + node: u64, + size: Option, + name: Option, + list: Option>, + ) -> Self { Self { time: Utc::now(), node, size, + list: filter_list(list), name: filter_name(name), } } @@ -25,23 +30,38 @@ impl Value { pub fn name(&self) -> Option<&String> { self.name.as_ref() } + /// Get reference to the safely constructed files `list` member + pub fn list(&self) -> Option<&Vec<(String, u64)>> { + self.list.as_ref() + } /// Get reference to the safely constructed `length` member pub fn size(&self) -> Option { self.size } } -/// Prevent unexpected memory usage on store values from unknown source fn filter_name(value: Option) -> Option { - value.map(|v| { - if v.len() > NAME_MAX_LEN { - format!("{}...", sanitize(&v[..NAME_MAX_LEN])) - } else { - v - } + value.map(crop) +} + +fn filter_list(value: Option>) -> Option> { + value.map(|f| { + f.into_iter() + .map(|(n, l)| (crop(sanitize(&n)), l)) + .collect() }) } +/// Crop long values (prevents unexpected memory pool usage) +fn crop(value: String) -> String { + const L: usize = 125; // + 3 bytes for `...` offset, 128 max @TODO optional + if value.len() > L { + format!("{}...", sanitize(&value[..L])) + } else { + value + } +} + /// Strip tags & bom chars from string fn sanitize(value: &str) -> String { use voca_rs::strip::*; diff --git a/src/main.rs b/src/main.rs index 73ff9f2..fc4f3e4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,6 +14,10 @@ use config::Config; use debug::Debug; use format::Format; use index::Index; +use librqbit::{ + AddTorrent, AddTorrentOptions, AddTorrentResponse, ByteBufOwned, ConnectionOptions, + PeerConnectionOptions, SessionOptions, TorrentMetaV1Info, +}; use peers::Peers; use rss::Rss; use std::{collections::HashSet, num::NonZero, path::PathBuf, time::Duration}; @@ -24,10 +28,6 @@ use url::Url; #[tokio::main] async fn main() -> Result<()> { use clap::Parser; - use librqbit::{ - AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions, - PeerConnectionOptions, SessionOptions, - }; use tokio::time; // init components @@ -80,6 +80,7 @@ async fn main() -> Result<()> { config.index_timeout, config.export_rss.is_some(), config.export_rss.is_some(), + config.export_rss.is_some() && config.index_list, ); loop { debug.info("Index queue begin..."); @@ -150,7 +151,7 @@ async fn main() -> Result<()> { config.preload_max_filecount.unwrap_or_default(), ); mt.wait_until_initialized().await?; - let (name, size) = mt.with_metadata(|m| { + let (name, size, list) = mt.with_metadata(|m| { // init preload files list if let Some(ref p) = preload { for (id, info) in m.file_infos.iter().enumerate() { @@ -184,7 +185,11 @@ async fn main() -> Result<()> { save_torrent_file(t, &debug, &i, &m.torrent_bytes) } - (m.info.name.as_ref().map(|n| n.to_string()), size(&m.info)) + ( + m.info.name.as_ref().map(|n| n.to_string()), + size(&m.info), + list(&m.info), + ) })?; session.update_only_files(&mt, &only_files).await?; session.unpause(&mt).await?; @@ -199,7 +204,7 @@ async fn main() -> Result<()> { p.cleanup(&i, Some(only_files_keep))? } - index.insert(i, only_files_size, size, name) + index.insert(i, only_files_size, size, list, name) } Ok(AddTorrentResponse::ListOnly(r)) => { if let Some(ref t) = torrent { @@ -210,7 +215,13 @@ async fn main() -> Result<()> { // use `r.info` for Memory, SQLite, // Manticore and other alternative storage type - index.insert(i, 0, size(&r.info), r.info.name.map(|n| n.to_string())) + index.insert( + i, + 0, + size(&r.info), + list(&r.info), + r.info.name.map(|n| n.to_string()), + ) } // unexpected as should be deleted Ok(AddTorrentResponse::AlreadyManaged(..)) => panic!(), @@ -288,7 +299,7 @@ fn magnet(infohash: &str, trackers: Option<&HashSet>) -> String { } /// Count total size, including torrent files -fn size(info: &librqbit::TorrentMetaV1Info) -> u64 { +fn size(info: &TorrentMetaV1Info) -> u64 { let mut t = 0; if let Some(l) = info.length { t += l @@ -300,3 +311,18 @@ fn size(info: &librqbit::TorrentMetaV1Info) -> u64 { } t } + +fn list(info: &TorrentMetaV1Info) -> Option> { + info.files.as_ref().map(|files| { + files + .iter() + .map(|f| { + ( + String::from_utf8(f.path.iter().flat_map(|b| b.to_vec()).collect()) + .unwrap_or_default(), + f.length, + ) + }) + .collect() + }) +}